Our raw excel file is in our repository. We can automatically download that file and put it in a temporary file. Then we can read that excel document into R and remove the temp file.
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/pjournal/mef03g-mujde-r/blob/master/EGM_Mujde_R_Raw_Data_1.xlsx?raw=true",destfile=tmp,mode="wb")
raw_data<-readxl::read_excel(tmp,col_names=FALSE,skip=2)
file.remove(tmp)
## [1] TRUE
library(readxl)
library(tidyverse)
library(ggplot2)
### We change the title names according to our data.
colnames(raw_data) <- c("date","pension_fund_company","n_of_participants", "fund_size_participants", "gov_contribution", "contribution", "n_of_pensioners", "n_of_ind_contracts", "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total","size_of_ind_contracts", "size_of_group_ind_contracts", "size_of_employer_group_certificates" ,"size_total")
# Let's see our data
head(raw_data)
## # A tibble: 6 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 31.0~ Aegon Emeklilik~ 37671 132128516. 15559754.
## 2 31.0~ Allianz Hayat v~ 94630 2801049763. 385315268.
## 3 31.0~ Allianz YaÅŸam v~ 728934 12158903088. 1435419831.
## 4 31.0~ Anadolu Hayat E~ 1091010 16312824287. 2768140863.
## 5 31.0~ Avivasa Emeklil~ 787046 16801925214. 2467365796.
## 6 31.0~ Axa Hayat ve Em~ 33794 426214299. 86715369.
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <dbl>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <dbl>,
## # n_of_employer_group_certificates <dbl>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
### We'll filter the dates in order to find the difference of the customer base in terms of numbers between 31.07.2017 and 31.07.2019
egm_data <- raw_data
egm_sum_participants<-egm_data %>% select(date,pension_fund_company,n_of_participants) %>% filter(date == "31.07.2017" | date =="31.07.2019") %>% arrange(pension_fund_company,date)
print(egm_sum_participants)
## # A tibble: 36 x 3
## date pension_fund_company n_of_participants
## <chr> <chr> <dbl>
## 1 31.07.2017 Aegon Emeklilik ve Hayat 41689
## 2 31.07.2019 Aegon Emeklilik ve Hayat 37671
## 3 31.07.2017 Allianz Hayat ve Emeklilik 113085
## 4 31.07.2019 Allianz Hayat ve Emeklilik 94630
## 5 31.07.2017 Allianz YaÅŸam ve Emeklilik 779339
## 6 31.07.2019 Allianz YaÅŸam ve Emeklilik 728934
## 7 31.07.2017 Anadolu Hayat Emeklilik 1135063
## 8 31.07.2019 Anadolu Hayat Emeklilik 1091010
## 9 31.07.2017 Avivasa Emeklilik ve Hayat 886713
## 10 31.07.2019 Avivasa Emeklilik ve Hayat 787046
## # ... with 26 more rows
### We compared the member population of 2 years for each fund company above and substracted the 2019 numbers from 2017 numbers in order to find the difference
diff_participants <- egm_sum_participants %>% group_by(pension_fund_company) %>% mutate(diff = n_of_participants - lag(n_of_participants,default = first(n_of_participants)))
print(diff_participants)
## # A tibble: 36 x 4
## # Groups: pension_fund_company [20]
## date pension_fund_company n_of_participants diff
## <chr> <chr> <dbl> <dbl>
## 1 31.07.2017 Aegon Emeklilik ve Hayat 41689 0
## 2 31.07.2019 Aegon Emeklilik ve Hayat 37671 -4018
## 3 31.07.2017 Allianz Hayat ve Emeklilik 113085 0
## 4 31.07.2019 Allianz Hayat ve Emeklilik 94630 -18455
## 5 31.07.2017 Allianz YaÅŸam ve Emeklilik 779339 0
## 6 31.07.2019 Allianz YaÅŸam ve Emeklilik 728934 -50405
## 7 31.07.2017 Anadolu Hayat Emeklilik 1135063 0
## 8 31.07.2019 Anadolu Hayat Emeklilik 1091010 -44053
## 9 31.07.2017 Avivasa Emeklilik ve Hayat 886713 0
## 10 31.07.2019 Avivasa Emeklilik ve Hayat 787046 -99667
## # ... with 26 more rows
summary_diff <- diff_participants %>% group_by(pension_fund_company) %>% summarise(sum(diff))
print(summary_diff)
## # A tibble: 20 x 2
## pension_fund_company `sum(diff)`
## <chr> <dbl>
## 1 Aegon Emeklilik ve Hayat -4018
## 2 Allianz Hayat ve Emeklilik -18455
## 3 Allianz YaÅŸam ve Emeklilik -50405
## 4 Anadolu Hayat Emeklilik -44053
## 5 Avivasa Emeklilik ve Hayat -99667
## 6 Axa Hayat ve Emeklilik -864
## 7 Bereket Emeklilik ve Hayat -21079
## 8 BNP Paribas Cardif Emeklilik -3009
## 9 Cigna Finans Emeklilik ve Hayat -29194
## 10 Fiba Emeklilik ve Hayat 49016
## 11 Garanti Emeklilik ve Hayat -78313
## 12 Groupama Emeklilik 0
## 13 Groupama Hayat 0
## 14 Halk Hayat ve Emeklilik 87602
## 15 Katılım Emeklilik ve Hayat 76666
## 16 Metlife Emeklilik ve Hayat -1789
## 17 NN Hayat ve Emeklilik -10947
## 18 Vakıf Emeklilik 0
## 19 Vakıf Emeklilik ve Hayat 0
## 20 Ziraat Hayat ve Emeklilik 103282
colnames(summary_diff) <- c("pension_fund_company","sum_of_cust")
summary_diff <- summary_diff %>% arrange(sum_of_cust)
print(summary_diff)
## # A tibble: 20 x 2
## pension_fund_company sum_of_cust
## <chr> <dbl>
## 1 Avivasa Emeklilik ve Hayat -99667
## 2 Garanti Emeklilik ve Hayat -78313
## 3 Allianz YaÅŸam ve Emeklilik -50405
## 4 Anadolu Hayat Emeklilik -44053
## 5 Cigna Finans Emeklilik ve Hayat -29194
## 6 Bereket Emeklilik ve Hayat -21079
## 7 Allianz Hayat ve Emeklilik -18455
## 8 NN Hayat ve Emeklilik -10947
## 9 Aegon Emeklilik ve Hayat -4018
## 10 BNP Paribas Cardif Emeklilik -3009
## 11 Metlife Emeklilik ve Hayat -1789
## 12 Axa Hayat ve Emeklilik -864
## 13 Groupama Emeklilik 0
## 14 Groupama Hayat 0
## 15 Vakıf Emeklilik 0
## 16 Vakıf Emeklilik ve Hayat 0
## 17 Fiba Emeklilik ve Hayat 49016
## 18 Katılım Emeklilik ve Hayat 76666
## 19 Halk Hayat ve Emeklilik 87602
## 20 Ziraat Hayat ve Emeklilik 103282
###Below, graph shows that how many customers each fund company has gained or lost over 2 years
gg1 <- ggplot(summary_diff, aes(x=pension_fund_company, y=sum_of_cust)) + geom_point() + theme(axis.text.x = element_text(angle = 90)) + ylab("change of cust number over 2 years")
print(gg1)
Analyze 2
### We calculated fund amount per each fund participant (fund amount per capita). We used the most update data ( 31.07.2019).
egm_per_capita <- egm_data %>% select(date,pension_fund_company,n_of_participants,fund_size_participants) %>% filter(date =="31.07.2019") %>% mutate(per_capita = fund_size_participants/n_of_participants) %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% arrange(desc(per_capita))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
print(egm_per_capita)
## # A tibble: 18 x 5
## date pension_fund_compa~ n_of_participan~ fund_size_partic~ per_capita
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 31.07~ Allianz Hayat ve E~ 94630 2801049763. 29600.
## 2 31.07~ Avivasa Emeklilik ~ 787046 16801925214. 21348.
## 3 31.07~ Fiba Emeklilik ve ~ 91108 1614645975. 17722.
## 4 31.07~ Allianz YaÅŸam ve E~ 728934 12158903088. 16680.
## 5 31.07~ Anadolu Hayat Emek~ 1091010 16312824287. 14952.
## 6 31.07~ Vakıf Emeklilik ve~ 547507 7224409572. 13195.
## 7 31.07~ Axa Hayat ve Emekl~ 33794 426214299. 12612.
## 8 31.07~ BNP Paribas Cardif~ 181937 2260767911. 12426.
## 9 31.07~ NN Hayat ve Emekli~ 255144 3165530598. 12407.
## 10 31.07~ Garanti Emeklilik ~ 1111079 12694233683. 11425.
## 11 31.07~ Metlife Emeklilik ~ 184525 1619012245. 8774.
## 12 31.07~ Cigna Finans Emekl~ 83601 707565760. 8464.
## 13 31.07~ Halk Hayat ve Emek~ 561547 4066235326. 7241.
## 14 31.07~ Ziraat Hayat ve Em~ 657702 4544494775. 6910.
## 15 31.07~ Katılım Emeklilik ~ 253288 1466479144. 5790.
## 16 31.07~ Bereket Emeklilik ~ 93913 360296123. 3836.
## 17 31.07~ Aegon Emeklilik ve~ 37671 132128516. 3507.
## 18 31.07~ Groupama Hayat 0 0 0
## Graph shows that, Allianz Fund Participants have the biggest fund amound per capita. That means, Allianz Fund Participants must be paying the highest mountly premium.
gg2 <- ggplot(egm_per_capita, aes(x=pension_fund_company, y=per_capita, fill=pension_fund_company)) + geom_bar(stat="identity")+
theme_classic() + xlab("Firms") + theme(axis.text.x = element_text(angle = 90)) + ylab("fund amount per participant in TRY")
print(gg2)
Analyze 3
### We found top 5 fund companies in terms of customer base at 31.07.2019.
egm_company<-egm_data %>% select(date,pension_fund_company,n_of_participants) %>% filter(date=="31.07.2019") %>% group_by(date) %>% mutate(percentage_of_participants=100*(n_of_participants/sum(n_of_participants))) %>%top_n(5,percentage_of_participants )
print(egm_company)
## # A tibble: 5 x 4
## # Groups: date [1]
## date pension_fund_company n_of_participan~ percentage_of_partici~
## <chr> <chr> <dbl> <dbl>
## 1 31.07.20~ Allianz YaÅŸam ve Emekl~ 728934 10.7
## 2 31.07.20~ Anadolu Hayat Emeklilik 1091010 16.1
## 3 31.07.20~ Avivasa Emeklilik ve H~ 787046 11.6
## 4 31.07.20~ Garanti Emeklilik ve H~ 1111079 16.4
## 5 31.07.20~ Ziraat Hayat ve Emekli~ 657702 9.68
##In the pie chart you can see top five fund companies customers base percentage.
gg3<-ggplot(egm_company, aes(x="", y=percentage_of_participants, fill=pension_fund_company)) + geom_bar(stat="identity", width=1) + coord_polar("y", start=0) +geom_text(aes(label = paste0(round(percentage_of_participants), "%")), position = position_stack(vjust = 0.5))
print(gg3)