EGM Assignment

Group Mujde ‘R’

Our raw excel file is in our repository. We can automatically download that file and put it in a temporary file. Then we can read that excel document into R and remove the temp file.

tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/pjournal/mef03g-mujde-r/blob/master/EGM_Mujde_R_Raw_Data_1.xlsx?raw=true",destfile=tmp,mode="wb")
raw_data<-readxl::read_excel(tmp,col_names=FALSE,skip=2)
file.remove(tmp)
## [1] TRUE
library(readxl)
library(tidyverse)
library(ggplot2)
### We change the title names according to our data.
colnames(raw_data) <- c("date","pension_fund_company","n_of_participants",  "fund_size_participants",   "gov_contribution", "contribution", "n_of_pensioners",  "n_of_ind_contracts",   "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total","size_of_ind_contracts",  "size_of_group_ind_contracts",  "size_of_employer_group_certificates"   ,"size_total")
# Let's see our data
head(raw_data)
## # A tibble: 6 x 15
##   date  pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
##   <chr> <chr>                       <dbl>            <dbl>            <dbl>
## 1 31.0~ Aegon Emeklilik~            37671       132128516.        15559754.
## 2 31.0~ Allianz Hayat v~            94630      2801049763.       385315268.
## 3 31.0~ Allianz YaÅŸam v~           728934     12158903088.      1435419831.
## 4 31.0~ Anadolu Hayat E~          1091010     16312824287.      2768140863.
## 5 31.0~ Avivasa Emeklil~           787046     16801925214.      2467365796.
## 6 31.0~ Axa Hayat ve Em~            33794       426214299.        86715369.
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <dbl>,
## #   n_of_ind_contracts <dbl>, n_of_group_ind_contracts <dbl>,
## #   n_of_employer_group_certificates <dbl>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>
### We'll filter the dates in order to find the difference of the customer base in terms of numbers between 31.07.2017 and 31.07.2019 
egm_data <- raw_data
egm_sum_participants<-egm_data %>% select(date,pension_fund_company,n_of_participants) %>% filter(date == "31.07.2017" | date =="31.07.2019")  %>% arrange(pension_fund_company,date)
print(egm_sum_participants) 
## # A tibble: 36 x 3
##    date       pension_fund_company       n_of_participants
##    <chr>      <chr>                                  <dbl>
##  1 31.07.2017 Aegon Emeklilik ve Hayat               41689
##  2 31.07.2019 Aegon Emeklilik ve Hayat               37671
##  3 31.07.2017 Allianz Hayat ve Emeklilik            113085
##  4 31.07.2019 Allianz Hayat ve Emeklilik             94630
##  5 31.07.2017 Allianz YaÅŸam ve Emeklilik            779339
##  6 31.07.2019 Allianz YaÅŸam ve Emeklilik            728934
##  7 31.07.2017 Anadolu Hayat Emeklilik              1135063
##  8 31.07.2019 Anadolu Hayat Emeklilik              1091010
##  9 31.07.2017 Avivasa Emeklilik ve Hayat            886713
## 10 31.07.2019 Avivasa Emeklilik ve Hayat            787046
## # ... with 26 more rows
### We compared the member population of 2 years for each fund company above and substracted the 2019 numbers from 2017 numbers in order to find the difference

diff_participants <- egm_sum_participants %>% group_by(pension_fund_company) %>% mutate(diff = n_of_participants - lag(n_of_participants,default = first(n_of_participants)))
print(diff_participants)
## # A tibble: 36 x 4
## # Groups:   pension_fund_company [20]
##    date       pension_fund_company       n_of_participants   diff
##    <chr>      <chr>                                  <dbl>  <dbl>
##  1 31.07.2017 Aegon Emeklilik ve Hayat               41689      0
##  2 31.07.2019 Aegon Emeklilik ve Hayat               37671  -4018
##  3 31.07.2017 Allianz Hayat ve Emeklilik            113085      0
##  4 31.07.2019 Allianz Hayat ve Emeklilik             94630 -18455
##  5 31.07.2017 Allianz YaÅŸam ve Emeklilik            779339      0
##  6 31.07.2019 Allianz YaÅŸam ve Emeklilik            728934 -50405
##  7 31.07.2017 Anadolu Hayat Emeklilik              1135063      0
##  8 31.07.2019 Anadolu Hayat Emeklilik              1091010 -44053
##  9 31.07.2017 Avivasa Emeklilik ve Hayat            886713      0
## 10 31.07.2019 Avivasa Emeklilik ve Hayat            787046 -99667
## # ... with 26 more rows
summary_diff <- diff_participants %>% group_by(pension_fund_company) %>% summarise(sum(diff))
print(summary_diff)
## # A tibble: 20 x 2
##    pension_fund_company            `sum(diff)`
##    <chr>                                 <dbl>
##  1 Aegon Emeklilik ve Hayat              -4018
##  2 Allianz Hayat ve Emeklilik           -18455
##  3 Allianz YaÅŸam ve Emeklilik           -50405
##  4 Anadolu Hayat Emeklilik              -44053
##  5 Avivasa Emeklilik ve Hayat           -99667
##  6 Axa Hayat ve Emeklilik                 -864
##  7 Bereket Emeklilik ve Hayat           -21079
##  8 BNP Paribas Cardif Emeklilik          -3009
##  9 Cigna Finans Emeklilik ve Hayat      -29194
## 10 Fiba Emeklilik ve Hayat               49016
## 11 Garanti Emeklilik ve Hayat           -78313
## 12 Groupama Emeklilik                        0
## 13 Groupama Hayat                            0
## 14 Halk Hayat ve Emeklilik               87602
## 15 Katılım Emeklilik ve Hayat            76666
## 16 Metlife Emeklilik ve Hayat            -1789
## 17 NN Hayat ve Emeklilik                -10947
## 18 Vakıf Emeklilik                           0
## 19 Vakıf Emeklilik ve Hayat                  0
## 20 Ziraat Hayat ve Emeklilik            103282
colnames(summary_diff) <- c("pension_fund_company","sum_of_cust")
summary_diff <- summary_diff %>% arrange(sum_of_cust)
print(summary_diff)
## # A tibble: 20 x 2
##    pension_fund_company            sum_of_cust
##    <chr>                                 <dbl>
##  1 Avivasa Emeklilik ve Hayat           -99667
##  2 Garanti Emeklilik ve Hayat           -78313
##  3 Allianz YaÅŸam ve Emeklilik           -50405
##  4 Anadolu Hayat Emeklilik              -44053
##  5 Cigna Finans Emeklilik ve Hayat      -29194
##  6 Bereket Emeklilik ve Hayat           -21079
##  7 Allianz Hayat ve Emeklilik           -18455
##  8 NN Hayat ve Emeklilik                -10947
##  9 Aegon Emeklilik ve Hayat              -4018
## 10 BNP Paribas Cardif Emeklilik          -3009
## 11 Metlife Emeklilik ve Hayat            -1789
## 12 Axa Hayat ve Emeklilik                 -864
## 13 Groupama Emeklilik                        0
## 14 Groupama Hayat                            0
## 15 Vakıf Emeklilik                           0
## 16 Vakıf Emeklilik ve Hayat                  0
## 17 Fiba Emeklilik ve Hayat               49016
## 18 Katılım Emeklilik ve Hayat            76666
## 19 Halk Hayat ve Emeklilik               87602
## 20 Ziraat Hayat ve Emeklilik            103282
###Below, graph shows that how many customers each fund company has gained or lost over 2 years

gg1 <- ggplot(summary_diff, aes(x=pension_fund_company, y=sum_of_cust)) + geom_point() + theme(axis.text.x = element_text(angle = 90)) + ylab("change of cust number over 2 years")
print(gg1)

Analyze 2

### We calculated fund amount per each fund participant (fund amount per capita). We used the most update data ( 31.07.2019). 

egm_per_capita <- egm_data %>% select(date,pension_fund_company,n_of_participants,fund_size_participants) %>% filter(date =="31.07.2019")  %>% mutate(per_capita = fund_size_participants/n_of_participants) %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% arrange(desc(per_capita))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
print(egm_per_capita) 
## # A tibble: 18 x 5
##    date   pension_fund_compa~ n_of_participan~ fund_size_partic~ per_capita
##    <chr>  <chr>                          <dbl>             <dbl>      <dbl>
##  1 31.07~ Allianz Hayat ve E~            94630       2801049763.     29600.
##  2 31.07~ Avivasa Emeklilik ~           787046      16801925214.     21348.
##  3 31.07~ Fiba Emeklilik ve ~            91108       1614645975.     17722.
##  4 31.07~ Allianz YaÅŸam ve E~           728934      12158903088.     16680.
##  5 31.07~ Anadolu Hayat Emek~          1091010      16312824287.     14952.
##  6 31.07~ Vakıf Emeklilik ve~           547507       7224409572.     13195.
##  7 31.07~ Axa Hayat ve Emekl~            33794        426214299.     12612.
##  8 31.07~ BNP Paribas Cardif~           181937       2260767911.     12426.
##  9 31.07~ NN Hayat ve Emekli~           255144       3165530598.     12407.
## 10 31.07~ Garanti Emeklilik ~          1111079      12694233683.     11425.
## 11 31.07~ Metlife Emeklilik ~           184525       1619012245.      8774.
## 12 31.07~ Cigna Finans Emekl~            83601        707565760.      8464.
## 13 31.07~ Halk Hayat ve Emek~           561547       4066235326.      7241.
## 14 31.07~ Ziraat Hayat ve Em~           657702       4544494775.      6910.
## 15 31.07~ Katılım Emeklilik ~           253288       1466479144.      5790.
## 16 31.07~ Bereket Emeklilik ~            93913        360296123.      3836.
## 17 31.07~ Aegon Emeklilik ve~            37671        132128516.      3507.
## 18 31.07~ Groupama Hayat                     0                0          0
## Graph shows that, Allianz Fund Participants have the biggest fund amound per capita. That means, Allianz Fund Participants must be paying the highest mountly premium.

gg2 <- ggplot(egm_per_capita, aes(x=pension_fund_company, y=per_capita, fill=pension_fund_company)) + geom_bar(stat="identity")+ 
  theme_classic() + xlab("Firms")  + theme(axis.text.x = element_text(angle = 90)) + ylab("fund amount per participant in TRY")
print(gg2)

Analyze 3

### We found top 5 fund companies in terms of customer base at 31.07.2019.

egm_company<-egm_data %>% select(date,pension_fund_company,n_of_participants) %>% filter(date=="31.07.2019") %>% group_by(date) %>% mutate(percentage_of_participants=100*(n_of_participants/sum(n_of_participants))) %>%top_n(5,percentage_of_participants ) 

print(egm_company)
## # A tibble: 5 x 4
## # Groups:   date [1]
##   date      pension_fund_company    n_of_participan~ percentage_of_partici~
##   <chr>     <chr>                              <dbl>                  <dbl>
## 1 31.07.20~ Allianz YaÅŸam ve Emekl~           728934                  10.7 
## 2 31.07.20~ Anadolu Hayat Emeklilik          1091010                  16.1 
## 3 31.07.20~ Avivasa Emeklilik ve H~           787046                  11.6 
## 4 31.07.20~ Garanti Emeklilik ve H~          1111079                  16.4 
## 5 31.07.20~ Ziraat Hayat ve Emekli~           657702                   9.68
##In the pie chart you can see top five fund companies customers base percentage.


gg3<-ggplot(egm_company, aes(x="", y=percentage_of_participants, fill=pension_fund_company)) + geom_bar(stat="identity", width=1) + coord_polar("y", start=0) +geom_text(aes(label = paste0(round(percentage_of_participants), "%")), position = position_stack(vjust = 0.5))
print(gg3)