1.Introduction

This is a worksheet of EGM Group Assignment.

The project’s dataset includes key indicators of Individual Pension System from the August 2014 through July 2019.

2.Import EGM Dataset

First, We downloaded dataset from the EGM website, then the “Rapor Tarihi” filter removed and put in the dataset as new column.

Finally in this step, we import the dataset to R Studio.

library(readxl)
egm_data<- read_excel("EGM_Dataset.xlsx",skip=2,col_names=FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ... and 10 more problems

3. Loading Libraries & Data Transformation

We import packages we’ll use at this step.

We’ve changed column names and date format.

Then we checked the dataset wtih head&tail functions.

library(tidyverse)
library(formattable)
library(ggplot2)
library(lubridate)

names(egm_data) <- c("tarih", "sirket_adi", "katilimci_sayisi", "katilimci_fon_tutari", "devlet_katkisi_tutari", "katki_payi_tutari", "emekli_katilimci_sayisi", "bireysel_sozlesme_sss", "grup_bireysel_sozlesme_sss", "isveren_grup_sertifika_sss", "toplam_sss", "bireysel_sozlesme_yyt", "grup_bireysel_sozlesme_yyt", "isveren_grup_sertifika_yyt", "toplam_yyt")

egm_data$tarih <- ymd(egm_data$tarih)

head(egm_data)
## # A tibble: 6 x 15
##   tarih      sirket_adi katilimci_sayisi katilimci_fon_t~ devlet_katkisi_~
##   <date>     <chr>                 <dbl>            <dbl>            <dbl>
## 1 2019-07-31 Aegon Eme~            37671       132128516.        15559754.
## 2 2019-07-31 Allianz H~            94630      2801049763.       385315268.
## 3 2019-07-31 Allianz Y~           728934     12158903088.      1435419831.
## 4 2019-07-31 Anadolu H~          1091010     16312824287.      2768140863.
## 5 2019-07-31 Avivasa E~           787046     16801925214.      2467365796.
## 6 2019-07-31 Axa Hayat~            33794       426214299.        86715369.
## # ... with 10 more variables: katki_payi_tutari <dbl>,
## #   emekli_katilimci_sayisi <dbl>, bireysel_sozlesme_sss <dbl>,
## #   grup_bireysel_sozlesme_sss <dbl>, isveren_grup_sertifika_sss <dbl>,
## #   toplam_sss <dbl>, bireysel_sozlesme_yyt <dbl>,
## #   grup_bireysel_sozlesme_yyt <dbl>, isveren_grup_sertifika_yyt <dbl>,
## #   toplam_yyt <dbl>
tail(egm_data)
## # A tibble: 6 x 15
##   tarih      sirket_adi katilimci_sayisi katilimci_fon_t~ devlet_katkisi_~
##   <date>     <chr>                 <dbl>            <dbl>            <dbl>
## 1 2014-08-01 Halk Haya~           195343       556485432.        71758074.
## 2 2014-08-01 ING Emekl~           255942      1532732696.        92959545.
## 3 2014-08-01 Katılım E~               51          112389.            7754.
## 4 2014-08-01 Metlife E~           141453       470391534.        49819379.
## 5 2014-08-01 Vakıf Eme~           311505      1991224407.       149848894.
## 6 2014-08-01 Ziraat Ha~           190041       552087005.        70118274.
## # ... with 10 more variables: katki_payi_tutari <dbl>,
## #   emekli_katilimci_sayisi <dbl>, bireysel_sozlesme_sss <dbl>,
## #   grup_bireysel_sozlesme_sss <dbl>, isveren_grup_sertifika_sss <dbl>,
## #   toplam_sss <dbl>, bireysel_sozlesme_yyt <dbl>,
## #   grup_bireysel_sozlesme_yyt <dbl>, isveren_grup_sertifika_yyt <dbl>,
## #   toplam_yyt <dbl>

4. Analysis

Mean Participant in First 20 Companies

mean_participant_list <- egm_data %>%
  group_by(sirket_adi) %>% arrange(sirket_adi) %>%
  summarise(mean_participant = mean(katilimci_sayisi/1000000)) %>%
  arrange(desc(mean_participant)) %>%
  top_n(20)


ggplot(mean_participant_list,aes(x=sirket_adi, y = mean_participant))+
  geom_bar(stat ="identity")+
  theme(axis.text.x = element_text(angle=90))

Best 10 Companies in Market Share

head_10_by_pazar_payi <- egm_data %>% 
  group_by(sirket_adi) %>%
  summarise(ortalama_fon_tutari=mean(katilimci_fon_tutari, na.rm = TRUE)) %>%
  mutate(pazar_payi = percent(ortalama_fon_tutari / sum(ortalama_fon_tutari))) %>%
  top_n(10, pazar_payi)

print(head_10_by_pazar_payi)
## # A tibble: 10 x 3
##    sirket_adi                 ortalama_fon_tutari pazar_payi
##    <chr>                                    <dbl> <formttbl>
##  1 Allianz Hayat ve Emeklilik         1892338923. 2.99%     
##  2 Allianz Yaşam ve Emeklilik         7908774626. 12.51%    
##  3 Anadolu Hayat Emeklilik           10478735413. 16.57%    
##  4 Avivasa Emeklilik ve Hayat        10923243669. 17.27%    
##  5 Garanti Emeklilik ve Hayat         8559410584. 13.53%    
##  6 Halk Hayat ve Emeklilik            2096892013. 3.32%     
##  7 NN Hayat ve Emeklilik              2251406261. 3.56%     
##  8 Vakıf Emeklilik                    3294278571. 5.21%     
##  9 Vakıf Emeklilik ve Hayat           5805748351. 9.18%     
## 10 Ziraat Hayat ve Emeklilik          2188574399. 3.46%
# Plot: Top 10 Companies by Market Share (Mean Participant Funds):

ggplot(data = head_10_by_pazar_payi, aes(x=sirket_adi, y=pazar_payi, fill=sirket_adi)) +
  geom_bar(stat="identity")+
  coord_polar()+
  theme(legend.position = "right", axis.text.x = element_text(angle = 0))+
  geom_text(aes(y = pazar_payi , label = pazar_payi))+
  labs(title = "Best 10 Company in Market Share", x="", y="")

Total Yearly Company Size

aa <- egm_data %>% mutate(YEAR = year(tarih)) %>%  
  group_by(sirket_adi, YEAR) %>%
  summarise(sum_size_total=sum(toplam_yyt)) %>%
  arrange(desc(sum_size_total)) %>%
  filter(sum_size_total > 165000000000) 
  print(aa)
## # A tibble: 24 x 3
## # Groups:   sirket_adi [7]
##    sirket_adi                  YEAR sum_size_total
##    <chr>                      <dbl>          <dbl>
##  1 Avivasa Emeklilik ve Hayat  2018  663789188160.
##  2 Anadolu Hayat Emeklilik     2018  632319579558.
##  3 Avivasa Emeklilik ve Hayat  2017  572261943028.
##  4 Anadolu Hayat Emeklilik     2017  543692909040.
##  5 Garanti Emeklilik ve Hayat  2018  503189721421.
##  6 Avivasa Emeklilik ve Hayat  2016  489036620445.
##  7 Anadolu Hayat Emeklilik     2016  469862271228.
##  8 Garanti Emeklilik ve Hayat  2017  447666364137.
##  9 Allianz Yaşam ve Emeklilik  2018  447588263953.
## 10 Avivasa Emeklilik ve Hayat  2019  402945437694.
## # ... with 14 more rows
  aa %>%
  ggplot(data = ., aes(x = sirket_adi, y = sum_size_total, fill = sirket_adi)) +
      geom_bar(stat = "identity") + theme(axis.text = element_text(angle = 90)) +
  facet_wrap(~ YEAR, ncol = 3) + labs(title = "Total Yearly Company Size",
           subtitle = "Data plotted by year",
           y = "Sum size total",
           x = "Company")