First Step : Call Necessary Libraries

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.1.3     ✔ purrr   0.3.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date

Download Data From Source

tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/pjournal/mef03g-r-mut/raw/master/BES/EGM_DATA.xlsx",destfile = tmp)
BES <- readxl::read_excel(tmp,col_names = F ,skip = 2)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * … and 10 more problems
file.remove(tmp)
## [1] TRUE

Check What is

head(BES)
## # A tibble: 6 x 15
##   ...1    ...2    ...3   ...4    ...5 ...6                 ...7   ...8
##   <chr>  <dbl>   <dbl>  <dbl>   <dbl> <dttm>              <dbl>  <dbl>
## 1 Aego… 3.77e4 1.32e 8 1.56e7 8.24e 7 2019-07-31 00:00:00   750  29407
## 2 Alli… 9.46e4 2.80e 9 3.85e8 1.87e 9 2019-07-31 00:00:00  4157  92110
## 3 Alli… 7.29e5 1.22e10 1.44e9 8.13e 9 2019-07-31 00:00:00 12706 602553
## 4 Anad… 1.09e6 1.63e10 2.77e9 1.14e10 2019-07-31 00:00:00 26444 947707
## 5 Aviv… 7.87e5 1.68e10 2.47e9 1.17e10 2019-07-31 00:00:00 20650 893025
## 6 Axa … 3.38e4 4.26e 8 8.67e7 3.11e 8 2019-07-31 00:00:00    73  41396
## # … with 7 more variables: ...9 <dbl>, ...10 <dbl>, ...11 <dbl>,
## #   ...12 <dbl>, ...13 <dbl>, ...14 <dbl>, ...15 <dbl>
tail(BES)
## # A tibble: 6 x 15
##   ...1    ...2   ...3   ...4   ...5 ...6                 ...7   ...8   ...9
##   <chr>  <dbl>  <dbl>  <dbl>  <dbl> <dttm>              <dbl>  <dbl>  <dbl>
## 1 Halk… 445422 1.95e9 3.50e8 1.78e9 2017-01-06 00:00:00   111 214731 238995
## 2 Katı… 162207 5.34e8 1.05e8 5.09e8 2017-01-06 00:00:00    20 109836  46228
## 3 Metl… 179609 1.02e9 1.68e8 9.09e8 2017-01-06 00:00:00   164 143625  35857
## 4 NN H… 268929 2.08e9 2.79e8 1.69e9 2017-01-06 00:00:00  2337 261658  39099
## 5 Vakı… 466604 4.26e9 5.37e8 3.59e9 2017-01-06 00:00:00  3747 558347  80548
## 6 Zira… 514901 2.01e9 3.79e8 1.82e9 2017-01-06 00:00:00   467 348221 189251
## # … with 6 more variables: ...10 <dbl>, ...11 <dbl>, ...12 <dbl>,
## #   ...13 <dbl>, ...14 <dbl>, ...15 <dbl>

Organize the Data

colnames(BES) <- c("pension_fund_company",  "n_of_participants",    "fund_size_participants",   "gov_contribution", "contribution", "date", "n_of_pensioners",  "n_of_ind_contracts",   "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total",  "size_of_ind_contracts",    "size_of_group_ind_contracts",  "size_of_employer_group_certificates",  "size_total")
BES$date <- ymd(BES$date)

A Opinion About Companies

mean_participant_list <- BES %>%
  group_by(pension_fund_company) %>% arrange(pension_fund_company) %>%
  summarise(mean_participant = mean(n_of_participants/1000000)) %>%
  arrange(desc(mean_participant)) %>%
  top_n(10)
## Selecting by mean_participant
ggplot(mean_participant_list,aes(x=pension_fund_company,y = mean_participant))+
  geom_bar(stat ="identity")+
  theme(axis.text.x = element_text(angle=90))

A little bit makeup for our plot

barplot <- ggplot(mean_participant_list,aes(x=pension_fund_company,y = mean_participant))+
  geom_bar(stat ="identity")+
  theme(axis.text.x = element_text(angle=90))
barplot <- barplot + labs(title ="Top 10 Companies",
                                                      caption ="Data Source: Mean Participant df",
                                                      x = "Company Name",
                                                      y = "Participant as Millions")
print(barplot)

Analysis About Participants for 3 Month Interval

top_10_vec <- unlist(mean_participant_list$pension_fund_company)
analysis_list <- BES %>%
  filter(pension_fund_company %in% top_10_vec) %>%
  arrange(date)  %>%
  mutate(participants=n_of_participants/1000000) %>%
  select(date,pension_fund_company,participants)
analysis_plot <- BES %>%
  filter(pension_fund_company %in% top_10_vec) %>%
  arrange(date)  %>%
  mutate(participants=n_of_participants/1000000) %>%
  select(date,pension_fund_company,participants)
analysis_plot <- ggplot(analysis_list,aes(x=date,y=participants,color=pension_fund_company))+
  geom_line()+
  scale_x_date(date_breaks = "3 month")+
  theme(axis.text.x = element_text(size = 8,angle=45))
  
analysis_plot <- analysis_plot + labs(title = "Variation of the number of participants over time",
                                      x = "Dates",
                                      y = "Number of participants(Millions)",
                                      colour = " ")
print(analysis_plot)

## Analysis pension fund company according to total pension fund size according to years

aa <- BES %>% mutate(YEAR = year(date)) %>%  
  group_by(pension_fund_company, YEAR) %>%
  summarise(sum_size_total=sum(size_total)) %>%
  arrange(desc(sum_size_total)) %>%
  filter(sum_size_total > 165000000000) 
  print(aa)
## # A tibble: 15 x 3
## # Groups:   pension_fund_company [6]
##    pension_fund_company        YEAR sum_size_total
##    <chr>                      <dbl>          <dbl>
##  1 Avivasa Emeklilik ve Hayat  2018  663789188160.
##  2 Anadolu Hayat Emeklilik     2018  632319579558.
##  3 Avivasa Emeklilik ve Hayat  2017  572261943028.
##  4 Anadolu Hayat Emeklilik     2017  543692909040.
##  5 Garanti Emeklilik ve Hayat  2018  503189721421.
##  6 Garanti Emeklilik ve Hayat  2017  447666364137.
##  7 Allianz YaÅŸam ve Emeklilik  2018  447588263953.
##  8 Avivasa Emeklilik ve Hayat  2019  402945437694.
##  9 Anadolu Hayat Emeklilik     2019  395718773810.
## 10 Allianz YaÅŸam ve Emeklilik  2017  385000774130.
## 11 Garanti Emeklilik ve Hayat  2019  304667307251.
## 12 Allianz YaÅŸam ve Emeklilik  2019  278471319637.
## 13 Vakıf Emeklilik ve Hayat    2018  264743477965.
## 14 Ziraat Hayat ve Emeklilik   2018  169679435452.
## 15 Vakıf Emeklilik ve Hayat    2019  167996589907.

Show the analysis with data plotted by year

aa %>%
  ggplot(data = ., aes(x = pension_fund_company, y = sum_size_total, fill = pension_fund_company)) +
      geom_bar(stat = "identity") + theme(axis.text = element_text(angle = 90)) +
  facet_wrap(~ YEAR, ncol = 3) + labs(title = "Total Yearly Company Size",
           subtitle = "Data plotted by year",
           y = "Sum size total",
           x = "Company") 

Conclusion

While Avivasa Emeklilik and Anadolu Emeklilik show a competition but steady increase, Vakıf Emeklilik constantly increase and Ziraat shows down and up according to years