library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/pjournal/mef03g-r-mut/raw/master/BES/EGM_DATA.xlsx",destfile = tmp)
BES <- readxl::read_excel(tmp,col_names = F ,skip = 2)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * … and 10 more problems
file.remove(tmp)
## [1] TRUE
head(BES)
## # A tibble: 6 x 15
## ...1 ...2 ...3 ...4 ...5 ...6 ...7 ...8
## <chr> <dbl> <dbl> <dbl> <dbl> <dttm> <dbl> <dbl>
## 1 Aego… 3.77e4 1.32e 8 1.56e7 8.24e 7 2019-07-31 00:00:00 750 29407
## 2 Alli… 9.46e4 2.80e 9 3.85e8 1.87e 9 2019-07-31 00:00:00 4157 92110
## 3 Alli… 7.29e5 1.22e10 1.44e9 8.13e 9 2019-07-31 00:00:00 12706 602553
## 4 Anad… 1.09e6 1.63e10 2.77e9 1.14e10 2019-07-31 00:00:00 26444 947707
## 5 Aviv… 7.87e5 1.68e10 2.47e9 1.17e10 2019-07-31 00:00:00 20650 893025
## 6 Axa … 3.38e4 4.26e 8 8.67e7 3.11e 8 2019-07-31 00:00:00 73 41396
## # … with 7 more variables: ...9 <dbl>, ...10 <dbl>, ...11 <dbl>,
## # ...12 <dbl>, ...13 <dbl>, ...14 <dbl>, ...15 <dbl>
tail(BES)
## # A tibble: 6 x 15
## ...1 ...2 ...3 ...4 ...5 ...6 ...7 ...8 ...9
## <chr> <dbl> <dbl> <dbl> <dbl> <dttm> <dbl> <dbl> <dbl>
## 1 Halk… 445422 1.95e9 3.50e8 1.78e9 2017-01-06 00:00:00 111 214731 238995
## 2 Katı… 162207 5.34e8 1.05e8 5.09e8 2017-01-06 00:00:00 20 109836 46228
## 3 Metl… 179609 1.02e9 1.68e8 9.09e8 2017-01-06 00:00:00 164 143625 35857
## 4 NN H… 268929 2.08e9 2.79e8 1.69e9 2017-01-06 00:00:00 2337 261658 39099
## 5 Vakı… 466604 4.26e9 5.37e8 3.59e9 2017-01-06 00:00:00 3747 558347 80548
## 6 Zira… 514901 2.01e9 3.79e8 1.82e9 2017-01-06 00:00:00 467 348221 189251
## # … with 6 more variables: ...10 <dbl>, ...11 <dbl>, ...12 <dbl>,
## # ...13 <dbl>, ...14 <dbl>, ...15 <dbl>
colnames(BES) <- c("pension_fund_company", "n_of_participants", "fund_size_participants", "gov_contribution", "contribution", "date", "n_of_pensioners", "n_of_ind_contracts", "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total", "size_of_ind_contracts", "size_of_group_ind_contracts", "size_of_employer_group_certificates", "size_total")
BES$date <- ymd(BES$date)
mean_participant_list <- BES %>%
group_by(pension_fund_company) %>% arrange(pension_fund_company) %>%
summarise(mean_participant = mean(n_of_participants/1000000)) %>%
arrange(desc(mean_participant)) %>%
top_n(10)
## Selecting by mean_participant
ggplot(mean_participant_list,aes(x=pension_fund_company,y = mean_participant))+
geom_bar(stat ="identity")+
theme(axis.text.x = element_text(angle=90))
barplot <- ggplot(mean_participant_list,aes(x=pension_fund_company,y = mean_participant))+
geom_bar(stat ="identity")+
theme(axis.text.x = element_text(angle=90))
barplot <- barplot + labs(title ="Top 10 Companies",
caption ="Data Source: Mean Participant df",
x = "Company Name",
y = "Participant as Millions")
print(barplot)
top_10_vec <- unlist(mean_participant_list$pension_fund_company)
analysis_list <- BES %>%
filter(pension_fund_company %in% top_10_vec) %>%
arrange(date) %>%
mutate(participants=n_of_participants/1000000) %>%
select(date,pension_fund_company,participants)
analysis_plot <- BES %>%
filter(pension_fund_company %in% top_10_vec) %>%
arrange(date) %>%
mutate(participants=n_of_participants/1000000) %>%
select(date,pension_fund_company,participants)
analysis_plot <- ggplot(analysis_list,aes(x=date,y=participants,color=pension_fund_company))+
geom_line()+
scale_x_date(date_breaks = "3 month")+
theme(axis.text.x = element_text(size = 8,angle=45))
analysis_plot <- analysis_plot + labs(title = "Variation of the number of participants over time",
x = "Dates",
y = "Number of participants(Millions)",
colour = " ")
print(analysis_plot)
## Analysis pension fund company according to total pension fund size according to years
aa <- BES %>% mutate(YEAR = year(date)) %>%
group_by(pension_fund_company, YEAR) %>%
summarise(sum_size_total=sum(size_total)) %>%
arrange(desc(sum_size_total)) %>%
filter(sum_size_total > 165000000000)
print(aa)
## # A tibble: 15 x 3
## # Groups: pension_fund_company [6]
## pension_fund_company YEAR sum_size_total
## <chr> <dbl> <dbl>
## 1 Avivasa Emeklilik ve Hayat 2018 663789188160.
## 2 Anadolu Hayat Emeklilik 2018 632319579558.
## 3 Avivasa Emeklilik ve Hayat 2017 572261943028.
## 4 Anadolu Hayat Emeklilik 2017 543692909040.
## 5 Garanti Emeklilik ve Hayat 2018 503189721421.
## 6 Garanti Emeklilik ve Hayat 2017 447666364137.
## 7 Allianz YaÅŸam ve Emeklilik 2018 447588263953.
## 8 Avivasa Emeklilik ve Hayat 2019 402945437694.
## 9 Anadolu Hayat Emeklilik 2019 395718773810.
## 10 Allianz YaÅŸam ve Emeklilik 2017 385000774130.
## 11 Garanti Emeklilik ve Hayat 2019 304667307251.
## 12 Allianz YaÅŸam ve Emeklilik 2019 278471319637.
## 13 Vakıf Emeklilik ve Hayat 2018 264743477965.
## 14 Ziraat Hayat ve Emeklilik 2018 169679435452.
## 15 Vakıf Emeklilik ve Hayat 2019 167996589907.
aa %>%
ggplot(data = ., aes(x = pension_fund_company, y = sum_size_total, fill = pension_fund_company)) +
geom_bar(stat = "identity") + theme(axis.text = element_text(angle = 90)) +
facet_wrap(~ YEAR, ncol = 3) + labs(title = "Total Yearly Company Size",
subtitle = "Data plotted by year",
y = "Sum size total",
x = "Company")
While Avivasa Emeklilik and Anadolu Emeklilik show a competition but steady increase, Vakıf Emeklilik constantly increase and Ziraat shows down and up according to years