This is a worksheet of EGM Group Assignment.
The project’s dataset includes key indicators of Individual Pension System from the August 2014 through July 2019.
First, We downloaded dataset from the EGM website, then the “Rapor Tarihi” filter removed and put in the dataset as new column.
Finally in this step, we import the dataset to R Studio.
library(readxl)
egm_data<- read_excel("EGM_Dataset.xlsx",skip=2,col_names=FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ... and 10 more problems
We import packages we’ll use at this step.
We’ve changed column names and date format.
Then we checked the dataset wtih head&tail functions.
library(tidyverse)
library(formattable)
library(ggplot2)
library(lubridate)
names(egm_data) <- c("tarih", "sirket_adi", "katilimci_sayisi", "katilimci_fon_tutari", "devlet_katkisi_tutari", "katki_payi_tutari", "emekli_katilimci_sayisi", "bireysel_sozlesme_sss", "grup_bireysel_sozlesme_sss", "isveren_grup_sertifika_sss", "toplam_sss", "bireysel_sozlesme_yyt", "grup_bireysel_sozlesme_yyt", "isveren_grup_sertifika_yyt", "toplam_yyt")
egm_data$tarih <- ymd(egm_data$tarih)
head(egm_data)
## # A tibble: 6 x 15
## tarih sirket_adi katilimci_sayisi katilimci_fon_t~ devlet_katkisi_~
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2019-07-31 Aegon Eme~ 37671 132128516. 15559754.
## 2 2019-07-31 Allianz H~ 94630 2801049763. 385315268.
## 3 2019-07-31 Allianz Y~ 728934 12158903088. 1435419831.
## 4 2019-07-31 Anadolu H~ 1091010 16312824287. 2768140863.
## 5 2019-07-31 Avivasa E~ 787046 16801925214. 2467365796.
## 6 2019-07-31 Axa Hayat~ 33794 426214299. 86715369.
## # ... with 10 more variables: katki_payi_tutari <dbl>,
## # emekli_katilimci_sayisi <dbl>, bireysel_sozlesme_sss <dbl>,
## # grup_bireysel_sozlesme_sss <dbl>, isveren_grup_sertifika_sss <dbl>,
## # toplam_sss <dbl>, bireysel_sozlesme_yyt <dbl>,
## # grup_bireysel_sozlesme_yyt <dbl>, isveren_grup_sertifika_yyt <dbl>,
## # toplam_yyt <dbl>
tail(egm_data)
## # A tibble: 6 x 15
## tarih sirket_adi katilimci_sayisi katilimci_fon_t~ devlet_katkisi_~
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2014-08-01 Halk Haya~ 195343 556485432. 71758074.
## 2 2014-08-01 ING Emekl~ 255942 1532732696. 92959545.
## 3 2014-08-01 Katılım E~ 51 112389. 7754.
## 4 2014-08-01 Metlife E~ 141453 470391534. 49819379.
## 5 2014-08-01 Vakıf Eme~ 311505 1991224407. 149848894.
## 6 2014-08-01 Ziraat Ha~ 190041 552087005. 70118274.
## # ... with 10 more variables: katki_payi_tutari <dbl>,
## # emekli_katilimci_sayisi <dbl>, bireysel_sozlesme_sss <dbl>,
## # grup_bireysel_sozlesme_sss <dbl>, isveren_grup_sertifika_sss <dbl>,
## # toplam_sss <dbl>, bireysel_sozlesme_yyt <dbl>,
## # grup_bireysel_sozlesme_yyt <dbl>, isveren_grup_sertifika_yyt <dbl>,
## # toplam_yyt <dbl>
mean_participant_list <- egm_data %>%
group_by(sirket_adi) %>% arrange(sirket_adi) %>%
summarise(mean_participant = mean(katilimci_sayisi/1000000)) %>%
arrange(desc(mean_participant)) %>%
top_n(20)
ggplot(mean_participant_list,aes(x=sirket_adi, y = mean_participant))+
geom_bar(stat ="identity")+
theme(axis.text.x = element_text(angle=90))
aa <- egm_data %>% mutate(YEAR = year(tarih)) %>%
group_by(sirket_adi, YEAR) %>%
summarise(sum_size_total=sum(toplam_yyt)) %>%
arrange(desc(sum_size_total)) %>%
filter(sum_size_total > 165000000000)
print(aa)
## # A tibble: 24 x 3
## # Groups: sirket_adi [7]
## sirket_adi YEAR sum_size_total
## <chr> <dbl> <dbl>
## 1 Avivasa Emeklilik ve Hayat 2018 663789188160.
## 2 Anadolu Hayat Emeklilik 2018 632319579558.
## 3 Avivasa Emeklilik ve Hayat 2017 572261943028.
## 4 Anadolu Hayat Emeklilik 2017 543692909040.
## 5 Garanti Emeklilik ve Hayat 2018 503189721421.
## 6 Avivasa Emeklilik ve Hayat 2016 489036620445.
## 7 Anadolu Hayat Emeklilik 2016 469862271228.
## 8 Garanti Emeklilik ve Hayat 2017 447666364137.
## 9 Allianz Yaşam ve Emeklilik 2018 447588263953.
## 10 Avivasa Emeklilik ve Hayat 2019 402945437694.
## # ... with 14 more rows
aa %>%
ggplot(data = ., aes(x = sirket_adi, y = sum_size_total, fill = sirket_adi)) +
geom_bar(stat = "identity") + theme(axis.text = element_text(angle = 90)) +
facet_wrap(~ YEAR, ncol = 3) + labs(title = "Total Yearly Company Size",
subtitle = "Data plotted by year",
y = "Sum size total",
x = "Company")