1.Introduction and Preparation

The Individual Pension System (“IPS” for short) is a private pension system that provides an income to maintain living standards during retirement through the long-term investment of the savings people make during their active careers. People participate in the system voluntarily to benefit from an income in addition to the pension income provided by the social security system.

The dataset we used in this analysis from Pension Monitoring Center includes number of partitcipants, fund_amount, contract numbers and invested amounts of pension companies in Turkey.

Column names and explanations are below:

  1. pens_company: Pension Company
  2. participant_nbr: Number of participants
  3. participant_fund_amt: Voluntary participants’ total fund amount
  4. state_cont_amt: State Contribution
  5. cont_amt: contribution amount
  6. pensioner_nbr: Number of pensioners
  7. pens_cont_nbr: Number of pension contracts
  8. grp_cont_nbr: Group Individual Contracts
  9. grp_noncont_nbr: Noncontributory Group Contract
  10. total_cont_nbr: Total Contracts
  11. pens_invstd_amt: Total Invested Amount of pension contracts
  12. grp_cont_invstd_amt: Total Invested Amount of Group Individual Contracts Noncontributory Group Contract
  13. grp_noncont_invstd_amt: Total Invested Amount of Noncontributory Group Contract
  14. total_invstd_amt: Total Invested Amount
  15. year
  16. month
raw_data_full <- ''
# Creating a list including all csv names to use in for loop
list_all <- c('201907','201906','201905','201904','201903','201902','201901','201812','201811','201810','201809','201808','201807','201806','201805','201804','201803','201802','201801','201712','201711','201710','201709','201708')

for (i in 1:length(list_all)) {
    
  file_url<- paste('https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/', list_all[i],'.csv?raw=true', sep='')
  
  print(file_url)
  
  # Reading csv from the url
  raw_data <- read.csv(file_url,sep=',',header=F)
  
  # Replacing NA values with 0 and label the time period with year and month, so when data is merged the data there won't be any confusion.
  raw_data <- 
    raw_data %>%
    mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% 
    mutate(yearmonth=list_all[i])
  
  # Appending raw_data_full into raw_data_full for each iteration
  raw_data_full<-rbind(raw_data_full,raw_data)
} 
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201907.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201906.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201905.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201904.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201903.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201902.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201901.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201812.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201811.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201810.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201809.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201808.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201807.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201806.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201805.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201804.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201803.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201802.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201801.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201712.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201711.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201710.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201709.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/BES-DATA/201708.csv?raw=true"
colnames(raw_data_full) <- c('pens_company','participant_nbr','participant_fund_amt','state_cont_amt','cont_amt','pensioner_nbr','pens_cont_nbr','grp_cont_nbr','grp_noncont_nbr','total_cont_nbr','pens_invstd_amt','grp_cont_invstd_amt','grp_noncont_invstd_amt','total_invstd_amt','yearmonth')

raw_data_full <- raw_data_full %>% slice(-c(1))

2.Analysis

2.1.Top 5 -5 Companies with the Most Customers

df_category <-
raw_data_full %>%
  group_by(pens_company)%>%
  summarise(TotalNum = sum(as.numeric((gsub(",","",participant_nbr))),na.rm = T)) %>%
  arrange(desc(TotalNum)) %>%
  mutate(rwn = row_number()) %>%
  filter(rwn < 6)


df_category
## # A tibble: 5 x 3
##   pens_company                TotalNum   rwn
##   <fct>                          <dbl> <int>
## 1 Garanti Emeklilik ve Hayat 146513591     1
## 2 Anadolu Hayat Emeklilik    141010002     2
## 3 Avivasa Emeklilik ve Hayat 106664379     3
## 4 Allianz Ya?am ve Emeklilik  94777714     4
## 5 Ziraat Hayat ve Emeklilik   76602615     5
#Creating a column Category to transpose the values
HisPer  <- data.frame(Category = df_category$pens_company, freq=df_category$TotalNum)

ggplot(HisPer, aes(x=Category, y=freq, fill=Category))+
  geom_bar(stat="identity")+
  geom_text(aes(label=freq), vjust=1.6, color="black", size=3.5)+
  theme_minimal()+
  labs(x="Pens Company",y="# of Customer",title="5 Companies with the Most Customers",fill="Company")+
  theme(axis.text.x = element_text(angle=30))

2.2. Payment percentage per participant

df_rate <-
raw_data_full %>%
  group_by(pens_company)%>%
  summarise(Totalfund = sum(as.numeric((gsub(",","",participant_fund_amt))),na.rm = T),
            Totalpart = sum(as.numeric((gsub(",","",participant_nbr))),na.rm = T),
            rwn = Totalfund / Totalpart ) %>%
  arrange(desc(rwn)) %>%
  mutate(sr = row_number()) %>%
  filter(sr < 6)

df_rate2 <- df_rate %>% mutate_if(is.numeric, round, 0)

#Creating a column Category to transpose the values
HisRate  <- data.frame(Category = df_rate2$pens_company, freq=df_rate2$rwn)

ggplot(HisRate, aes(x=Category, y=freq, fill=Category))+
  geom_bar(stat="identity")+
  geom_text(aes(label=freq), vjust=1.6, color="black", size=3.5)+
  theme_minimal()+
  labs(x="Pens Company",y="Rate of #Foundation to #Customer ",title="5 Companies with the Most Rate of Foundation to Customers",fill="Company")+
  theme(axis.text.x = element_text(angle=30))

2.3. Pensioner Number of Top 5 Company

df_retired <- 
raw_data_full %>% select(yearmonth,pens_company,pensioner_nbr) %>% 
  filter(substr(yearmonth,1,4) == 2018) %>%
    filter(pens_company %in% c("Allianz Hayat ve Emeklilik","Allianz Ya?am ve Emeklilik","Avivasa Emeklilik ve Hayat","Fiba Emeklilik ve Hayat","Groupama Emeklilik"))


ggplot(df_retired,aes(x = df_retired$yearmonth, y = df_retired$pensioner_nbr, group=pens_company, colour=pens_company)) + 
  geom_line() + 
  geom_point()+
  labs( x="Monthly Pension Number" , y = "Month Year")+ 
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 60, vjust = 0.5, hjust = 0, size = 10))