#REQUIRED LIBRARIES
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.2.1 <U+221A> purrr 0.3.2
## <U+221A> tibble 2.1.3 <U+221A> dplyr 0.8.3
## <U+221A> tidyr 1.0.0 <U+221A> stringr 1.4.0
## <U+221A> readr 1.3.1 <U+221A> forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2 )
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
library(stringr)
## IMPORT AND ORGANIZE DATA
# Build a named list holding one BKM sector-report URL per (year, month)
# pair for the years 2010-2019 and the months 1-12. The year varies slowest,
# so the order is 2010/1, 2010/2, ..., 2010/12, 2011/1, ..., 2019/12.
year_digit <- rep(0:9, each = 12)
month_no <- rep(1:12, times = 10)
x <- as.list(paste0(
  "https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/?filter_year=201",
  year_digit, "&filter_month=", month_no, "&List=Listele"
))
# Element names follow the pattern "url201<year digit>0<month number>".
names(x) <- paste0("url201", year_digit, "0", month_no)
Please examine the code above together with the URL of the BKM web page linked below.
# GETTING DATA FROM THE URL LIST AND BINDING EVERYTHING INTO ONE DATA FRAME
# For every URL in `x`, download the page, take the fourth <table> element
# and tag each of its rows with a `date` string parsed from the URL: the year
# followed by the un-padded month number (e.g. "20101", "201012").
# The per-page tables are collected in a preallocated list and stacked with a
# single bind_rows() call, so no placeholder frame is needed and the result
# is not re-copied on every iteration.
monthly_tables <- vector("list", length(x))
for (idx in seq_along(x)) {
  page_url <- x[[idx]]
  HTML_1 <- read_html(page_url)
  temp_DF <- html_table(html_nodes(HTML_1, "table"), fill = TRUE)[[4]]
  # The tag depends only on the URL, so compute it once per page. gsub()
  # collapses everything up to "&filter_month=" into the year, and
  # str_remove_all() strips the remaining letters, "&" and "=".
  date_tag <- str_remove_all(
    gsub(".*year=(.+)&filter_month=", "\\1", page_url),
    "[A-Z]|[a-z]|&|="
  )
  # rep() keeps the assignment valid even when the table has zero rows.
  temp_DF$date <- rep(date_tag, nrow(temp_DF))
  monthly_tables[[idx]] <- temp_DF
}
# as.data.frame() keeps `DF` a plain data.frame regardless of the class
# html_table() returns.
DF <- as.data.frame(bind_rows(monthly_tables))
# DATA WRANGLING & CONTROL OF FINAL FORM OF DATA
# `date` is the year followed by the month number, so characters 1-4 give
# the year and characters 5-6 give the month.
DF[["year"]] <- substr(DF[["date"]], 1, 4)
DF[["month"]] <- substr(DF[["date"]], start = 5, stop = 6)
colnames(DF) <- c(
  "Sector", "Numb_CC", "Numb_DC", "Amount_CC", "Amount_DC",
  "date", "year", "month"
)
# Drop the rows whose Sector is one of three literal labels that are not
# sector names ("İşyeri Grubu", "TOPLAM", and a "please select a date"
# message).
DF <- DF %>%
  filter(
    Sector != "İşyeri Grubu",
    Sector != "TOPLAM",
    Sector != "Lütfen listeyi görebilmek için yukarıdan tarih seçiniz."
  )
str(DF)
## 'data.frame': 2840 obs. of 8 variables:
## $ Sector : chr "ARABA KİRALAMA" "ARAÇ KİRALAMA-SATIŞ/SERVİS/YEDEK PARÇA" "BENZİN VE YAKIT İSTASYONLARI" "ÇEŞİTLİ GIDA" ...
## $ Numb_CC : chr "30.540" "1.865.922" "16.655.032" "8.901.339" ...
## $ Numb_DC : chr "1.485" "82.655" "1.356.094" "1.272.039" ...
## $ Amount_CC: chr "13,30" "523,95" "1.990,10" "756,56" ...
## $ Amount_DC: chr "0,22" "11,77" "60,63" "25,52" ...
## $ date : chr "20101" "20101" "20101" "20101" ...
## $ year : chr "2010" "2010" "2010" "2010" ...
## $ month : chr "1" "1" "1" "1" ...
# Convert the four measure columns from formatted text to numbers.
# In the raw text "." is the thousands separator and "," the decimal mark
# (e.g. "1.990,10").
count_cols <- c("Numb_CC", "Numb_DC")
amount_cols <- c("Amount_CC", "Amount_DC")
# 1) Remove the thousands separators everywhere.
for (col in c(count_cols, amount_cols)) {
  DF[[col]] <- gsub(".", "", DF[[col]], fixed = TRUE)
}
# 2) Turn the decimal comma of the amounts into a decimal point.
for (col in amount_cols) {
  DF[[col]] <- gsub(",", ".", DF[[col]], fixed = TRUE)
}
# 3) Parse the cleaned strings as numbers.
for (col in c(count_cols, amount_cols)) {
  DF[[col]] <- as.numeric(as.character(DF[[col]]))
}
str(DF)
## 'data.frame': 2840 obs. of 8 variables:
## $ Sector : chr "ARABA KİRALAMA" "ARAÇ KİRALAMA-SATIŞ/SERVİS/YEDEK PARÇA" "BENZİN VE YAKIT İSTASYONLARI" "ÇEŞİTLİ GIDA" ...
## $ Numb_CC : num 30540 1865922 16655032 8901339 5282218 ...
## $ Numb_DC : num 1485 82655 1356094 1272039 91911 ...
## $ Amount_CC: num 13.3 524 1990.1 756.6 688.4 ...
## $ Amount_DC: num 0.22 11.77 60.63 25.52 18.05 ...
## $ date : chr "20101" "20101" "20101" "20101" ...
## $ year : chr "2010" "2010" "2010" "2010" ...
## $ month : chr "1" "1" "1" "1" ...
## ANALYSIS AND GRAPHS
# Analysis 1 - Yearly Development of Debit and Credit Card Usage
# Per year: the total number of credit + debit card transactions, expressed
# in millions and rounded to three decimals, and the total credit + debit
# amount.
pl_df_sum <- DF %>%
  group_by(year) %>%
  summarise(
    Num_Mio = round(sum(Numb_CC + Numb_DC) / 1000000, 3),
    Amount = sum(Amount_CC + Amount_DC)
  )
# Card Usage / Debit and Credit Card Usage
print.data.frame(pl_df_sum)
## year Num_Mio Amount
## 1 2010 2170.624 222446.1
## 2 2011 2479.393 278671.8
## 3 2012 2850.980 349778.9
## 4 2013 3192.698 413301.8
## 5 2014 3440.270 466703.1
## 6 2015 3810.255 533899.6
## 7 2016 4187.843 588868.8
## 8 2017 4788.716 684278.6
## 9 2018 5622.294 840338.1
## 10 2019 3100.088 475112.6
# Plot the yearly totals: number of transactions on the y axis, total amount
# as point size, one colour per year.
# geom_point() is used instead of geom_jitter(): `pl_df_sum` holds exactly one
# row per year, so there is no overplotting to break up, and random jitter
# would only move each total away from its true position and make the figure
# differ between runs.
ggplot(pl_df_sum) +
  geom_point(aes(x = year, y = Num_Mio, color = year, size = Amount)) +
  labs(
    title = "Cards Usage",
    subtitle = "Debit and Credit Card Usage",
    caption = "(based on data from BKM)",
    y = "Tot. Credit+Debit Card Use (in Mio)",
    x = "Year"
  )
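As the data frame and the graph “Cards Usage / Debit and Credit Card Usage” show, total debit and credit card usage increases in both dimensions, the amount (in million TL, shown by point size) and the number of transactions (in millions, shown on the y-axis), from 2010 to 2019. (The 2019 totals are lower than those of 2018, presumably because the data for 2019 cover only part of the year.)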
## Analysis 2 - Yearly Development of Top5 Sector of Debit and Credit Card Usage
# Rank the sectors by their total number of transactions (credit + debit)
# over the whole data set and keep the five largest.
filter1 <- DF %>%
  group_by(Sector) %>%
  summarise(
    Total_Numb_CC = sum(Numb_CC),
    Total_Numb_DC = sum(Numb_DC),
    Total_Amount_CC = sum(Amount_CC),
    Total_Amount_DC = sum(Amount_DC)
  ) %>%
  arrange(desc(Total_Numb_CC + Total_Numb_DC)) %>%
  slice(1:5)
# Keep every row that belongs to one of the top-5 sectors. `%in%` tests set
# membership; `==` would recycle the five sector names along the column and
# keep only the rows whose position happens to line up with the matching
# name, silently dropping most of the data.
yearly_develpment_fortop5 <- DF %>%
  filter(Sector %in% filter1$Sector)
# Express the transaction counts in whole millions for plotting.
yearly_develpment_fortop5$Numb_CC <-
  round(yearly_develpment_fortop5$Numb_CC / 1000000)
yearly_develpment_fortop5$Numb_DC <-
  round(yearly_develpment_fortop5$Numb_DC / 1000000)
# One point per sector and month: total count on the y axis, total amount as
# point size, one colour per sector.
ggplot(yearly_develpment_fortop5) +
  geom_jitter(aes(
    x = year,
    y = Numb_CC + Numb_DC,
    color = Sector,
    size = Amount_CC + Amount_DC
  )) +
  labs(
    title = "Cards Usage",
    subtitle = "Top 5 Sector (Debit and Credit Card Usage)",
    caption = "(based on data from BKM)",
    y = "Tot. Credit+Debit Card Use (in Mio)",
    x = "Year"
  ) +
  scale_y_continuous()
As the graph “Cards Usage / Top 5 Sector (Debit and Credit Card Usage)” shows, total debit and credit card usage in the top 5 sectors increases in both dimensions, the amount (in million TL, shown by point size) and the number of transactions (in millions, shown on the y-axis), from 2010 to 2019. Looking more closely at the details, Market/Retail Shopping is the dominant leader throughout those years. Food shopping ranks second in the list by its total (debit + credit) number of transactions, but not by its amount in TL, as the graph above shows.