#REQUIRED LIBRARIES
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2 )
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## IMPORT DATA
# Build the list of BKM monthly sector-report URLs, one per (year, month)
# for 2010-2019. Entries are ordered year by year, month 1..12 within a year.
report_years <- rep(2010:2019, each = 12)
report_months <- rep(1:12, times = 10)
x <- as.list(paste0(
  "https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/?filter_year=",
  report_years,
  "&filter_month=",
  report_months,
  "&List=Listele"
))
# Element names are "url<year>0<month>"; the "0" is always inserted, so
# October-December come out as e.g. "url2010010".
names(x) <- paste0("url", report_years, "0", report_months)
# Download every page listed in `x`, take its data table, tag each row with
# the period encoded in the URL, and stack all tables into one data frame `DF`.
# Seed frame with five empty columns (default names X1..X5); it stays the first
# piece of the bind so the column layout starts from it.
DF <- data.frame(matrix(ncol = 5, nrow = 0))
# Collect the monthly tables in a preallocated list and bind once at the end
# instead of re-copying a growing data frame on every iteration.
monthly_tables <- vector("list", length(x))
for (idx in seq_along(x)) {
  page_url <- x[[idx]]
  page <- read_html(page_url)
  # The figures are read from the fourth <table> node of the page.
  month_table <- html_table(html_nodes(page, "table"), fill = TRUE)[[4]]
  # Reduce the URL to "<year><month>" (e.g. "20101" for 2010, month 1): the
  # gsub() keeps the year and splices out "&filter_month=", then all letters,
  # "&" and "=" are removed. Computed once per page, not once per row.
  period <- str_remove_all(
    gsub(".*year=(.+)&filter_month=", "\\1", page_url),
    "[A-Z]|[a-z]|&|="
  )
  # Single vectorised assignment; rep() yields character(0) for a zero-row
  # table, where the former `1:nrow()` loop would have iterated over c(1, 0).
  month_table$date <- rep(period, nrow(month_table))
  monthly_tables[[idx]] <- month_table
}
DF <- bind_rows(DF, monthly_tables)
# DATA WRANGLING & CONTROL OF FINAL FORM OF DATA
# `date` holds the year followed by the month number (e.g. "20101"), so the
# two parts are split off by character position.
DF <- DF %>%
  mutate(
    year = substr(date, 1, 4),
    month = substr(date, start = 5, stop = 6)
  )
# Positional rename: five table columns, then date, year, month.
# CC / DC presumably stand for credit card / debit card.
names(DF) <- c(
  "Sector",
  "Trans_Numb_CC", "Trans_Numb_DC",
  "Trans_Amount_CC", "Trans_Amount_DC",
  "date", "year", "month"
)
# Drop rows that are not sector records: the "İşyeri Grubu" (merchant group)
# label row, the "TOPLAM" (total) row, and the "please pick a date" message
# row -- presumably what the site returns for a month without data.
DF <- DF %>%
  filter(
    Sector != "İşyeri Grubu",
    Sector != "TOPLAM",
    Sector != "Lütfen listeyi görebilmek için yukarıdan tarih seçiniz."
  )
str(DF)
## 'data.frame': 2840 obs. of 8 variables:
## $ Sector : chr "ARABA KİRALAMA" "ARAÇ KİRALAMA-SATIŞ/SERVİS/YEDEK PARÇA" "BENZİN VE YAKIT İSTASYONLARI" "ÇEŞİTLİ GIDA" ...
## $ Trans_Numb_CC : chr "30.540" "1.865.922" "16.655.032" "8.901.339" ...
## $ Trans_Numb_DC : chr "1.485" "82.655" "1.356.094" "1.272.039" ...
## $ Trans_Amount_CC: chr "13,30" "523,95" "1.990,10" "756,56" ...
## $ Trans_Amount_DC: chr "0,22" "11,77" "60,63" "25,52" ...
## $ date : chr "20101" "20101" "20101" "20101" ...
## $ year : chr "2010" "2010" "2010" "2010" ...
## $ month : chr "1" "1" "1" "1" ...
# Convert the four figure columns from Turkish-formatted text to numeric.
# "." is the thousands separator (removed everywhere); "," is the decimal
# mark, which is only rewritten for the amount columns.
count_cols <- c("Trans_Numb_CC", "Trans_Numb_DC")
amount_cols <- c("Trans_Amount_CC", "Trans_Amount_DC")
for (col_name in c(count_cols, amount_cols)) {
  cleaned <- gsub("[.]", "", DF[[col_name]])
  if (col_name %in% amount_cols) {
    cleaned <- gsub("[,]", ".", cleaned)
  }
  DF[[col_name]] <- as.numeric(as.character(cleaned))
}
# Confirm the columns are now numeric.
str(DF)
## 'data.frame': 2840 obs. of 8 variables:
## $ Sector : chr "ARABA KİRALAMA" "ARAÇ KİRALAMA-SATIŞ/SERVİS/YEDEK PARÇA" "BENZİN VE YAKIT İSTASYONLARI" "ÇEŞİTLİ GIDA" ...
## $ Trans_Numb_CC : num 30540 1865922 16655032 8901339 5282218 ...
## $ Trans_Numb_DC : num 1485 82655 1356094 1272039 91911 ...
## $ Trans_Amount_CC: num 13.3 524 1990.1 756.6 688.4 ...
## $ Trans_Amount_DC: num 0.22 11.77 60.63 25.52 18.05 ...
## $ date : chr "20101" "20101" "20101" "20101" ...
## $ year : chr "2010" "2010" "2010" "2010" ...
## $ month : chr "1" "1" "1" "1" ...
# Differences between credit card and debit card transaction counts.
# For each year, sum the monthly transaction counts of both card types and
# take credit minus debit.
# `year` is a character column; `%in%` coerces the integer range, so the
# comparison works as written. 2019 is excluded -- presumably because the
# scraped data does not cover the full year (TODO confirm).
trans_num <- DF %>%
  group_by(year) %>%
  summarise(
    number_cc = sum(Trans_Numb_CC),
    number_dc = sum(Trans_Numb_DC)
  ) %>%
  mutate(diff_in_numb = (number_cc - number_dc)) %>%
  filter(year %in% c(2010:2018))
# `group = 1` joins the discrete years into one series so geom_smooth() can
# fit a single curve through them.
# Subtitle and caption previously read "Katilimci Sayisi" (number of
# participants) and "data from EGM", which do not describe this chart; the
# figures come from the BKM pages scraped above.
ggplot(trans_num, aes(x = year, y = diff_in_numb, group = 1)) +
  geom_smooth(color = "blue") +
  geom_point(color = "red") +
  labs(
    title = "Differences between credit_c and debit_c transaction",
    subtitle = "Number of transactions per year (credit card minus debit card)",
    caption = "(based on data from BKM)"
  ) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 35, vjust = 0.5))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Transaction amount comparison: the five sectors with the largest combined
# (credit + debit card) transaction amount over all rows of DF.
sector_totals <- summarise(
  group_by(DF, Sector),
  total_amount = sum(Trans_Amount_CC + Trans_Amount_DC)
)
ranked_sectors <- arrange(sector_totals, desc(total_amount))
# The share is computed before slicing, so each percentage is relative to the
# total of every sector, not just the five that are kept.
ranked_sectors <- mutate(
  ranked_sectors,
  share_percentage = round(total_amount / sum(total_amount) * 100, 2)
)
sector_share <- slice(ranked_sectors, 1:5)
sector_share
## # A tibble: 5 x 3
## Sector total_amount share_percentage
## <chr> <dbl> <dbl>
## 1 MARKET VE ALIŞVERİŞ MERKEZLERİ 854878. 17.6
## 2 BENZİN VE YAKIT İSTASYONLARI 480773. 9.91
## 3 GİYİM VE AKSESUAR 399549. 8.23
## 4 ÇEŞİTLİ GIDA 322643. 6.65
## 5 ELEKTRİK-ELEKTRONİK EŞYA, BİLGİSAYAR 311508. 6.42
# Polar bar chart of the top-five sectors' share of the total transaction
# amount, with each bar labelled by its percentage.
ggplot(sector_share, aes(x = Sector, y = share_percentage, fill = Sector)) +
  geom_bar(stat = "identity") +
  coord_polar() +
  geom_text(
    # x is inherited from the ggplot() call; the former empty `x=` argument
    # was dropped.
    aes(y = share_percentage, label = percent(share_percentage / 100)),
    # Was `position_stack(vjust = 1,8)`: the stray 8 was passed positionally
    # as `reverse`. With one bar per sector there is nothing to reorder, so
    # vjust = 1 (label at the top of the bar) keeps the rendering as it was.
    position = position_stack(vjust = 1),
    size = 4,
    angle = 0
  )
# Credit card and debit card transaction amounts per year, summed over every
# sector in DF, plus their combined total.
yearly_sums <- summarise(
  group_by(DF, year),
  total_cc = sum(Trans_Amount_CC),
  total_dc = sum(Trans_Amount_DC)
)
total <- mutate(yearly_sums, total_amount = (total_cc + total_dc))
total
## # A tibble: 10 x 4
## year total_cc total_dc total_amount
## <chr> <dbl> <dbl> <dbl>
## 1 2010 213706. 8740. 222446.
## 2 2011 265337. 13335. 278672.
## 3 2012 331818. 17961. 349779.
## 4 2013 388514. 24788. 413302.
## 5 2014 433150. 33553. 466703.
## 6 2015 490839. 43060. 533900.
## 7 2016 534068. 54800. 588869.
## 8 2017 605382. 78897. 684279.
## 9 2018 725661. 114678. 840338.
## 10 2019 402133. 72979. 475113.
# Yearly combined card spending, one point per year; colour and point size
# both encode the same total.
# geom_point() replaces geom_jitter(): with a single value per year, jitter
# only displaced each point at random, so the plotted totals neither matched
# the data nor stayed the same between runs.
ggplot(total) +
  geom_point(
    aes(x = year, y = total_amount, color = total_amount, size = total_amount)
  ) +
  labs(
    title = "Cards Usage",
    subtitle = "Total Amount",
    caption = "(based on data from BKM)",
    y = "Tot. Credit+Debit Card Use (in Mio)",
    x = "Year"
  ) +
  # Thousands separators on the axis, as in the transaction-count plot.
  scale_y_continuous(labels = comma)