1.Data Extraction

BKM Sektörel Gelişim reports include transaction amounts and transaction count made by credit and debit cards on sectoral basis. Before We start to data analyse firstly we extract data from Html and construct the suitable format that we can start analyze.

extractYearMonth<- function(year,month) {
  url <- paste('https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/?filter_year=',year,'&filter_month=',month,'&List=Listele', sep='',collapse = NULL)
  return(url)
}

paramYear <-c('2017','2018','2019')
paramMonth <- c('01','02','03','04','05','06','07','08','09','10','11','12')
raw_data_full <- ''
    
for (i in 1:length(paramYear)) {
  for (j in 1:length(paramMonth)) {
    if (paramYear[i]=='2019' & paramMonth[j]=='07') {break}
    url<-extractYearMonth(paramYear[i],paramMonth[j])
    page <- read_html(url) #Creates an html document from URL
    df <- page%>%html_nodes("table") %>%.[(4)] %>% html_table(page, fill = TRUE,header = FALSE)  %>%  as.data.frame() %>%slice(3:max(nrow(.)))  
    df <- df %>%mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>%  mutate(yearmonth=paste(paramYear[i],paramMonth[j],sep=''))
    raw_data_full<-rbind(raw_data_full,df)
  }
}

Adding Columns

Column names and explanations are below:
  
  1.kategori :İşyeri Grubu
  2.kk_Islem_Adedi :İşlem Adedi(Kredi Kartı)
  3.bk_Islem_Adedi :İşlem Adedi(Banka Kartı)
  4.kk_Islem_Tutari :İşlem Tutarı(Kredi Kartı)
  5.bk_Islem_Tutari :İşlem Tutarı(Kredi Kartı)
  6.yearmonth 
colnames(raw_data_full) <- c('kategori','kk_Islem_Adedi','bk_Islem_Adedi','kk_Islem_Tutari','bk_Islem_Tutari'                              ,'yearmonth')
head(raw_data_full,10)
##                                  kategori kk_Islem_Adedi bk_Islem_Adedi
## 1                                                                      
## 2                          ARABA KIRALAMA        234.458         29.881
## 3  ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK PARÇA      2.693.110        433.196
## 4            BENZIN VE YAKIT ISTASYONLARI     22.457.882      6.460.535
## 5                      BIREYSEL EMEKLILIK      2.568.154            508
## 6                            ÇESITLI GIDA     20.568.348     10.214.579
## 7                      DOGRUDAN PAZARLAMA        425.443         34.240
## 8   EGITIM / KIRTASIYE / OFIS MALZEMELERI      4.772.413      1.855.520
## 9    ELEKTRIK-ELEKTRONIK ESYA, BILGISAYAR      6.633.372      1.140.438
## 10                      GIYIM VE AKSESUAR     26.155.139      7.640.918
##    kk_Islem_Tutari bk_Islem_Tutari yearmonth
## 1                                           
## 2           116,02            5,24    201701
## 3         1.601,78           76,65    201701
## 4         3.861,49          427,61    201701
## 5           607,99            0,17    201701
## 6         3.142,49          354,55    201701
## 7           113,06            2,92    201701
## 8         1.198,04          108,68    201701
## 9         2.536,99          202,96    201701
## 10        3.245,08          625,81    201701

2.Data Analyzing

2.1.Top 3 Category with the Highest Numberf Transactions

Displaying the highest number of transactions by credit card and debit card by years.(2017 to 2019 June)

#Create dataset for the analyzing

raw_data <-raw_data_full
raw_data <- raw_data %>%mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>%  mutate(year=substr(yearmonth,1,4))

raw_data <-raw_data%>% group_by(kategori,year) %>% summarise(toplam_kk_adet=max(sum(as.numeric(gsub("\\.","",kk_Islem_Adedi)),na.rm=T)),toplam_bk_adet=max(sum(as.numeric(gsub("\\.","",bk_Islem_Adedi)),na.rm = T))) %>% select(year,kategori,toplam_kk_adet,toplam_bk_adet)

#Adding "toplam" column
raw_data2<-raw_data %>% mutate( toplam= (toplam_kk_adet+toplam_bk_adet))%>% select(year,kategori,toplam)%>%filter(toplam!=0)

#Filter unnecessary data
raw_data3 <-raw_data2%>%arrange(year,desc(toplam))%>%filter(kategori!='TOPLAM')

#Data set preparation for chart
by_raw_data <- raw_data3 %>% group_by(year) %>% mutate(
   rank =row_number(),vars_group = 'year'
  ) %>% filter(rank<4)

print(by_raw_data)
## # A tibble: 9 x 5
## # Groups:   year [3]
##   year  kategori                           toplam  rank vars_group
##   <chr> <chr>                               <dbl> <int> <chr>     
## 1 2017  MARKET VE ALISVERIS MERKEZLERI 1479037772     1 year      
## 2 2017  YEMEK                           679323209     2 year      
## 3 2017  GIYIM VE AKSESUAR               438312133     3 year      
## 4 2018  MARKET VE ALISVERIS MERKEZLERI 1718749111     1 year      
## 5 2018  YEMEK                           823028341     2 year      
## 6 2018  ÇESITLI GIDA                    475281316     3 year      
## 7 2019  MARKET VE ALISVERIS MERKEZLERI  957742221     1 year      
## 8 2019  YEMEK                           486166406     2 year      
## 9 2019  ÇESITLI GIDA                    286302262     3 year
ggplot(by_raw_data, aes(x=year, y=toplam, fill=kategori))+
  geom_bar(stat="identity")+
#  geom_text(aes(label=toplam), vjust=1.6, color="black", size=3.5)+
  theme_minimal()+
  labs(x="Year",y="Total",title="Top 3 Category (2017 to 2019 June) ",fill="Category")+
  theme(axis.text.x = element_text(angle=30))

2.2.Monthly debit card and credit card amount.

CategoryDataSet <-raw_data_full  %>%group_by(yearmonth)%>% summarize(kk_Islem_Tutari= sum(as.numeric(gsub(",", ".", gsub("\\.", "", kk_Islem_Tutari))),na.rm=T),bk_Islem_Tutari= sum(as.numeric(gsub(",", ".", gsub("\\.", "", bk_Islem_Tutari))),na.rm=T))%>%mutate(year=substr(yearmonth,1,4)) %>%filter(kk_Islem_Tutari!=0)%>% select(yearmonth,kk_Islem_Tutari,bk_Islem_Tutari)



DataSet <- data.frame(YearMonth=c(CategoryDataSet$yearmonth),
  Kategory = c("Debit Card Amount","Credit Card Amount")
  , Tutar=c(CategoryDataSet$bk_Islem_Tutari,CategoryDataSet$kk_Islem_Tutari))
DataSet
##    YearMonth           Kategory     Tutar
## 1     201701  Debit Card Amount   9781.54
## 2     201702 Credit Card Amount   9699.17
## 3     201703  Debit Card Amount  11246.02
## 4     201704 Credit Card Amount  12016.61
## 5     201705  Debit Card Amount  13303.50
## 6     201706 Credit Card Amount  13113.19
## 7     201707  Debit Card Amount  14839.01
## 8     201708 Credit Card Amount  15912.67
## 9     201709  Debit Card Amount  13951.15
## 10    201710 Credit Card Amount  14603.99
## 11    201711  Debit Card Amount  14127.50
## 12    201712 Credit Card Amount  15199.76
## 13    201801  Debit Card Amount  14529.02
## 14    201802 Credit Card Amount  13872.23
## 15    201803  Debit Card Amount  16094.91
## 16    201804 Credit Card Amount  17279.34
## 17    201805  Debit Card Amount  18306.38
## 18    201806 Credit Card Amount  19590.87
## 19    201807  Debit Card Amount  21835.94
## 20    201808 Credit Card Amount  23886.38
## 21    201809  Debit Card Amount  22399.98
## 22    201810 Credit Card Amount  21341.03
## 23    201811  Debit Card Amount  19777.54
## 24    201812 Credit Card Amount  20441.46
## 25    201901  Debit Card Amount  19674.22
## 26    201902 Credit Card Amount  19154.38
## 27    201903  Debit Card Amount  23316.92
## 28    201904 Credit Card Amount  26409.18
## 29    201905  Debit Card Amount  28012.93
## 30    201906 Credit Card Amount  29391.19
## 31    201701  Debit Card Amount  89670.84
## 32    201702 Credit Card Amount  83925.93
## 33    201703  Debit Card Amount  98598.32
## 34    201704 Credit Card Amount  94958.02
## 35    201705  Debit Card Amount 103439.69
## 36    201706 Credit Card Amount  97979.19
## 37    201707  Debit Card Amount 108884.13
## 38    201708 Credit Card Amount 110829.88
## 39    201709  Debit Card Amount 102243.73
## 40    201710 Credit Card Amount 105335.95
## 41    201711  Debit Card Amount 104840.66
## 42    201712 Credit Card Amount 110056.88
## 43    201801  Debit Card Amount 107169.65
## 44    201802 Credit Card Amount  97286.62
## 45    201803  Debit Card Amount 113822.30
## 46    201804 Credit Card Amount 113196.28
## 47    201805  Debit Card Amount 123009.35
## 48    201806 Credit Card Amount 117865.41
## 49    201807  Debit Card Amount 132626.05
## 50    201808 Credit Card Amount 127458.57
## 51    201809  Debit Card Amount 133528.88
## 52    201810 Credit Card Amount 128445.86
## 53    201811  Debit Card Amount 127153.65
## 54    201812 Credit Card Amount 129758.61
## 55    201901  Debit Card Amount 125321.13
## 56    201902 Credit Card Amount 115845.32
## 57    201903  Debit Card Amount 137914.56
## 58    201904 Credit Card Amount 134893.36
## 59    201905  Debit Card Amount 149707.01
## 60    201906 Credit Card Amount 140585.04

2.3.Annual Debit Card and Credit Card amount and chart.

library(tidyverse)
library(dplyr)

CategoryDataSet_Yearly <-raw_data_full %>%mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>%  mutate(year=substr(yearmonth,1,4))%>%group_by(year)%>% summarize(kk_Islem_Tutari= sum(as.numeric(gsub(",", ".", gsub("\\.", "", kk_Islem_Tutari))),na.rm=T),bk_Islem_Tutari= sum(as.numeric(gsub(",", ".", gsub("\\.", "", bk_Islem_Tutari))),na.rm=T)) %>%filter(kk_Islem_Tutari!=0)%>% select(year,kk_Islem_Tutari,bk_Islem_Tutari)

#Chart data set
DataSet_Yearly <- data.frame(Year=c(CategoryDataSet_Yearly$year),
  Category = c("Debit Card Amount","Credit Card Amount")
  , Amount=c(CategoryDataSet_Yearly$bk_Islem_Tutari,CategoryDataSet_Yearly$kk_Islem_Tutari))

DataSet_Yearly<-DataSet_Yearly%>% arrange(Year,Category,Amount)

DataSet_Yearly
##   Year           Category    Amount
## 1 2017 Credit Card Amount 1210763.2
## 2 2017  Debit Card Amount  157794.1
## 3 2018 Credit Card Amount  229355.1
## 4 2018  Debit Card Amount 1451321.2
## 5 2019 Credit Card Amount  804266.4
## 6 2019  Debit Card Amount  145958.8
ggplot(data=DataSet_Yearly, aes(x=Year, y=Amount, group=Category)) +
  geom_line(aes(color=Category))+
  geom_point(aes(color=Category))