BKM Sektörel Gelişim reports include the transaction amounts and transaction counts of credit card and debit card payments on a sectoral basis. Before we start the data analysis, we first extract the data from the HTML pages and reshape it into a format suitable for analysis.
# Build the BKM sectoral-report URL for one reporting period.
#   year  : value pasted into the `filter_year` query parameter
#   month : value pasted into the `filter_month` query parameter
# Returns the complete URL as a character string.
extractYearMonth <- function(year, month) {
  base_url <- 'https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/'
  query <- paste0('?filter_year=', year, '&filter_month=', month, '&List=Listele')
  paste0(base_url, query)
}
# Periods to download: every month of 2017-2019, stopping before 2019-07.
paramYear <- c('2017', '2018', '2019')
paramMonth <- c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12')

# Collect one data frame per month and bind them once at the end.
# Starting from an empty list (instead of seeding the result with '') avoids
# the all-blank first row that rbind('', df) would otherwise create, and
# avoids re-copying the accumulated frame on every iteration.
monthly_tables <- list()
for (i in seq_along(paramYear)) {
  for (j in seq_along(paramMonth)) {
    # Stop at 2019-07; only the inner loop is left, which is enough because
    # 2019 is the last year in paramYear.
    if (paramYear[i] == '2019' && paramMonth[j] == '07') {
      break
    }
    url <- extractYearMonth(paramYear[i], paramMonth[j])
    page <- read_html(url) # Creates an html document from URL
    # Take the 4th <table> node of the page and drop its first two rows
    # (presumably header rows -- confirm against the page layout).
    # NOTE(review): `page` is passed positionally to html_table() in addition
    # to the piped node set; it is kept as-is so the scraped values do not
    # change -- confirm which argument it binds to in the installed rvest.
    df <- page %>%
      html_nodes("table") %>%
      .[(4)] %>%
      html_table(page, fill = TRUE, header = FALSE) %>%
      as.data.frame() %>%
      slice(3:nrow(.))
    # Replace NA with 0 in numeric columns and tag every row with its period
    # as "YYYYMM".
    df <- df %>%
      mutate_if(is.numeric, function(x) ifelse(is.na(x), 0, x)) %>%
      mutate(yearmonth = paste0(paramYear[i], paramMonth[j]))
    monthly_tables[[length(monthly_tables) + 1]] <- df
  }
}
raw_data_full <- do.call(rbind, monthly_tables)
Column names and explanations are below:
1.kategori :İşyeri Grubu
2.kk_Islem_Adedi :İşlem Adedi(Kredi Kartı)
3.bk_Islem_Adedi :İşlem Adedi(Banka Kartı)
4.kk_Islem_Tutari :İşlem Tutarı(Kredi Kartı)
5.bk_Islem_Tutari :İşlem Tutarı(Banka Kartı)
6.yearmonth
# Give the scraped columns descriptive names, then preview the first 10 rows.
scraped_column_names <- c(
  'kategori',
  'kk_Islem_Adedi', 'bk_Islem_Adedi',
  'kk_Islem_Tutari', 'bk_Islem_Tutari',
  'yearmonth'
)
colnames(raw_data_full) <- scraped_column_names
head(raw_data_full, n = 10)
## kategori kk_Islem_Adedi bk_Islem_Adedi
## 1
## 2 ARABA KIRALAMA 234.458 29.881
## 3 ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK PARÇA 2.693.110 433.196
## 4 BENZIN VE YAKIT ISTASYONLARI 22.457.882 6.460.535
## 5 BIREYSEL EMEKLILIK 2.568.154 508
## 6 ÇESITLI GIDA 20.568.348 10.214.579
## 7 DOGRUDAN PAZARLAMA 425.443 34.240
## 8 EGITIM / KIRTASIYE / OFIS MALZEMELERI 4.772.413 1.855.520
## 9 ELEKTRIK-ELEKTRONIK ESYA, BILGISAYAR 6.633.372 1.140.438
## 10 GIYIM VE AKSESUAR 26.155.139 7.640.918
## kk_Islem_Tutari bk_Islem_Tutari yearmonth
## 1
## 2 116,02 5,24 201701
## 3 1.601,78 76,65 201701
## 4 3.861,49 427,61 201701
## 5 607,99 0,17 201701
## 6 3.142,49 354,55 201701
## 7 113,06 2,92 201701
## 8 1.198,04 108,68 201701
## 9 2.536,99 202,96 201701
## 10 3.245,08 625,81 201701
Displaying the categories with the highest number of credit card and debit card transactions for each year (2017 to June 2019).
# Create the dataset for the analysis: total credit-card (kk) and debit-card
# (bk) transaction counts per category and year.
# The count columns are text with '.' as thousands separator, so the dots are
# stripped before converting to numeric.
raw_data <- raw_data_full %>%
  mutate_if(is.numeric, function(x) ifelse(is.na(x), 0, x)) %>%
  mutate(year = substr(yearmonth, 1, 4)) %>%
  group_by(kategori, year) %>%
  summarise(
    toplam_kk_adet = sum(as.numeric(gsub("\\.", "", kk_Islem_Adedi)), na.rm = TRUE),
    toplam_bk_adet = sum(as.numeric(gsub("\\.", "", bk_Islem_Adedi)), na.rm = TRUE)
  ) %>%
  select(year, kategori, toplam_kk_adet, toplam_bk_adet)

# Adding "toplam" column (credit + debit counts); drop rows whose total is 0
raw_data2 <- raw_data %>%
  mutate(toplam = toplam_kk_adet + toplam_bk_adet) %>%
  select(year, kategori, toplam) %>%
  filter(toplam != 0)

# Filter unnecessary data: remove the 'TOPLAM' rows and sort each year by
# descending total
raw_data3 <- raw_data2 %>%
  arrange(year, desc(toplam)) %>%
  filter(kategori != 'TOPLAM')

# Data set preparation for chart: keep the first three rows (largest totals)
# of every year
by_raw_data <- raw_data3 %>%
  group_by(year) %>%
  mutate(rank = row_number(), vars_group = 'year') %>%
  filter(rank < 4)
print(by_raw_data)
## # A tibble: 9 x 5
## # Groups: year [3]
## year kategori toplam rank vars_group
## <chr> <chr> <dbl> <int> <chr>
## 1 2017 MARKET VE ALISVERIS MERKEZLERI 1479037772 1 year
## 2 2017 YEMEK 679323209 2 year
## 3 2017 GIYIM VE AKSESUAR 438312133 3 year
## 4 2018 MARKET VE ALISVERIS MERKEZLERI 1718749111 1 year
## 5 2018 YEMEK 823028341 2 year
## 6 2018 ÇESITLI GIDA 475281316 3 year
## 7 2019 MARKET VE ALISVERIS MERKEZLERI 957742221 1 year
## 8 2019 YEMEK 486166406 2 year
## 9 2019 ÇESITLI GIDA 286302262 3 year
# Stacked bar chart of the top-3 categories: one bar per year, bar height is
# the transaction count, fill colour is the category.
ggplot(by_raw_data, aes(x = year, y = toplam, fill = kategori)) +
  geom_col() +
  # geom_text(aes(label=toplam), vjust=1.6, color="black", size=3.5)+
  labs(
    title = "Top 3 Category (2017 to 2019 June) ",
    x = "Year",
    y = "Total",
    fill = "Category"
  ) +
  theme_minimal() +
  # Tilt the x-axis labels by 30 degrees
  theme(axis.text.x = element_text(angle = 30))
# Monthly transaction amounts for credit cards (kk) and debit cards (bk).
# Amount columns are text in Turkish number format ('.' thousands separator,
# ',' decimal mark), so they are normalised before converting to numeric.
# NOTE(review): rows with kategori == 'TOPLAM' are not excluded here, unlike
# in the top-3 analysis; if TOPLAM is the table's grand-total row these sums
# count every amount twice -- confirm.
CategoryDataSet <- raw_data_full %>%
  group_by(yearmonth) %>%
  summarize(
    kk_Islem_Tutari = sum(
      as.numeric(gsub(",", ".", gsub("\\.", "", kk_Islem_Tutari))),
      na.rm = TRUE
    ),
    bk_Islem_Tutari = sum(
      as.numeric(gsub(",", ".", gsub("\\.", "", bk_Islem_Tutari))),
      na.rm = TRUE
    )
  ) %>%
  filter(kk_Islem_Tutari != 0) %>%
  select(yearmonth, kk_Islem_Tutari, bk_Islem_Tutari)

# Long format for plotting: all debit-card rows first, then all credit-card
# rows. The labels are repeated with `each =` so that they line up with the
# stacked amounts; recycling the two-element label vector would alternate the
# labels row by row and mislabel half of the amounts.
DataSet <- data.frame(
  YearMonth = rep(CategoryDataSet$yearmonth, times = 2),
  Kategory = rep(
    c("Debit Card Amount", "Credit Card Amount"),
    each = nrow(CategoryDataSet)
  ),
  Tutar = c(CategoryDataSet$bk_Islem_Tutari, CategoryDataSet$kk_Islem_Tutari)
)
DataSet
## YearMonth Kategory Tutar
## 1 201701 Debit Card Amount 9781.54
## 2 201702 Credit Card Amount 9699.17
## 3 201703 Debit Card Amount 11246.02
## 4 201704 Credit Card Amount 12016.61
## 5 201705 Debit Card Amount 13303.50
## 6 201706 Credit Card Amount 13113.19
## 7 201707 Debit Card Amount 14839.01
## 8 201708 Credit Card Amount 15912.67
## 9 201709 Debit Card Amount 13951.15
## 10 201710 Credit Card Amount 14603.99
## 11 201711 Debit Card Amount 14127.50
## 12 201712 Credit Card Amount 15199.76
## 13 201801 Debit Card Amount 14529.02
## 14 201802 Credit Card Amount 13872.23
## 15 201803 Debit Card Amount 16094.91
## 16 201804 Credit Card Amount 17279.34
## 17 201805 Debit Card Amount 18306.38
## 18 201806 Credit Card Amount 19590.87
## 19 201807 Debit Card Amount 21835.94
## 20 201808 Credit Card Amount 23886.38
## 21 201809 Debit Card Amount 22399.98
## 22 201810 Credit Card Amount 21341.03
## 23 201811 Debit Card Amount 19777.54
## 24 201812 Credit Card Amount 20441.46
## 25 201901 Debit Card Amount 19674.22
## 26 201902 Credit Card Amount 19154.38
## 27 201903 Debit Card Amount 23316.92
## 28 201904 Credit Card Amount 26409.18
## 29 201905 Debit Card Amount 28012.93
## 30 201906 Credit Card Amount 29391.19
## 31 201701 Debit Card Amount 89670.84
## 32 201702 Credit Card Amount 83925.93
## 33 201703 Debit Card Amount 98598.32
## 34 201704 Credit Card Amount 94958.02
## 35 201705 Debit Card Amount 103439.69
## 36 201706 Credit Card Amount 97979.19
## 37 201707 Debit Card Amount 108884.13
## 38 201708 Credit Card Amount 110829.88
## 39 201709 Debit Card Amount 102243.73
## 40 201710 Credit Card Amount 105335.95
## 41 201711 Debit Card Amount 104840.66
## 42 201712 Credit Card Amount 110056.88
## 43 201801 Debit Card Amount 107169.65
## 44 201802 Credit Card Amount 97286.62
## 45 201803 Debit Card Amount 113822.30
## 46 201804 Credit Card Amount 113196.28
## 47 201805 Debit Card Amount 123009.35
## 48 201806 Credit Card Amount 117865.41
## 49 201807 Debit Card Amount 132626.05
## 50 201808 Credit Card Amount 127458.57
## 51 201809 Debit Card Amount 133528.88
## 52 201810 Credit Card Amount 128445.86
## 53 201811 Debit Card Amount 127153.65
## 54 201812 Credit Card Amount 129758.61
## 55 201901 Debit Card Amount 125321.13
## 56 201902 Credit Card Amount 115845.32
## 57 201903 Debit Card Amount 137914.56
## 58 201904 Credit Card Amount 134893.36
## 59 201905 Debit Card Amount 149707.01
## 60 201906 Credit Card Amount 140585.04
library(tidyverse)
library(dplyr)
# Yearly transaction amounts for credit cards (kk) and debit cards (bk).
# Amount columns are text in Turkish number format ('.' thousands separator,
# ',' decimal mark), so they are normalised before converting to numeric.
# NOTE(review): rows with kategori == 'TOPLAM' are not excluded here, unlike
# in the top-3 analysis; if TOPLAM is the table's grand-total row these sums
# count every amount twice -- confirm.
CategoryDataSet_Yearly <- raw_data_full %>%
  mutate_if(is.numeric, function(x) ifelse(is.na(x), 0, x)) %>%
  mutate(year = substr(yearmonth, 1, 4)) %>%
  group_by(year) %>%
  summarize(
    kk_Islem_Tutari = sum(
      as.numeric(gsub(",", ".", gsub("\\.", "", kk_Islem_Tutari))),
      na.rm = TRUE
    ),
    bk_Islem_Tutari = sum(
      as.numeric(gsub(",", ".", gsub("\\.", "", bk_Islem_Tutari))),
      na.rm = TRUE
    )
  ) %>%
  filter(kk_Islem_Tutari != 0) %>%
  select(year, kk_Islem_Tutari, bk_Islem_Tutari)

# Chart data set in long format: all debit-card rows first, then all
# credit-card rows. The labels are repeated with `each =` so that they line up
# with the stacked amounts; recycling the two-element label vector against an
# odd number of years would attach the wrong label to some rows.
DataSet_Yearly <- data.frame(
  Year = rep(CategoryDataSet_Yearly$year, times = 2),
  Category = rep(
    c("Debit Card Amount", "Credit Card Amount"),
    each = nrow(CategoryDataSet_Yearly)
  ),
  Amount = c(
    CategoryDataSet_Yearly$bk_Islem_Tutari,
    CategoryDataSet_Yearly$kk_Islem_Tutari
  )
)
DataSet_Yearly <- DataSet_Yearly %>% arrange(Year, Category, Amount)
DataSet_Yearly
## Year Category Amount
## 1 2017 Credit Card Amount 1210763.2
## 2 2017 Debit Card Amount 157794.1
## 3 2018 Credit Card Amount 229355.1
## 4 2018 Debit Card Amount 1451321.2
## 5 2019 Credit Card Amount 804266.4
## 6 2019 Debit Card Amount 145958.8
# Line chart of the yearly amounts: one line (with point markers) per card
# category, coloured by category.
ggplot(
  data = DataSet_Yearly,
  mapping = aes(x = Year, y = Amount, group = Category, color = Category)
) +
  geom_line() +
  geom_point()