BKM’s Sector Improvement Report includes the transaction counts and transaction volumes of Credit Cards/Banking Cards in different sector groups. We will analyse the data from January 2019 to July 2019.

## Recommended Libraries
library(rvest)
library(dplyr)
library(ggplot2)
library(knitr)
library(tidyverse)
# Widen the console so wide tables are not wrapped when printed.
options(width = 999)

# Download the BKM sector table for each requested month of 2019 and stack
# the monthly tables into `bkm_df_total`, tagging every row with its month.
# Each month's table is stored in a list and bound once after the loop, so
# every month is kept (previously each iteration overwrote the result and
# only the last month survived).
report_months <- 1:6
monthly_tables <- vector("list", length(report_months))
for (k in seq_along(report_months)) {
  i <- report_months[k]
  target_url <- paste0(
    "https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/?filter_year=2019&filter_month=",
    i
  )
  content <- read_html(target_url)

  # The data is read from the fourth <table> element on the page; fail
  # loudly if the page layout does not provide one.
  page_tables <- html_nodes(content, "table")
  if (length(page_tables) < 4) {
    stop(
      "Expected at least 4 tables at ", target_url,
      " but found ", length(page_tables),
      call. = FALSE
    )
  }

  # header = FALSE keeps the header rows as data, so every column stays
  # character and is converted explicitly later on.
  # NOTE(review): `fill = TRUE` is kept for older rvest releases; newer
  # releases always fill missing cells and may warn that it is deprecated.
  bkm_df <- page_tables[[4]] %>%
    html_table(fill = TRUE, header = FALSE) %>%
    as.data.frame()

  # Keep rows 3 .. (n - 1): drops the first two rows and the last row
  # (presumably header rows and a totals row -- confirm against the page).
  bkm_df <- bkm_df %>%
    slice(3:(nrow(.) - 1)) %>%
    mutate_if(is.numeric, function(x) ifelse(is.na(x), 0, x)) %>%
    mutate(month = i)

  monthly_tables[[k]] <- bkm_df
}
bkm_df_total <- bind_rows(monthly_tables)
## Name the columns and convert the text columns to numbers
names(bkm_df_total) <- c(
  'Category',
  'CC_Transaction_Count',
  'Debit_Transaction_Count',
  'CC_Transaction_Amount',
  'Debit_Transaction_Amount',
  'Month'
)

# Counts: remove every "." from the text, then convert to numeric.
# Amounts: remove every ".", turn "," into ".", then convert to numeric.
bkm_df_total <- bkm_df_total %>%
  mutate_at(
    vars(CC_Transaction_Count, Debit_Transaction_Count),
    function(txt) as.numeric(str_replace_all(txt, pattern = fixed("."), ""))
  ) %>%
  mutate_at(
    vars(CC_Transaction_Amount, Debit_Transaction_Amount),
    function(txt) {
      without_dots <- str_replace_all(txt, pattern = fixed("."), "")
      as.numeric(str_replace_all(without_dots, pattern = fixed(","), "."))
    }
  )
## Preview the first rows of the cleaned data as an HTML table
## (first column centred, the remaining five right-aligned)
bkm_df_total %>%
  head() %>%
  kable(format = "html", align = "crrrrr")
Category | CC_Transaction_Count | Debit_Transaction_Count | CC_Transaction_Amount | Debit_Transaction_Amount | Month |
---|---|---|---|---|---|
ARABA KİRALAMA | 306426 | 77113 | 256.63 | 43.79 | 6 |
ARAÇ KİRALAMA-SATIŞ/SERVİS/YEDEK PARÇA | 3335321 | 902483 | 2588.91 | 190.61 | 6 |
BENZİN VE YAKIT İSTASYONLARI | 32572382 | 13813215 | 6515.69 | 1195.27 | 6 |
BIREYSEL EMEKLILIK | 2060390 | 1256 | 686.98 | 0.42 | 6 |
ÇEŞİTLİ GIDA | 31076151 | 20446524 | 5147.68 | 994.90 | 6 |
DOĞRUDAN PAZARLAMA | 738068 | 45268 | 723.68 | 21.26 | 6 |
## replication
unit_transactions <- bkm_df_total

## Unit (per-transaction) amounts of credit and debit cards separately.
## The amount columns are scaled by 1000000 before dividing by the counts
## (presumably the amounts are reported in millions -- confirm against BKM).
## A row with a zero count has no defined unit amount, so it becomes NA
## instead of NaN/Inf, which would otherwise poison the sums below.
unit_transactions <- unit_transactions %>%
  mutate(
    unit_credit_amount = ifelse(
      CC_Transaction_Count > 0,
      1000000 * CC_Transaction_Amount / CC_Transaction_Count,
      NA_real_
    ),
    unit_debit_amount = ifelse(
      Debit_Transaction_Count > 0,
      1000000 * Debit_Transaction_Amount / Debit_Transaction_Count,
      NA_real_
    )
  )

## Each row's unit amount as a percentage of the sum of all unit amounts
unit_transactions <- unit_transactions %>%
  transmute(
    Category,
    Month,
    cc_percentage = 100 * unit_credit_amount /
      sum(unit_credit_amount, na.rm = TRUE),
    debit_percentage = 100 * unit_debit_amount /
      sum(unit_debit_amount, na.rm = TRUE)
  )

## Collapse to one row per category by summing over all months.
## summarise() is used (not mutate()) so a category appears once; with
## mutate() every month kept its own row carrying the category total, and
## the stacked bars multiplied that total by the number of months.
unit_transactions <- unit_transactions %>%
  group_by(Category) %>%
  summarise(
    debit_percentage = sum(debit_percentage, na.rm = TRUE),
    cc_percentage = sum(cc_percentage, na.rm = TRUE)
  ) %>%
  ungroup()

## Horizontal bar chart of the combined percentage, ordered by size
ggplot(
  unit_transactions,
  aes(
    x = reorder(Category, cc_percentage + debit_percentage),
    y = cc_percentage + debit_percentage
  )
) +
  geom_col(aes(fill = cc_percentage + debit_percentage), position = "stack") +
  coord_flip() +
  geom_point() +
  labs(
    x = "Category",
    y = "Unit Transaction Percentage ",
    title = "Unit Transaction Percentage of Categories ",
    fill = "Percentage"
  )
## replication
## Total transaction count (credit + debit) per category, keeping the five
## categories with the highest totals. summarise() yields one row per
## category, so the top five are five distinct categories even when the
## data holds several months; head() never pads the result with NA rows.
top_5 <- bkm_df_total %>%
  group_by(Category) %>%
  summarise(
    all_transactions = sum(CC_Transaction_Count) + sum(Debit_Transaction_Count)
  ) %>%
  ungroup() %>%
  arrange(desc(all_transactions)) %>%
  head(5)

## Pie chart: a single stacked bar (in millions of transactions) drawn in
## polar coordinates
ggplot(top_5, aes(x = "", y = all_transactions / 1000000, fill = Category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  labs(
    x = NULL,
    y = NULL,
    fill = NULL,
    title = "Transaction Count Chart of Top 5 Categories"
  )