From Raw to Civilized Data

I collected the data from the BKM website. I combined the data for the first six months of 2019 and added column names. You can see the first rows of the dataset in the table below.

library(rvest)
library(dplyr)
library(ggplot2)
# Download the BKM sectoral statistics pages for months 1-6 of 2019 and stack
# the monthly tables into a single data frame, `tablo`.
base_url <- "https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/?filter_year=2019&filter_month="

# Read one month's page and return the 4th table on it without its first two
# rows (presumably header rows -- confirm against the live page).
read_month_table <- function(month) {
  page <- read_html(paste0(base_url, month))
  html_table(page, fill = TRUE)[[4]][-c(1:2), ]
}

month_ids <- 1:6
monthly_tables <- lapply(month_ids, read_month_table)

# Only the first month keeps its first column (X1). It is dropped for every
# later month, so bind_rows() fills X1 with NA on those rows. The code that
# follows relies on exactly this layout when it refills X1.
monthly_tables[-1] <- lapply(monthly_tables[-1], function(tbl) tbl[, -1])

# Bind once instead of growing the data frame inside a loop.
tablo <- bind_rows(monthly_tables)
# Sector names: keep the rows of `tablo` whose X1 is not missing. Missingness
# is tested with is.na() rather than by comparing against the string "NA".
is_yeri <- tablo %>%
  filter(!is.na(X1)) %>%
  select(X1)

# Recycle the names over the whole table. The repeat count is derived from the
# data; the guard fails loudly if the rows are not a whole number of blocks.
stopifnot(nrow(tablo) %% nrow(is_yeri) == 0)
is_yeri_1 <- rep(is_yeri[["X1"]], times = nrow(tablo) %/% nrow(is_yeri))

# Write the names into X1 on every row, then drop the "TOPLAM" (Turkish for
# "total") rows.
tablo_1 <- tablo %>%
  mutate(X1 = is_yeri_1) %>%
  filter(X1 != "TOPLAM")


# Tag every row with its month number (1-6). The rows-per-month count is
# derived from the data instead of being hard-coded.
# NOTE(review): assumes the rows are ordered month by month and that every
# month contributes the same number of rows -- confirm against the scrape.
n_months <- 6L
stopifnot(nrow(tablo_1) %% n_months == 0)
month_1 <- rep(seq_len(n_months), each = nrow(tablo_1) %/% n_months)

tablo_son <- tablo_1 %>%
  mutate(month = month_1)


# Turn the text figures into numbers: strip every "." and then replace "," with
# "." before calling as.numeric() (presumably "." is the thousands separator
# and "," the decimal mark -- confirm against the source page).
parse_tr_number <- function(x) {
  as.numeric(gsub(",", ".", gsub("\\.", "", x)))
}

# Convert every column except the first one. The first column holds the sector
# names; pushing it through as.numeric() would only yield NAs and coercion
# warnings, so it is left untouched instead of being overwritten and restored.
tablo_son <- as.data.frame(tablo_son)
tablo_son[-1] <- lapply(tablo_son[-1], parse_tr_number)

# Replace the column names with descriptive English labels (assigned by
# position), then preview the first rows.
names(tablo_son) <- c(
  "Sector_Name",
  "Number_of_Transactions_Credit_Card",
  "Number_of_Transactions_Debit_Card",
  "Transaction_Amount_Credit_Card",
  "Transaction_Amount_Debit_Card",
  "month"
)
head(tablo_son)
##                              Sector_Name
## 1                         ARABA KİRALAMA
## 2 ARAÇ KİRALAMA-SATIŞ/SERVİS/YEDEK PARÇA
## 3           BENZİN VE YAKIT İSTASYONLARI
## 4                     BIREYSEL EMEKLILIK
## 5                           ÇEŞİTLİ GIDA
## 6                     DOĞRUDAN PAZARLAMA
##   Number_of_Transactions_Credit_Card Number_of_Transactions_Debit_Card
## 1                             256372                             49296
## 2                            2967019                            642136
## 3                           25277186                           8684036
## 4                            2271587                               697
## 5                           28362091                          15221891
## 6                             757602                             40038
##   Transaction_Amount_Credit_Card Transaction_Amount_Debit_Card month
## 1                         195.13                         14.77     1
## 2                        2185.84                        127.16     1
## 3                        5066.04                        680.01     1
## 4                         716.42                          0.30     1
## 5                        4473.98                        673.70     1
## 6                         678.99                          7.81     1

Average Transaction Amount of Selected Sectors for the First Month of 2019

I have compared the sectors whose average credit card transaction amount is above 700 with the remaining sectors (grouped as "OTHER") for January 2019. You can see the average amounts for the different sectors in the chart below.

# Average credit-card transaction amount per sector for month 1:
# (amount / number of transactions) * 1,000,000. Sectors whose average is not
# above 700 are pooled under the label "OTHER" and their averages are summed.
tablo1 <- tablo_son %>%
  filter(month == 1) %>%
  transmute(
    Sector_Name,
    Average_transaction_amount_CC =
      (Transaction_Amount_Credit_Card / Number_of_Transactions_Credit_Card) * 1000000
  ) %>%
  mutate(
    Sector_Name = case_when(
      Average_transaction_amount_CC > 700 ~ Sector_Name,
      TRUE ~ "OTHER"
    )
  ) %>%
  # One row per label; summarise() replaces group_by + transmute + distinct.
  group_by(Sector_Name) %>%
  summarise(
    Total_Average_transaction_amount_CC = sum(Average_transaction_amount_CC)
  ) %>%
  ungroup() %>%
  arrange(desc(Total_Average_transaction_amount_CC)) %>%
  # NOTE(review): `share` is the summed average multiplied by 100; it is not
  # divided by the overall total, so it is not a percentage share -- confirm
  # whether a normalised share was intended.
  transmute(
    Sector_Name,
    share = round(Total_Average_transaction_amount_CC * 100, 2)
  )

# Pie chart: a single stacked bar drawn in polar coordinates, with each slice
# labelled by its `share` value at the middle of the slice.
ggplot(data = tablo1, aes(x = "", y = share, fill = Sector_Name)) +
  geom_col(width = 1, color = "black") +
  coord_polar("y", start = 0) +
  geom_text(
    aes(x = 1.3, y = share, label = share),
    position = position_stack(vjust = 0.5),
    color = "black"
  ) +
  labs(fill = "Sector names") +
  theme_void()

Total Expenditure for Education

You can see the distribution of the amount of expenditure for “EĞİTİM / KIRTASİYE / OFİS MALZEMELERİ” by months from the bar graph below

# Per-month figure for the "EĞİTİM / KIRTASİYE / OFİS MALZEMELERİ" sector: the
# average credit-card transaction amount plus the average debit-card
# transaction amount, sorted from the highest month to the lowest.
tablo2 <- tablo_son %>%
  filter(Sector_Name == "EĞİTİM / KIRTASİYE / OFİS MALZEMELERİ") %>%
  transmute(
    month,
    Average_transaction_amount_CC =
      (Transaction_Amount_Credit_Card / Number_of_Transactions_Credit_Card) * 1000000,
    # The debit-card average must use the debit-card columns; it previously
    # repeated the credit-card ones, which just doubled the credit-card value.
    Average_transaction_amount_DC =
      (Transaction_Amount_Debit_Card / Number_of_Transactions_Debit_Card) * 1000000
  ) %>%
  group_by(month) %>%
  summarize(
    expenditure_total =
      sum(Average_transaction_amount_CC) + sum(Average_transaction_amount_DC)
  ) %>%
  ungroup() %>%
  arrange(desc(expenditure_total))

You can see the distribution of the amount of expenditure for “EĞİTİM / KIRTASİYE / OFİS MALZEMELERİ” by months from the bar graph below