library(dplyr)
library(tidyverse)
library(ggplot2)
library(rvest)
library(grid)
library(scales)

Step 1 - Data Preparation for Analysis as of end of 2018

Download and read the 2018 year-end data from the BKM website.

# Fetch the BKM sectoral report for December 2018 (filter_year=2018,
# filter_month=12) and parse the HTML tables on the page into a list of
# data frames.
url <- "https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/?filter_year=2018&filter_month=12&List=Listele"
page <- url %>% read_html()
year_end_2018 <- page %>% html_table(fill = TRUE)

# Inspect the list to see which element holds the sector figures.
glimpse(year_end_2018)
## List of 4
##  $ :'data.frame':    2 obs. of  3 variables:
##   ..$ X1: chr [1:2] "SEÇILEN AYA AIT SEKTÖREL GELISIM" "Seçilen Aya Ait Sektörel Gelisim"
##   ..$ X2: chr [1:2] "Seçilen Aya Ait Sektörel Gelisim" NA
##   ..$ X3: chr [1:2] "Seçilen Aya Ait Sektörel Gelisim" NA
##  $ :'data.frame':    1 obs. of  1 variable:
##   ..$ X1: chr "Seçilen Aya Ait Sektörel Gelisim"
##  $ :'data.frame':    1 obs. of  1 variable:
##   ..$ X1: chr "Yil:\n                    2019201820172016201520142013201220112010Ay:\n                                        "| __truncated__
##  $ :'data.frame':    29 obs. of  5 variables:
##   ..$ X1: chr [1:29] "Isyeri Grubu" "Isyeri Grubu" "ARABA KIRALAMA" "ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK PARÇA" ...
##   ..$ X2: chr [1:29] "Islem Adedi" "Islem Adedi(Kredi Karti)" "266.038" "3.361.371" ...
##   ..$ X3: chr [1:29] "Islem Adedi" "Islem Adedi (Banka Karti)" "50.722" "709.323" ...
##   ..$ X4: chr [1:29] "Islem Tutari (Milyon TL)" "Islem Tutari \n                (Kredi Karti)" "199,57" "2.804,73" ...
##   ..$ X5: chr [1:29] "Islem Tutari (Milyon TL)" "Islem Tutari \n                    (Banka Karti)" "16,54" "147,82" ...

Select the relevant columns and clean up the raw data; I keep only the credit card (CC) columns, since I’m only interested in CC data.

# The sector figures are in the fourth parsed table. Drop its two header
# rows and the "TOPLAM" (total) row, then keep the sector name (column 1),
# the credit card transaction count (column 2) and the credit card
# transaction amount (column 4).
year_end_2018 <- year_end_2018[[4]] %>%
  slice(-(1:2)) %>%
  filter(X1 != "TOPLAM") %>%
  select(1, 2, 4)

# Give the remaining columns descriptive names.
names(year_end_2018) <- c("isyeri", "islem_adedi", "islem_tutari")
year_end_2018
##                                    isyeri islem_adedi islem_tutari
## 1                          ARABA KIRALAMA     266.038       199,57
## 2  ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK PARÇA   3.361.371     2.804,73
## 3            BENZIN VE YAKIT ISTASYONLARI  27.007.860     5.490,44
## 4                      BIREYSEL EMEKLILIK   2.254.844       668,84
## 5                            ÇESITLI GIDA  28.278.290     4.609,98
## 6                      DOGRUDAN PAZARLAMA     803.029       750,34
## 7   EGITIM / KIRTASIYE / OFIS MALZEMELERI   6.889.839     1.417,88
## 8    ELEKTRIK-ELEKTRONIK ESYA, BILGISAYAR   7.980.083     3.917,89
## 9                       GIYIM VE AKSESUAR  28.162.255     5.224,67
## 10                            HAVAYOLLARI   1.757.052     1.211,97
## 11                      HIZMET SEKTÖRLERI  23.010.866     4.529,13
## 12                   KAMU/VERGI ODEMELERI   2.744.917     1.372,60
## 13                              KONAKLAMA   1.564.436       964,62
## 14       KULÜP / DERNEK /SOSYAL HIZMETLER   1.164.357       243,98
## 15                KUMARHANE/IÇKILI YERLER     546.548       225,65
## 16                             KUYUMCULAR     863.939       892,64
## 17         MARKET VE ALISVERIS MERKEZLERI 104.967.591    11.412,56
## 18                  MOBILYA VE DEKORASYON   5.114.674     2.556,26
## 19                       MÜTEAHHIT ISLERI     622.598       776,39
## 20        SAGLIK/SAGLIK ÜRÜNLERI/KOZMETIK  15.354.372     2.501,57
## 21         SEYAHAT ACENTELERI/TASIMACILIK   5.685.982     1.583,93
## 22                                SIGORTA   5.190.237     3.409,63
## 23                       TELEKOMÜNIKASYON  18.453.861     1.773,13
## 24  YAPI MALZEMELERI, HIRDAVAT, NALBURIYE   3.424.605     2.675,00
## 25                                  YEMEK  40.487.702     2.314,60
## 26                                  DIGER   4.943.785     1.351,30

Changing Character Columns to Numeric Values

# The scraped figures use "." as the thousands separator and "," as the
# decimal mark (e.g. "2.804,73"), so strip the dots and turn the comma
# into a decimal point before converting to numeric.
year_end_2018[["islem_adedi"]] <- as.numeric(
  gsub(".", "", year_end_2018[["islem_adedi"]], fixed = TRUE)
)
year_end_2018[["islem_tutari"]] <- as.numeric(
  gsub(
    ",", ".",
    gsub(".", "", year_end_2018[["islem_tutari"]], fixed = TRUE),
    fixed = TRUE
  )
)

# Confirm both measure columns are now numeric.
str(year_end_2018)
## 'data.frame':    26 obs. of  3 variables:
##  $ isyeri      : chr  "ARABA KIRALAMA" "ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK PARÇA" "BENZIN VE YAKIT ISTASYONLARI" "BIREYSEL EMEKLILIK" ...
##  $ islem_adedi : num  266038 3361371 27007860 2254844 28278290 ...
##  $ islem_tutari: num  200 2805 5490 669 4610 ...

Step 2 - Analysis as of end of 2018

A - Total Number of Transactions and Total CC Spending per Sector

# Horizontal bar chart of the number of credit card transactions per sector.
adet <- ggplot(year_end_2018, aes(x = isyeri, y = islem_adedi)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(title = "Credit Card Transaction By Sector", x = "", y = "") +
  theme_minimal()

# Horizontal bar chart of the credit card transaction amount per sector
# (the source table reports amounts in million TL).
tutar <- ggplot(year_end_2018, aes(x = isyeri, y = islem_tutari)) +
  geom_bar(stat = "identity", fill = "lightgreen") +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(title = "Credit Card Total Spending By Sector", x = "", y = "") +
  theme_minimal()

# Stack the two charts vertically on a fresh page; size = "last" takes the
# column widths from the second plot so the panels line up.
grid.newpage()
grid.draw(
  rbind(
    ggplotGrob(adet),
    ggplotGrob(tutar),
    size = "last"
  )
)

B - Top 10 Sectors in Terms of Total CC Spending

# Total the spending per sector, compute each sector's share of the overall
# total (harcama_payi), and keep the ten sectors with the largest share.
share <- year_end_2018 %>%
  group_by(isyeri) %>%
  summarise(islem_tutari = sum(islem_tutari)) %>%
  mutate(harcama_payi = islem_tutari / sum(islem_tutari)) %>%
  top_n(10, harcama_payi)

# Bar chart in polar coordinates, one coloured wedge per sector, with each
# wedge labelled by its spending amount.
share_plot <- ggplot(share, aes(x = isyeri, y = islem_tutari, fill = isyeri)) +
  geom_bar(stat = "identity") +
  coord_polar() +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 0)
  ) +
  geom_text(aes(y = islem_tutari, label = islem_tutari)) +
  labs(title = "Sectoral shares")

share_plot

C - Average Spending per Transaction

# Average amount per transaction for each sector. islem_tutari comes from a
# column reported in million TL, so scaling by 1e6 expresses the average in
# TL. Every sector forms its own single-row group, so the division is
# effectively row-wise.
cart_volume <- year_end_2018 %>%
  group_by(isyeri) %>%
  mutate(avg_spending = islem_tutari / islem_adedi * 1e6)

cart_volume
## # A tibble: 26 x 4
## # Groups:   isyeri [26]
##    isyeri                             islem_adedi islem_tutari avg_spending
##    <chr>                                    <dbl>        <dbl>        <dbl>
##  1 ARABA KIRALAMA                          266038         200.         750.
##  2 ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK ~     3361371        2805.         834.
##  3 BENZIN VE YAKIT ISTASYONLARI          27007860        5490.         203.
##  4 BIREYSEL EMEKLILIK                     2254844         669.         297.
##  5 ÇESITLI GIDA                          28278290        4610.         163.
##  6 DOGRUDAN PAZARLAMA                      803029         750.         934.
##  7 EGITIM / KIRTASIYE / OFIS MALZEME~     6889839        1418.         206.
##  8 ELEKTRIK-ELEKTRONIK ESYA, BILGISA~     7980083        3918.         491.
##  9 GIYIM VE AKSESUAR                     28162255        5225.         186.
## 10 HAVAYOLLARI                            1757052        1212.         690.
## # ... with 16 more rows
# Horizontal bar chart of the average spending per transaction by sector.
ggplot(cart_volume, aes(x = isyeri, y = avg_spending)) +
  geom_bar(stat = "identity", fill = "orange") +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(title = "Amount of Spending per Transaction", x = "", y = "") +
  theme_minimal()