BKM-Assignment

Emre Usta 11/10/2019

1. Introduction and Preparation

The Interbank Card Center (BKM) was established in 1990 as a partnership of 13 public and private Turkish banks, with the purpose of providing solutions to common problems and developing the rules and standards for credit and debit cards within Turkey's card payment system.

The BKM dataset used in this analysis contains the number and volume of transactions made with credit cards and debit cards in Turkey, broken down by merchant category group.

# Build the BKM sector-report URL for one reporting period.
#
# year, month: values spliced into the `filter_year` and `filter_month`
#   query parameters as-is (no padding or validation is applied).
# Returns the URL as a character vector.
filterYearMonth <- function(year, month) {
  paste0(
    'https://bkm.com.tr/en/secilen-aya-ait-sektorel-gelisim/?filter_year=',
    year,
    '&filter_month=',
    month,
    '&List=Lis'
  )
}

# Years and months to download. Both stay as top-level character vectors.
year <- c('2017', '2018', '2019')
month <- c('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12')

# Last period to fetch (inclusive), written as "YYYYMM".
# Periods from 2019-07 onwards are deliberately not requested.
last_period <- '201906'

# One row per (year, month) pair in chronological order: expand.grid() varies
# its first argument fastest, so months cycle within each year.
periods <- expand.grid(month = month, year = year, stringsAsFactors = FALSE)
periods <- periods[paste0(periods$year, periods$month) <= last_period, ]

# Collect one data frame per period and bind them once after the loop,
# instead of growing a data frame with rbind() on every iteration.
monthly_tables <- vector("list", nrow(periods))

for (k in seq_len(nrow(periods))) {
  period_year <- periods$year[k]
  period_month <- periods$month[k]

  page <- read_html(filterYearMonth(period_year, period_month))

  # The 4th <table> on the page is the one used here. header = FALSE keeps
  # every row as data, so the first two rows (presumably the header rows --
  # confirm against the page) are dropped explicitly. A negative index is
  # used so a table with fewer than three rows yields zero rows rather than
  # a reversed 3:2 selection.
  raw_df <-
    page %>%
    html_nodes("table") %>%
    .[4] %>%
    html_table(fill = TRUE, header = FALSE) %>%
    as.data.frame() %>%
    slice(-(1:2))

  # Replace NA with 0 in numeric columns and tag the rows with their period.
  monthly_tables[[k]] <-
    raw_df %>%
    mutate_if(is.numeric, ~ ifelse(is.na(.), 0, .)) %>%
    mutate(year = period_year, month = period_month)
}

# Stack all periods into a single data frame.
raw_df_full <- do.call(rbind, monthly_tables)
rownames(raw_df_full) <- NULL

# Adding column names
colnames(raw_df_full) <- c('merchant_category', 'cc_transaction_count', 'dc_transaction_count', 'cc_transaction_amount', 'dc_transaction_amount', 'year', 'month')

2. Analysis

# Yearly transaction counts per card type, taken from the 'TOTAL' rows.
# The count columns are text, so the '.' characters (presumably thousands
# separators) are stripped before converting to numeric.
df_cc_dc <-
  raw_df_full %>%
  filter(merchant_category == 'TOTAL') %>%
  group_by(year) %>%
  summarise(
    credit_card_cnt = sum(as.numeric(gsub("\\.", "", cc_transaction_count)), na.rm = TRUE),
    debit_card_cnt = sum(as.numeric(gsub("\\.", "", dc_transaction_count)), na.rm = TRUE)
  )

# Long format for plotting: one row per (year, card type).
# `cnt` holds all credit counts followed by all debit counts, so the labels
# must be repeated block-wise with `each =`. Relying on data.frame() recycling
# of c("Credit", "Debit") alternates the labels and mislabels rows whenever the
# number of years is odd. The years come from df_cc_dc itself so they always
# line up with the counts.
df_card_type <- data.frame(
  year = rep(df_cc_dc$year, times = 2),
  card_type = rep(c("Credit", "Debit"), each = nrow(df_cc_dc)),
  cnt = c(df_cc_dc$credit_card_cnt, df_cc_dc$debit_card_cnt)
)

df_cc_dc
## # A tibble: 3 x 3
##   year  credit_card_cnt debit_card_cnt
##   <chr>           <dbl>          <dbl>
## 1 2017       3367634485     1421081163
## 2 2018       3787647630     1834645929
## 3 2019       2044419739     1055668267
# Bar chart of transaction counts by card type, one facet per year.
ggplot(df_card_type, aes(x = card_type, y = cnt, fill = card_type)) +
  geom_bar(stat = "identity") +
  # trim = TRUE stops format() from left-padding every label to the width of
  # the longest one; the padding spaces would shift shorter labels off-centre.
  geom_text(
    aes(label = format(cnt, big.mark = ",", trim = TRUE)),
    vjust = 1.6, color = "black", size = 2.8
  ) +
  theme_minimal() +
  labs(
    x = "Card Type",
    y = "# of Transactions",
    title = "Number of Transactions with Respect to Card Type Through Years"
  ) +
  facet_wrap(~year)

# Top three merchant categories per year, ranked by credit card transaction
# count. The 'TOTAL' rows are sub-totals and are excluded first.
df_category <-
  raw_df_full %>%
  filter(merchant_category != 'TOTAL') %>%
  group_by(year, merchant_category) %>%
  summarise(
    credit_card_cnt = sum(
      as.numeric(gsub("\\.", "", cc_transaction_count)),
      na.rm = TRUE
    ),
    debit_card_cnt = sum(
      as.numeric(gsub("\\.", "", dc_transaction_count)),
      na.rm = TRUE
    )
  ) %>%
  # After summarise() the result is still grouped by year, so the rank below
  # restarts at 1 for every year.
  arrange(year, desc(credit_card_cnt)) %>%
  mutate(rwn = row_number()) %>%
  filter(rwn <= 3)

df_category
## # A tibble: 9 x 5
## # Groups:   year [3]
##   year  merchant_category            credit_card_cnt debit_card_cnt   rwn
##   <chr> <chr>                                  <dbl>          <dbl> <int>
## 1 2017  MARKETS AND SHOPPING CENTERS      1008859555      470178217     1
## 2 2017  FOOD                               377919390      301403819     2
## 3 2017  CLOTHING AND ACCESSORY             331246861      107065272     3
## 4 2018  MARKETS AND SHOPPING CENTERS      1138691242      580057869     1
## 5 2018  FOOD                               454714664      368313677     2
## 6 2018  PETROL STATIONS                    336479726      118657680     3
## 7 2019  MARKETS AND SHOPPING CENTERS       615280253      342461968     1
## 8 2019  FOOD                               254907222      231259184     2
## 9 2019  VARIOUS FOOD                       179587854      106714408     3
# Stacked bars: credit card transaction counts of each year's top three
# merchant categories.
ggplot(
  data = df_category,
  mapping = aes(x = year, y = credit_card_cnt, fill = merchant_category)
) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(
    title = "Top 3 Merchant Category Group with Most Credit Card Transactions",
    y = "# of Transactions",
    fill = "Category"
  ) +
  theme(axis.text.x = element_text())