From Raw to Civilized Data

First, I locate the data on the BKM website and download the last 6 months of data (January–June 2019) with the rvest package. My goal is a reproducible example of data analysis that goes from the raw data, located somewhere on the web, to the final analysis.

Download Raw Data

I create two variables (url, page) and then scrape the data from the BKM website. To get the last 6 months, I iterate over the months and change the month filter in the URL for each request. Finally, I add a month column to the data frame and rename the columns.

library(rvest)
library(dplyr)
library(ggplot2)
library(scales)
library(plyr)

# Scrape the BKM sector report for each month in `report_months` and build
# one data frame (`tablo_son`) holding the sector label, four numeric
# measures and the month the figures belong to.

base_url <- "https://bkm.com.tr/secilen-aya-ait-sektorel-gelisim/"
report_year <- 2019
report_months <- 1:6

# Download the report page of a single month and return its sector table
# with an extra `month` column.
#
# month: month number sent as the `filter_month` query parameter.
# year:  year sent as the `filter_year` query parameter.
scrape_month <- function(month, year = report_year) {
  url <- paste0(base_url, "?filter_year=", year, "&filter_month=", month)
  page <- read_html(url)
  # The figures are read from the 4th table of the page; its first two rows
  # are dropped (presumably header rows -- confirm against the live page).
  monthly <- html_table(page, fill = TRUE)[[4]][-c(1:2), ]
  # Tag the rows here so the month never depends on a fixed number of
  # sectors per month.
  monthly$month <- rep(as.numeric(month), nrow(monthly))
  monthly
}

# Every month keeps its own sector labels (column X1), so a label can never
# be attached to the wrong row if the site changes the order or number of
# sectors between months.
tablo <- bind_rows(lapply(report_months, scrape_month))

# Drop the "TOPLAM" (Turkish for "total") row of each month so that only
# individual sectors remain.
tablo_1 <- tablo %>% filter(X1 != "TOPLAM")

# Convert text such as "2.185,84" ("." groups thousands, "," marks decimals)
# to the number 2185.84.
parse_tr_number <- function(x) {
  as.numeric(gsub(",", ".", gsub(".", "", x, fixed = TRUE), fixed = TRUE))
}

tablo2 <- as.data.frame(tablo_1)
rownames(tablo2) <- NULL

# Only the measure columns are converted; the sector label and the month are
# left alone, so no "NAs introduced by coercion" warning is raised.
measure_cols <- setdiff(names(tablo2), c("X1", "month"))
tablo2[measure_cols] <- lapply(tablo2[measure_cols], parse_tr_number)

# Fail early with a clear message if the page layout no longer matches the
# six column names assigned below.
stopifnot(ncol(tablo2) == 6)
colnames(tablo2) <- c(
  "sector",
  "transaction_num_cc",
  "transaction_num_debit",
  "transaction_amount_cc_mio",
  "transaction_amount_debit_mio",
  "month"
)

tablo_son <- tablo2

You can view the head and tail of the dataset in the tables below, respectively.

# Show the first rows of tablo_son (head() prints six rows by default).
head(tablo_son)
##                                   sector transaction_num_cc
## 1                         ARABA KIRALAMA             256372
## 2 ARAÇ KIRALAMA-SATIS/SERVIS/YEDEK PARÇA            2967019
## 3           BENZIN VE YAKIT ISTASYONLARI           25277186
## 4                     BIREYSEL EMEKLILIK            2271587
## 5                           ÇESITLI GIDA           28362091
## 6                     DOGRUDAN PAZARLAMA             757602
##   transaction_num_debit transaction_amount_cc_mio
## 1                 49296                    195.13
## 2                642136                   2185.84
## 3               8684036                   5066.04
## 4                   697                    716.42
## 5              15221891                   4473.98
## 6                 40038                    678.99
##   transaction_amount_debit_mio month
## 1                        14.77     1
## 2                       127.16     1
## 3                       680.01     1
## 4                         0.30     1
## 5                       673.70     1
## 6                         7.81     1
# Show the last rows of tablo_son (tail() prints six rows by default).
tail(tablo_son)
##                                    sector transaction_num_cc
## 151        SEYAHAT ACENTELERI/TASIMACILIK            7224598
## 152                               SIGORTA            4513691
## 153                      TELEKOMÜNIKASYON           17915719
## 154 YAPI MALZEMELERI, HIRDAVAT, NALBURIYE            3786201
## 155                                 YEMEK           45687937
## 156                                 DIGER            4984660
##     transaction_num_debit transaction_amount_cc_mio
## 151               3152401                   2311.90
## 152                 42440                   2919.50
## 153               3236866                   1720.25
## 154               1347371                   2784.08
## 155              43035091                   2918.06
## 156               1072573                   1414.83
##     transaction_amount_debit_mio month
## 151                       620.16     6
## 152                        11.44     6
## 153                       247.08     6
## 154                       199.37     6
## 155                      1497.76     6
## 156                       170.06     6