I examined the data of BKM, “Sectoral Distribution of Expenditures in 6 Months in 2019”.
The web site is here: BKM-Secilen aya ait sektorel gelisim
I have compiled the data in excel and upload the raw data in repository. Used Tidyverse, GGplot2 and Scales from Library of R.
library(tidyverse)
library(ggplot2)
library(scales)
Automatically download the raw data from the repository and put it in a temporary file. Then can read that excel document into R and remove the temp file.
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/pjournal/mef03-Nilgun/blob/master/2019-BKM%20Rapor-Konsolide.xlsx?raw=true",destfile=tmp, mode="wb")
raw_data<-readxl::read_excel(tmp,col_names=FALSE,skip=1)
file.remove(tmp)
## [1] TRUE
To organize the data, rename The Columns:
colnames(raw_data)<-c("Months","Sector","Numb_CC","Numb_DC","Amount_CC","Amount_DC")
Let’s see the Data:
head(raw_data)
## # A tibble: 6 x 6
## Months Sector Numb_CC Numb_DC Amount_CC Amount_DC
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 6-2019 ARABA KIRALAMA 306426 7.71e4 257. 43.8
## 2 6-2019 ARAÇ KIRALAMA-SATIS/SERVIS/Y~ 3335321 9.02e5 2589. 191.
## 3 6-2019 BENZIN VE YAKIT ISTASYONLARI 32572382 1.38e7 6516. 1195.
## 4 6-2019 BIREYSEL EMEKLILIK 2060390 1.26e3 687. 0.42
## 5 6-2019 ÇESITLI GIDA 31076151 2.04e7 5148. 995.
## 6 6-2019 DOGRUDAN PAZARLAMA 738068 4.53e4 724. 21.3
Let’s calculate the sectoral distribution of expenditures proportionally:
Sectoral_Sum<-raw_data%>%select(Months,Sector,Amount_CC,Amount_DC)%>%mutate(Amount_Sum = Amount_CC+Amount_DC)%>%filter(Sector!="TOPLAM")
Sectoral_Sum<-Sectoral_Sum%>%group_by(Sector)%>%summarise(Amount_Sum=sum(Amount_Sum))%>%mutate(sec_prp=percent(Amount_Sum/sum(Amount_Sum)))%>%arrange(desc(Amount_Sum))
The sectors with the highest total expenditure in 6 months are as follows:
Top_Ten_Sector<-head(Sectoral_Sum,10)
print(Top_Ten_Sector)
## # A tibble: 10 x 3
## Sector Amount_Sum sec_prp
## <chr> <dbl> <chr>
## 1 MARKET VE ALISVERIS MERKEZLERI 83789. 18.4%
## 2 GIYIM VE AKSESUAR 39850. 8.8%
## 3 BENZIN VE YAKIT ISTASYONLARI 39811. 8.7%
## 4 ÇESITLI GIDA 33589. 7.4%
## 5 HIZMET SEKTÖRLERI 28406. 6.2%
## 6 YEMEK 22506. 4.9%
## 7 ELEKTRIK-ELEKTRONIK ESYA, BILGISAYAR 21039. 4.6%
## 8 SAGLIK/SAGLIK ÜRÜNLERI/KOZMETIK 19206. 4.2%
## 9 YAPI MALZEMELERI, HIRDAVAT, NALBURIYE 17692. 3.9%
## 10 SIGORTA 16356. 3.6%
Top_Ten_Sector%>%ggplot(
aes(x=reorder(Sector,Amount_Sum),y=Amount_Sum,fill= Sector)) + geom_bar(stat="identity") +
theme(legend.position = "none", axis.text.x = element_text(size=9,angle = 90)) + coord_flip() + labs(x="",y="Total Expenditures",title="Top Spending Sectors",fill="")
Let’s see the total distribution of expenditures made as credit card or debit card, by sectors.
Card_Dist<-raw_data%>%filter(Sector!="TOPLAM")%>%group_by(Sector)%>%summarise(
CC=sum(Numb_CC),
DC=sum(Numb_DC))%>%mutate(Sum_Card=CC+DC)%>%arrange(desc(Sum_Card))%>%pivot_longer(cols=CC:DC,names_to="Card_Type",values_to="Count_Of_Cards")%>%print(n=6)
## # A tibble: 52 x 4
## Sector Sum_Card Card_Type Count_Of_Cards
## <chr> <dbl> <chr> <dbl>
## 1 MARKET VE ALISVERIS MERKEZLERI 957742221 CC 615280253
## 2 MARKET VE ALISVERIS MERKEZLERI 957742221 DC 342461968
## 3 YEMEK 486166406 CC 254907222
## 4 YEMEK 486166406 DC 231259184
## 5 ÇESITLI GIDA 286302262 CC 179587854
## 6 ÇESITLI GIDA 286302262 DC 106714408
## # ... with 46 more rows
Sectors with the highest spending by total number of cards are as follows:
Top_Ten_Card<-head(Card_Dist,20)
ggplot(Top_Ten_Card,aes(x=reorder(Sector,Sum_Card), y=Count_Of_Cards/1000000,fill=Card_Type))+geom_bar(stat="identity",position="dodge")+theme(axis.text.x = element_text(size=9,angle = 90),legend.position = "top") + coord_flip()+labs(x="Sector",y="Count of Cards(Million)",title="Distribution by Type of Cards-First 10 Sector",fill="Type of Card")
Let’s determine in which sectors credit card usage is not very different from debit card usage.
Card_Comp<-raw_data%>%group_by(Sector)%>%summarise(sum_CC_amount=sum(Amount_CC),sum_DC_amount=sum(Amount_DC))%>%filter(Sector!="TOPLAM")
Card_Comp<-Card_Comp%>%mutate(Diff_Card_Amount=sum_CC_amount-sum_DC_amount)%>%arrange(Diff_Card_Amount)%>%print(10)
## # A tibble: 26 x 4
## Sector sum_CC_amount sum_DC_amount Diff_Card_Amount
## <chr> <dbl> <dbl> <dbl>
## 1 KUMARHANE/IÇKILI YERLER 533. 205. 328.
## 2 ARABA KIRALAMA 1259. 136. 1124.
## 3 KULÜP / DERNEK /SOSYAL HIZ~ 1548. 195. 1353.
## 4 KUYUMCULAR 5938. 2546. 3393.
## 5 BIREYSEL EMEKLILIK 4250. 2.91 4248.
## 6 MÜTEAHHIT ISLERI 4713. 244. 4470.
## 7 DOGRUDAN PAZARLAMA 4590. 82.8 4507.
## 8 SEYAHAT ACENTELERI/TASIMAC~ 7801. 2621. 5180.
## 9 KONAKLAMA 7440. 2083. 5357.
## 10 EGITIM / KIRTASIYE / OFIS ~ 6901. 1427. 5473.
## # ... with 16 more rows
Let’s see the first ten Sectors where credit or debit cards do not matter.
Top_Card_Comp<-head(Card_Comp,10)
Top_Card_Comp%>%ggplot(aes(x=reorder(Sector,-Diff_Card_Amount),y=Diff_Card_Amount,color=Sector))+geom_point(size=5)+coord_flip()+theme(axis.text.x = element_text(size=9,angle=90),legend.position ="None") + labs(x="Sector",y="Difference Amount of CC-DC",title="No Matter Credit Card or Debit Card Top Ten",color="sector")