First Step: Load the Necessary Libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(readxl)
Download Data From Source
# Fetch the BKM workbook from GitHub into a temporary file, then read it.
# The first 7 rows of the sheet are skipped and no header row is used, so
# the columns receive placeholder names until they are renamed.
tmp <- tempfile(fileext = "BKM_data.xlsx")
download.file(
  url = "https://github.com/pjournal/mef03-arslnbatu/blob/master/BKM_data.xlsx?raw=true",
  destfile = tmp,
  mode = "wb" # binary mode so the xlsx file is written unmodified
)
BKM <- readxl::read_excel(path = tmp, skip = 7, col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * … and 1 more problem
# Delete the temporary download; its contents were already read into BKM.
file.remove(tmp)
## [1] TRUE
Check What Is in the Data
# Preview the first rows of the imported data.
head(BKM)
## # A tibble: 6 x 6
## ...1 ...2 ...3 ...4 ...5 ...6
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 EĞİTİM / KIRTASİYE / OFİS MALZEMELERİ 4653286 2585481 1427. 251. 6-20…
## 2 ELEKTRİK-ELEKTRONİK EŞYA, BİLGİSAYAR 7230732 2269938 3894. 416. 6-20…
## 3 GİYİM VE AKSESUAR 32253174 15240380 6117. 2129. 6-20…
## 4 HAVAYOLLARI 1792404 489887 1828. 738. 6-20…
## 5 HİZMET SEKTÖRLERİ 22032853 6229279 427. 713. 6-20…
## 6 KAMU/VERGI ODEMELERI 2740192 1300247 1240. 220. 6-20…
# Preview the last rows of the imported data.
tail(BKM)
## # A tibble: 6 x 6
## ...1 ...2 ...3 ...4 ...5 ...6
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 SİGORTA 4654473 24643 3016. 9.03e0 1-20…
## 2 TELEKOMÜNİKASYON 18478780 2647222 1806. 2.02e2 1-20…
## 3 YAPI MALZEMELERİ, HIRDAVAT, NALB… 2996625 957684 2243. 1.25e2 1-20…
## 4 YEMEK 40160065 32694355 2301. 1.02e3 1-20…
## 5 DİĞER 5747341 15276740 1803. 5.97e2 1-20…
## 6 TOPLAM 329481363 158527514 62661. 9.84e3 1-20…
Organize the Data
# Give the six imported columns descriptive names, in sheet order.
names(BKM) <- c(
  "work_type",
  "transection_item_credit",
  "transection_item_debit",
  "transection_cost_credit",
  "transection_cost_debit",
  "Dates"
)
Analyze data
# Credit summary per work_type: sum of transection_item_credit (sum_credit),
# sum of transection_cost_credit (sum_cost) and their ratio
# item_cost = sum_credit / sum_cost, sorted by that ratio, largest first.
# The "TOPLAM" row is dropped, as in new4, so that it does not appear as if
# it were a work type of its own.
# NOTE(review): "TOPLAM" looks like the sheet's grand-total row - confirm.
new2 <- BKM %>%
  filter(work_type != "TOPLAM") %>%
  group_by(work_type) %>%
  summarise(
    sum_credit = sum(transection_item_credit),
    sum_cost = sum(transection_cost_credit)
  ) %>%
  mutate(item_cost = sum_credit / sum_cost) %>%
  arrange(desc(item_cost))
# Debit summary per work_type: sum of transection_item_debit (sum_debit),
# sum of transection_cost_debit (sum_cost) and their rounded ratio
# item_cost = round(sum_debit / sum_cost), sorted by sum_debit, largest first.
# The "TOPLAM" row is dropped, as in new4; otherwise it would sit at the top
# of this ranking alongside the individual work types.
# NOTE(review): "TOPLAM" looks like the sheet's grand-total row - confirm.
new3 <- BKM %>%
  filter(work_type != "TOPLAM") %>%
  group_by(work_type) %>%
  summarise(
    sum_debit = sum(transection_item_debit),
    sum_cost = sum(transection_cost_debit)
  ) %>%
  mutate(item_cost = round(sum_debit / sum_cost)) %>%
  arrange(desc(sum_debit))
# Sum of transection_cost_debit for every work_type / Dates combination,
# leaving out the "TOPLAM" row, ordered from the largest sum to the smallest.
new4 <- BKM %>%
  filter(work_type != "TOPLAM") %>%
  group_by(work_type, Dates) %>%
  summarise(sum_debit = sum(transection_cost_debit)) %>%
  arrange(desc(sum_debit))
Show the analysis with data plotted
# Bar chart of sum_debit per work_type, one fill colour per work type.
# new4 holds one row per work_type / Dates pair, so the rows of a work type
# are stacked into a single bar.
# The legend is hidden because it would only repeat the x-axis labels, which
# are rotated 45 degrees.
ggplot(data = new4, aes(x = work_type, y = sum_debit, fill = work_type)) +
  geom_col() +
  # theme_bw() is a complete theme, so all tweaks must come after it.
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5, size = 12)
  )
Compare transection_cost_debit and transection_cost_credit
theme_set(theme_bw()) # pre-set the bw theme.
# Scatter plot of transection_cost_debit (x) against transection_cost_credit
# (y), one slightly jittered point per row of BKM.
g <- ggplot(BKM, aes(transection_cost_debit, transection_cost_credit))
g + geom_jitter(width = .5, size = 1) +
  labs(
    subtitle = "compare",
    # Axis titles follow the mapping above: debit on x, credit on y.
    y = "cost credit",
    x = "cost debit",
    title = "analyze"
  )
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
theme_set(theme_classic())
# Plot
# Dot plot of sum_debit per work_type. Each work type gets a dashed guide
# line spanning the minimum to the maximum sum_debit; coord_flip() puts the
# work types on the vertical axis.
ggplot(new4, aes(x = work_type, y = sum_debit)) +
  geom_point(col = "tomato2", size = 3) + # Draw points
  geom_segment(
    aes(
      x = work_type, # start and end share the same work type
      xend = work_type,
      y = min(sum_debit),
      yend = max(sum_debit)
    ),
    linetype = "dashed",
    size = 0.1
  ) + # Draw dashed lines
  labs(
    title = "Dot Plot",
    subtitle = "Work type vs sum_debit",
    caption = "source: new4"
  ) +
  coord_flip()