library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## <U+221A> ggplot2 3.3.2 <U+221A> purrr 0.3.4
## <U+221A> tibble 3.0.4 <U+221A> dplyr 1.0.2
## <U+221A> tidyr 1.1.2 <U+221A> stringr 1.4.0
## <U+221A> readr 1.3.1 <U+221A> forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(readxl)
## Warning: package 'readxl' was built under R version 3.6.3
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Work from the folder holding the EVDS export, then load the workbook.
# NOTE(review): the absolute, user-specific path makes this script
# non-portable; it only runs on a machine where this folder exists.
setwd("C:/Users/taylan.polat/Desktop/MEF")
df <- read_excel(path = "EVDS.xlsx")
# Preview the first rows of the imported data.
head(x = df)
## # A tibble: 6 x 9
## Tarih `TP AKONUTSAT1 ~ `TP AKONUTSAT2 ~ `TP AKONUTSAT3 ~ `TP AKONUTSAT4 ~
## <chr> <chr> <chr> <dbl> <dbl>
## 1 2010~ <NA> <NA> NA NA
## 2 2010~ <NA> <NA> NA NA
## 3 2010~ <NA> <NA> NA NA
## 4 2010~ <NA> <NA> NA NA
## 5 2010~ <NA> <NA> NA NA
## 6 2010~ <NA> <NA> NA NA
## # ... with 4 more variables: `TP DISKONSAT ISTANBUL` <dbl>, `TP
## # HEDONIKYKFE IST` <dbl>, `TP HKFE02` <dbl>, `TP TCBF02 ISTANBUL` <dbl>
We can see that the data contains many NA values.
# Show each column's type and first values to check how the sheet was parsed.
str(object = df)
## tibble [187 x 9] (S3: tbl_df/tbl/data.frame)
## $ Tarih : chr [1:187] "2010-01" "2010-02" "2010-03" "2010-04" ...
## $ TP AKONUTSAT1 T40 : chr [1:187] NA NA NA NA ...
## $ TP AKONUTSAT2 T40 : chr [1:187] NA NA NA NA ...
## $ TP AKONUTSAT3 T40 : num [1:187] NA NA NA NA NA NA NA NA NA NA ...
## $ TP AKONUTSAT4 T40 : num [1:187] NA NA NA NA NA NA NA NA NA NA ...
## $ TP DISKONSAT ISTANBUL: num [1:187] NA NA NA NA NA NA NA NA NA NA ...
## $ TP HEDONIKYKFE IST : num [1:187] 35.9 36.6 37.4 38 38 37.6 37.3 38.1 38.8 39.1 ...
## $ TP HKFE02 : num [1:187] 36 36.2 36.5 36.9 37.1 37 37.2 37.3 37.7 38.1 ...
## $ TP TCBF02 ISTANBUL : num [1:187] 1415 1420 1428 1443 1449 ...
Since the column names are too long, we rename them to shorter names that are easier to read and work with.
# Replace the long EVDS series codes with short positional aliases:
# the date column keeps its name and the eight series become TP1..TP8.
names(df) <- c("Tarih", paste0("TP", 1:8))
head(df)
## # A tibble: 6 x 9
## Tarih TP1 TP2 TP3 TP4 TP5 TP6 TP7 TP8
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01 <NA> <NA> NA NA NA 35.9 36 1415.
## 2 2010-02 <NA> <NA> NA NA NA 36.6 36.2 1420.
## 3 2010-03 <NA> <NA> NA NA NA 37.4 36.5 1428.
## 4 2010-04 <NA> <NA> NA NA NA 38 36.9 1443.
## 5 2010-05 <NA> <NA> NA NA NA 38 37.1 1449
## 6 2010-06 <NA> <NA> NA NA NA 37.6 37 1445.
# Keep rows 38-130 and all nine columns.
# NOTE(review): presumably this window was chosen to skip the leading rows
# where most series are NA - confirm against the workbook.
df <- df[38:130, 1:9]
# TP2 was imported as character; convert it to numeric for arithmetic.
df[["TP2"]] <- as.double(df[["TP2"]])
# Parse the "YYYY-MM" strings into Date values.
df[["Tarih"]] <- as.Date(parse_date_time(df[["Tarih"]], orders = "ym"))
# Scatter plot of TP7 over time.
# The colour is a fixed constant, so it is set on the geom. Inside aes() the
# string "blue" is treated as a data value, which draws the points in the
# default palette colour and adds a meaningless legend entry named "blue".
ggplot(df, aes(x = Tarih, y = TP7)) +
  geom_point(color = "blue")
## Warning: Removed 2 rows containing missing values (geom_point).
Next, we examine how the TP_AV variable, which we compute for the period after August 2016, changes over time.
# Build the analysis subset:
#   - keep rows with TP4 above 9000 and dated after 2016-08-01
#     (rows where either condition is NA are dropped by filter()),
#   - order by TP8 descending (TP8 is used for ordering only, then dropped),
#   - derive TP_AV = TP2 * TP3 / TP4,
#   - keep only the date, the three inputs and the derived column.
df_new <- df %>%
  filter(TP4 > 9000, Tarih > "2016-08-01") %>%
  arrange(desc(TP8)) %>%
  mutate(TP_AV = TP2 * TP3 / TP4) %>%
  select(Tarih, TP2, TP3, TP4, TP_AV)
head(df_new)
## # A tibble: 6 x 5
## Tarih TP2 TP3 TP4 TP_AV
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2020-08-01 15367 8103 22189 5612.
## 2 2020-07-01 24000 10429 29003 8630.
## 3 2020-06-01 14767 8253 20546 5932.
## 4 2020-03-01 7843 6439 13407 3767.
## 5 2020-02-01 8281 7326 15336 3956.
## 6 2018-10-01 2164 13978 13178 2295.
# TP_AV over time: a dashed red line connecting the observations, with the
# individual observations drawn as points on top.
ggplot(data = df_new, mapping = aes(x = Tarih, y = TP_AV)) +
  geom_line(colour = "red", linetype = "dashed") +
  geom_point()
We can observe the changes in the last 3 years more clearly from the plot below.
# Compare TP_AV, TP2 and TP3 over time on one plot.
# Colour is mapped to the series name and the actual colours are assigned by
# name in scale_colour_manual(). Mapping constant colour strings inside aes()
# and relabelling them positionally is fragile: the levels are sorted
# alphabetically ("darkblue", "green", "red"), so the labels for TP2 and TP3
# end up swapped and the requested colours are never used.
ggplot(df_new, aes(x = Tarih)) +
  geom_line(aes(y = TP_AV, colour = "TP_AV"), size = 1) +
  geom_line(aes(y = TP2, colour = "TP2"), size = 1) +
  geom_line(aes(y = TP3, colour = "TP3"), size = 1) +
  scale_colour_manual(
    name = "DatewithTP",
    breaks = c("TP_AV", "TP2", "TP3"),
    values = c(TP_AV = "darkblue", TP2 = "red", TP3 = "green")
  )