R Markdown

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## <U+221A> ggplot2 3.3.2     <U+221A> purrr   0.3.4
## <U+221A> tibble  3.0.4     <U+221A> dplyr   1.0.2
## <U+221A> tidyr   1.1.2     <U+221A> stringr 1.4.0
## <U+221A> readr   1.3.1     <U+221A> forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(readxl)
## Warning: package 'readxl' was built under R version 3.6.3
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Directory holding the EVDS export. The path is passed to read_excel()
# directly instead of calling setwd(), so the session's working directory
# is not changed as a side effect of loading the data.
data_dir <- "C:/Users/taylan.polat/Desktop/MEF"

# Read the first sheet of the workbook into a tibble and preview it.
df <- read_excel(file.path(data_dir, "EVDS.xlsx"))
head(df)
## # A tibble: 6 x 9
##   Tarih `TP AKONUTSAT1 ~ `TP AKONUTSAT2 ~ `TP AKONUTSAT3 ~ `TP AKONUTSAT4 ~
##   <chr> <chr>            <chr>                       <dbl>            <dbl>
## 1 2010~ <NA>             <NA>                           NA               NA
## 2 2010~ <NA>             <NA>                           NA               NA
## 3 2010~ <NA>             <NA>                           NA               NA
## 4 2010~ <NA>             <NA>                           NA               NA
## 5 2010~ <NA>             <NA>                           NA               NA
## 6 2010~ <NA>             <NA>                           NA               NA
## # ... with 4 more variables: `TP DISKONSAT ISTANBUL` <dbl>, `TP
## #   HEDONIKYKFE IST` <dbl>, `TP HKFE02` <dbl>, `TP TCBF02 ISTANBUL` <dbl>

Structure of Data

The structure below shows that several columns contain many NA values.

# Print a compact summary of df: dimensions, column types and leading values.
str(df)
## tibble [187 x 9] (S3: tbl_df/tbl/data.frame)
##  $ Tarih                : chr [1:187] "2010-01" "2010-02" "2010-03" "2010-04" ...
##  $ TP AKONUTSAT1 T40    : chr [1:187] NA NA NA NA ...
##  $ TP AKONUTSAT2 T40    : chr [1:187] NA NA NA NA ...
##  $ TP AKONUTSAT3 T40    : num [1:187] NA NA NA NA NA NA NA NA NA NA ...
##  $ TP AKONUTSAT4 T40    : num [1:187] NA NA NA NA NA NA NA NA NA NA ...
##  $ TP DISKONSAT ISTANBUL: num [1:187] NA NA NA NA NA NA NA NA NA NA ...
##  $ TP HEDONIKYKFE IST   : num [1:187] 35.9 36.6 37.4 38 38 37.6 37.3 38.1 38.8 39.1 ...
##  $ TP HKFE02            : num [1:187] 36 36.2 36.5 36.9 37.1 37 37.2 37.3 37.7 38.1 ...
##  $ TP TCBF02 ISTANBUL   : num [1:187] 1415 1420 1428 1443 1449 ...

Changing Colnames

Since the original column names are long, we replace them with shorter names that are easier to read and type.

# Replace the nine source headers with short names, in column order:
#   Tarih -> Tarih
#   TP AKONUTSAT1 T40 .. TP AKONUTSAT4 T40 -> TP1 .. TP4
#   TP DISKONSAT ISTANBUL -> TP5, TP HEDONIKYKFE IST -> TP6
#   TP HKFE02 -> TP7, TP TCBF02 ISTANBUL -> TP8
colnames(df) <- c("Tarih", paste0("TP", 1:8))
head(df)
## # A tibble: 6 x 9
##   Tarih   TP1   TP2     TP3   TP4   TP5   TP6   TP7   TP8
##   <chr>   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01 <NA>  <NA>     NA    NA    NA  35.9  36   1415.
## 2 2010-02 <NA>  <NA>     NA    NA    NA  36.6  36.2 1420.
## 3 2010-03 <NA>  <NA>     NA    NA    NA  37.4  36.5 1428.
## 4 2010-04 <NA>  <NA>     NA    NA    NA  38    36.9 1443.
## 5 2010-05 <NA>  <NA>     NA    NA    NA  38    37.1 1449 
## 6 2010-06 <NA>  <NA>     NA    NA    NA  37.6  37   1445.

Removing Rows

# Keep rows 38-130 and all nine columns.
# NOTE(review): presumably the span where the series are populated - confirm.
df <- df[38:130, 1:9]
# TP2 was read from Excel as character; convert it to numeric.
df$TP2 <- as.double(df$TP2)
# Parse the "YYYY-MM" strings into Date values (first day of each month).
df$Tarih <- as.Date(parse_date_time(df$Tarih, "ym"))
# A fixed colour belongs on the geom, not inside aes(): aes(color = "blue")
# maps the constant string "blue" to the colour scale, which draws the
# points in the default palette colour and adds a meaningless legend.
ggplot(df, aes(Tarih, TP7)) +
  geom_point(color = "blue")
## Warning: Removed 2 rows containing missing values (geom_point).

Next, we create the TP_AV variable for the period after August 2016 and examine how it changes over time.

# Build the analysis subset:
#   1. keep rows where TP4 exceeds 9000,
#   2. order them by TP8, largest first (TP8 itself is dropped in step 3),
#   3. keep the date and the three series used below,
#   4. derive TP_AV = TP2 * TP3 / TP4,
#   5. keep only observations dated after 2016-08-01.
df_new <- df %>%
  filter(TP4 > 9000) %>%
  arrange(desc(TP8)) %>%
  select(Tarih, TP2, TP3, TP4) %>%
  mutate(TP_AV = TP2 * TP3 / TP4) %>%
  filter(Tarih > "2016-08-01")
head(df_new)
## # A tibble: 6 x 5
##   Tarih        TP2   TP3   TP4 TP_AV
##   <date>     <dbl> <dbl> <dbl> <dbl>
## 1 2020-08-01 15367  8103 22189 5612.
## 2 2020-07-01 24000 10429 29003 8630.
## 3 2020-06-01 14767  8253 20546 5932.
## 4 2020-03-01  7843  6439 13407 3767.
## 5 2020-02-01  8281  7326 15336 3956.
## 6 2018-10-01  2164 13978 13178 2295.
# TP_AV over time: a dashed red line with each observation overlaid as a point.
ggplot(data = df_new, mapping = aes(x = Tarih, y = TP_AV)) +
  geom_line(colour = "red", linetype = "dashed") +
  geom_point()

Final Graph

We can observe the changes in the last 3 years more clearly in the graph below.

# Compare TP_AV, TP2 and TP3 over time on one chart.
# Colour is mapped to the series *name* and the actual colours are assigned by
# name in scale_colour_manual(). Previously the colour strings were placed in
# aes(), which turned them into factor levels sorted alphabetically
# ("darkblue", "green", "red"); the positional labels then tagged the TP3 line
# as "TP2" and the TP2 line as "TP3", and the requested colours were not used.
ggplot(df_new, aes(x = Tarih)) +
  geom_line(aes(y = TP_AV, colour = "TP_AV"), size = 1) +
  geom_line(aes(y = TP2, colour = "TP2"), size = 1) +
  geom_line(aes(y = TP3, colour = "TP3"), size = 1) +
  scale_colour_manual(
    name = "DatewithTP",
    # breaks fixes the legend order; values are matched to series by name.
    breaks = c("TP_AV", "TP2", "TP3"),
    values = c(TP_AV = "darkblue", TP2 = "red", TP3 = "green")
  )