# Import the Istanbul property data from the Excel workbook, reading at most
# 130 data rows.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that exact file exists.
raw_df <- read_xlsx(
  path = "C:/Users/ETR04585/Desktop/BDA/Courses/1st Semester/Data Analytics Essentials with R/Assingment 2_dplr_ggplot/EVDS_istanbul_property_data.xlsx",
  n_max = 130
)
Let's preview the data.
# Print a compact overview of the imported columns and their types.
raw_df %>%
  glimpse()
## Rows: 129
## Columns: 9
## $ Tarih <chr> "2010-01", "2010-02", "2010-03", "2010-04",...
## $ `TP AKONUTSAT1 T40` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ `TP AKONUTSAT2 T40` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ `TP AKONUTSAT3 T40` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ `TP AKONUTSAT4 T40` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ `TP DISKONSAT ISTANBUL` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ `TP HEDONIKYKFE IST` <dbl> 35.9, 36.6, 37.4, 38.0, 38.0, 37.6, 37.3, 3...
## $ `TP HKFE02` <dbl> 36.0, 36.2, 36.5, 36.9, 37.1, 37.0, 37.2, 3...
## $ `TP TCBF02 ISTANBUL` <dbl> 1414.9, 1420.1, 1427.9, 1442.7, 1449.0, 144...
# Tarih holds "YYYY-MM" strings; append a day component so the values can be
# parsed into Date objects (first day of each month).
raw_df[["Tarih"]] <- as.Date(
  paste0(raw_df[["Tarih"]], "-01"),
  format = "%Y-%m-%d"
)
I renamed the columns to give them more understandable names.
# Replace the raw spreadsheet column names with short, readable ones
# (rename() takes new_name = old_name pairs), then inspect the result.
adj_df <- raw_df %>%
  rename(
    totalsales = `TP AKONUTSAT1 T40`,
    mortgaged = `TP AKONUTSAT2 T40`,
    first = `TP AKONUTSAT3 T40`,
    second = `TP AKONUTSAT4 T40`,
    foreigner = `TP DISKONSAT ISTANBUL`,
    pi_new = `TP HEDONIKYKFE IST`,
    pi_old = `TP HKFE02`,
    tl_m2 = `TP TCBF02 ISTANBUL`,
    years = `Tarih`
  )

adj_df %>%
  glimpse()
## Rows: 129
## Columns: 9
## $ years <date> 2010-01-01, 2010-02-01, 2010-03-01, 2010-04-01, 2010-05...
## $ totalsales <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ mortgaged <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ first <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ second <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ foreigner <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ pi_new <dbl> 35.9, 36.6, 37.4, 38.0, 38.0, 37.6, 37.3, 38.1, 38.8, 39...
## $ pi_old <dbl> 36.0, 36.2, 36.5, 36.9, 37.1, 37.0, 37.2, 37.3, 37.7, 38...
## $ tl_m2 <dbl> 1414.9, 1420.1, 1427.9, 1442.7, 1449.0, 1445.4, 1452.6, ...
# Show only the observations whose date falls after 2012.
filter(adj_df, year(years) > 2012)
## # A tibble: 93 x 9
## years totalsales mortgaged first second foreigner pi_new pi_old tl_m2
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2013-01-01 18235 8423 8298 9937 138 49.5 47.8 2064.
## 2 2013-02-01 18971 8836 8277 10694 120 50.4 48.8 2114.
## 3 2013-03-01 21570 10164 9542 12028 198 51.1 49.9 2158.
## 4 2013-04-01 20791 9726 8751 12040 209 51.6 50.9 2186.
## 5 2013-05-01 22030 10805 9371 12659 188 52.2 51.6 2218.
## 6 2013-06-01 19357 9762 8160 11197 155 53 52.4 2263.
## 7 2013-07-01 20668 10071 9034 11634 192 53.8 52.9 2300.
## 8 2013-08-01 14930 6834 6960 7970 170 54.8 53.5 2320.
## 9 2013-09-01 18514 8153 8128 10386 156 55.6 54.3 2346
## 10 2013-10-01 14866 6268 6737 8129 181 56.3 55 2394.
## # ... with 83 more rows
# Line chart of mortgaged sales over time, restricted to dates after 2013.
ggplot(
  data = filter(adj_df, year(years) > 2013),
  mapping = aes(x = years, y = mortgaged)
) +
  geom_line() +
  labs(
    x = "Years",
    y = "Mortgaged Sale",
    title = "Mortgaged Sale Respected by Years"
  )
# Line chart of the change in total sales between consecutive observations,
# restricted to dates after 2013.
#
# lag() with its default offset returns the value from the previous row, and
# the rows are monthly, so `difference` is a month-over-month change. The title
# therefore says "Previous Month" rather than "Previous Year".
# The first row after the filter has no predecessor, so its difference is NA
# and geom_line() drops it with a warning.
adj_df %>%
  filter(year(years) > 2013) %>%
  mutate(difference = totalsales - lag(totalsales)) %>%
  ggplot(aes(years, difference)) +
  geom_line() +
  labs(
    x = "Years",
    y = "Total Sales Change",
    title = "Total Sales Change Compared to The Previous Month"
  )
## Warning: Removed 1 row(s) containing missing values (geom_path).