Preparing data for analysis

raw_df = read_xlsx("C:\\data\\EVDS_istanbul_property_data\\EVDS_istanbul_property_data.xlsx")
print(raw_df,n=5)
## # A tibble: 187 x 9
##   Tarih `TP AKONUTSAT1 ~ `TP AKONUTSAT2 ~ `TP AKONUTSAT3 ~ `TP AKONUTSAT4 ~
##   <chr> <chr>            <chr>                       <dbl>            <dbl>
## 1 2010~ <NA>             <NA>                           NA               NA
## 2 2010~ <NA>             <NA>                           NA               NA
## 3 2010~ <NA>             <NA>                           NA               NA
## 4 2010~ <NA>             <NA>                           NA               NA
## 5 2010~ <NA>             <NA>                           NA               NA
## # ... with 182 more rows, and 4 more variables: `TP DISKONSAT ISTANBUL` <dbl>,
## #   `TP HEDONIKYKFE IST` <dbl>, `TP HKFE02` <dbl>, `TP TCBF02 ISTANBUL` <dbl>

Pulling data from our file system with read_xlsx function.

row_id_df = raw_df %>% transmute(id = row_number(), raw_df)
description_row_id = row_id_df %>% filter(is.na(Tarih)) %>% summarise(description_row_id = min(id))
minimum_description_row_id = description_row_id$description_row_id[1]
clean_df = row_id_df %>%
  filter(id < minimum_description_row_id) %>%
  drop_na() %>%
  filter(id < minimum_description_row_id) %>% 
  dplyr::rename_all(funs(make.names(.))) %>% 
  transmute(TARIH = as.yearmon(Tarih), TOPLAM_SATIS = as.numeric(TP.AKONUTSAT1.T40), IPOTEK_SATIS = as.numeric(TP.AKONUTSAT1.T40), ILK_EL_SATIS = TP.AKONUTSAT3.T40, IKINCI_EL_SATIS = TP.AKONUTSAT4.T40, YABANCI_SATIS = TP.DISKONSAT.ISTANBUL, YENI_KONUT_FIYAT_ENDEKS = TP.HEDONIKYKFE.IST, TR10 = TP.HKFE02, KONUT_BIRIM_FIYAT = TP.TCBF02.ISTANBUL)
print(clean_df,n=5)
## # A tibble: 92 x 9
##   TARIH TOPLAM_SATIS IPOTEK_SATIS ILK_EL_SATIS IKINCI_EL_SATIS YABANCI_SATIS
##   <yea>        <dbl>        <dbl>        <dbl>           <dbl>         <dbl>
## 1 Oca ~        18235        18235         8298            9937           138
## 2 Şub ~        18971        18971         8277           10694           120
## 3 Mar ~        21570        21570         9542           12028           198
## 4 Nis ~        20791        20791         8751           12040           209
## 5 May ~        22030        22030         9371           12659           188
## # ... with 87 more rows, and 3 more variables: YENI_KONUT_FIYAT_ENDEKS <dbl>,
## #   TR10 <dbl>, KONUT_BIRIM_FIYAT <dbl>
analytical_df = clean_df %>% 
  transmute(TARIH, ILK_EL_SATIS, ILK_EL_ORAN = ((ILK_EL_SATIS / TOPLAM_SATIS)*100), IKINCI_EL_SATIS, IKINCI_EL_ORAN = ((IKINCI_EL_SATIS / TOPLAM_SATIS)*100), YABANCI_SATIS, YABANCI_SATIS_ORAN = ((YABANCI_SATIS / TOPLAM_SATIS)*100), TC_SATIS = TOPLAM_SATIS - YABANCI_SATIS, TC_SATIS_ORAN = ((TOPLAM_SATIS - YABANCI_SATIS)/TOPLAM_SATIS)*100)
print(analytical_df,n=5)
## # A tibble: 92 x 9
##   TARIH ILK_EL_SATIS ILK_EL_ORAN IKINCI_EL_SATIS IKINCI_EL_ORAN YABANCI_SATIS
##   <yea>        <dbl>       <dbl>           <dbl>          <dbl>         <dbl>
## 1 Oca ~         8298        45.5            9937           54.5           138
## 2 Şub ~         8277        43.6           10694           56.4           120
## 3 Mar ~         9542        44.2           12028           55.8           198
## 4 Nis ~         8751        42.1           12040           57.9           209
## 5 May ~         9371        42.5           12659           57.5           188
## # ... with 87 more rows, and 3 more variables: YABANCI_SATIS_ORAN <dbl>,
## #   TC_SATIS <dbl>, TC_SATIS_ORAN <dbl>

Calculating percentage and adding new columns for analysis.

Visualizing the data

ggplot(analytical_df, aes(x=as.yearmon(TARIH), group=1)) +
  geom_line(aes(y = ILK_EL_ORAN, color = "First Hand") , size = 1) +
  geom_line(aes(y = IKINCI_EL_ORAN, color = "Second Hand"), size = 1) +
  theme_minimal() + 
  labs(x = 'Housing Sales Period', y = 'Number of houses sold', color = "Acquired By", title = "First Hand vs Second Hand", subtitle = "Sales numbers between January 2013 and August 2020")

Here we can see that around late 2018 and early 2019 second hand market increased significantly, this might be due to decreased purchasing power compared to pre-2019. Since second hand houses are cheaper than first hand houses.

ggplot(analytical_df, aes(x=as.yearmon(TARIH), group=1)) +
  geom_line(aes(y = YABANCI_SATIS_ORAN, color = "Non-TUR Sales") , size = 1) +
  geom_line(aes(y = TC_SATIS_ORAN, color = "TUR Sales"), size = 1) +
  theme_minimal() + 
  labs(x = 'Housing Sales Period', y = 'Number of houses sold', color = "Acquired By", title = "Turkish vs Non-Turkish Buyers", subtitle = "Sales numbers between January 2013 and August 2020")

While this graph doesn’t show huge analysis opportunity it supports our first estimate, since purchasing power is going down in the same dates we can see that Non-Turkish buyers are significantly increased.

Conclusion

We can see that since late-2018’s there is an economic crisis going on in Turkey. This pushes people to buy second-hand properties. This also encourages foreigners to spend money here because it’s cheaper.

Sources

Analytical Sources: