raw_df = read_xlsx("C:\\data\\EVDS_istanbul_property_data\\EVDS_istanbul_property_data.xlsx")
print(raw_df,n=5)
## # A tibble: 187 x 9
## Tarih `TP AKONUTSAT1 ~ `TP AKONUTSAT2 ~ `TP AKONUTSAT3 ~ `TP AKONUTSAT4 ~
## <chr> <chr> <chr> <dbl> <dbl>
## 1 2010~ <NA> <NA> NA NA
## 2 2010~ <NA> <NA> NA NA
## 3 2010~ <NA> <NA> NA NA
## 4 2010~ <NA> <NA> NA NA
## 5 2010~ <NA> <NA> NA NA
## # ... with 182 more rows, and 4 more variables: `TP DISKONSAT ISTANBUL` <dbl>,
## # `TP HEDONIKYKFE IST` <dbl>, `TP HKFE02` <dbl>, `TP TCBF02 ISTANBUL` <dbl>
Pulling data from our file system with read_xlsx function.
row_id_df = raw_df %>% transmute(id = row_number(), raw_df)
description_row_id = row_id_df %>% filter(is.na(Tarih)) %>% summarise(description_row_id = min(id))
minimum_description_row_id = description_row_id$description_row_id[1]
clean_df = row_id_df %>%
filter(id < minimum_description_row_id) %>%
drop_na() %>%
filter(id < minimum_description_row_id) %>%
dplyr::rename_all(funs(make.names(.))) %>%
transmute(TARIH = as.yearmon(Tarih), TOPLAM_SATIS = as.numeric(TP.AKONUTSAT1.T40), IPOTEK_SATIS = as.numeric(TP.AKONUTSAT1.T40), ILK_EL_SATIS = TP.AKONUTSAT3.T40, IKINCI_EL_SATIS = TP.AKONUTSAT4.T40, YABANCI_SATIS = TP.DISKONSAT.ISTANBUL, YENI_KONUT_FIYAT_ENDEKS = TP.HEDONIKYKFE.IST, TR10 = TP.HKFE02, KONUT_BIRIM_FIYAT = TP.TCBF02.ISTANBUL)
print(clean_df,n=5)
## # A tibble: 92 x 9
## TARIH TOPLAM_SATIS IPOTEK_SATIS ILK_EL_SATIS IKINCI_EL_SATIS YABANCI_SATIS
## <yea> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Oca ~ 18235 18235 8298 9937 138
## 2 Şub ~ 18971 18971 8277 10694 120
## 3 Mar ~ 21570 21570 9542 12028 198
## 4 Nis ~ 20791 20791 8751 12040 209
## 5 May ~ 22030 22030 9371 12659 188
## # ... with 87 more rows, and 3 more variables: YENI_KONUT_FIYAT_ENDEKS <dbl>,
## # TR10 <dbl>, KONUT_BIRIM_FIYAT <dbl>
analytical_df = clean_df %>%
transmute(TARIH, ILK_EL_SATIS, ILK_EL_ORAN = ((ILK_EL_SATIS / TOPLAM_SATIS)*100), IKINCI_EL_SATIS, IKINCI_EL_ORAN = ((IKINCI_EL_SATIS / TOPLAM_SATIS)*100), YABANCI_SATIS, YABANCI_SATIS_ORAN = ((YABANCI_SATIS / TOPLAM_SATIS)*100), TC_SATIS = TOPLAM_SATIS - YABANCI_SATIS, TC_SATIS_ORAN = ((TOPLAM_SATIS - YABANCI_SATIS)/TOPLAM_SATIS)*100)
print(analytical_df,n=5)
## # A tibble: 92 x 9
## TARIH ILK_EL_SATIS ILK_EL_ORAN IKINCI_EL_SATIS IKINCI_EL_ORAN YABANCI_SATIS
## <yea> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Oca ~ 8298 45.5 9937 54.5 138
## 2 Şub ~ 8277 43.6 10694 56.4 120
## 3 Mar ~ 9542 44.2 12028 55.8 198
## 4 Nis ~ 8751 42.1 12040 57.9 209
## 5 May ~ 9371 42.5 12659 57.5 188
## # ... with 87 more rows, and 3 more variables: YABANCI_SATIS_ORAN <dbl>,
## # TC_SATIS <dbl>, TC_SATIS_ORAN <dbl>
Calculating percentage and adding new columns for analysis.
ggplot(analytical_df, aes(x=as.yearmon(TARIH), group=1)) +
geom_line(aes(y = ILK_EL_ORAN, color = "First Hand") , size = 1) +
geom_line(aes(y = IKINCI_EL_ORAN, color = "Second Hand"), size = 1) +
theme_minimal() +
labs(x = 'Housing Sales Period', y = 'Number of houses sold', color = "Acquired By", title = "First Hand vs Second Hand", subtitle = "Sales numbers between January 2013 and August 2020")
Here we can see that around late 2018 and early 2019 second hand market increased significantly, this might be due to decreased purchasing power compared to pre-2019. Since second hand houses are cheaper than first hand houses.
ggplot(analytical_df, aes(x=as.yearmon(TARIH), group=1)) +
geom_line(aes(y = YABANCI_SATIS_ORAN, color = "Non-TUR Sales") , size = 1) +
geom_line(aes(y = TC_SATIS_ORAN, color = "TUR Sales"), size = 1) +
theme_minimal() +
labs(x = 'Housing Sales Period', y = 'Number of houses sold', color = "Acquired By", title = "Turkish vs Non-Turkish Buyers", subtitle = "Sales numbers between January 2013 and August 2020")
While this graph doesn’t show huge analysis opportunity it supports our first estimate, since purchasing power is going down in the same dates we can see that Non-Turkish buyers are significantly increased.
We can see that since late-2018’s there is an economic crisis going on in Turkey. This pushes people to buy second-hand properties. This also encourages foreigners to spend money here because it’s cheaper.
Analytical Sources: