# Required libraries
library(tidyverse)
library(dplyr)
library(readxl)
library(lubridate)
library(tidyr)
library(zoo)
library(janitor)
library(reactable)
library(data.table)
2 Preprocessing
2.0.1 Construction Cost Index (2015=100) and Rate of Change
# First part of cleaning data done in excel then I use Pivot longer Function
<- read_excel("data/Constructioncost.xls",
ConsIndex sheet = "Index")
<- pivot_longer(ConsIndex,2:13,names_to = "Month", values_to = "index")
ConsIndex $Month <- match(ConsIndex$Month, month.name)
ConsIndexhead(ConsIndex)
# A tibble: 6 × 3
Year Month index
<dbl> <int> <dbl>
1 2015 1 97.1
2 2015 2 97.6
3 2015 3 98.3
4 2015 4 99.1
5 2015 5 100.
6 2015 6 100.
<- read_excel("data/Constructioncost.xls",
ConsAnnualChange sheet = "Rate")
<- pivot_longer(ConsAnnualChange,2:13,names_to = "Month", values_to = "index")
ConsAnnualChange $Month <- match(ConsAnnualChange$Month, month.name)
ConsAnnualChange$index <- as.numeric(as.character(ConsAnnualChange$index)) ConsAnnualChange
Warning: NAs introduced by coercion
head(ConsAnnualChange)
# A tibble: 6 × 3
Year Month index
<dbl> <int> <dbl>
1 2015 1 NA
2 2015 2 NA
3 2015 3 NA
4 2015 4 NA
5 2015 5 NA
6 2015 6 NA
2.0.2 House Sales by Type and State
# House Sales data was too dirty so I clean it up in excel
<- read_excel("data/Housesales.xls",
HousesalesYear sheet = "Year")
head(HousesalesYear)
# A tibble: 6 × 10
Year Total MSUnit MSShare (%…¹ OSUnit OSSha…² FHUnit FHSha…³ SHUnit SHSha…⁴
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2013 1157190 460112 39.8 6.97e5 60.2 529129 45.7 628061 54.3
2 2014 1165381 389689 33.4 7.76e5 66.6 541554 46.5 623827 53.5
3 2015 1289320 434388 33.7 8.55e5 66.3 598667 46.4 690653 53.6
4 2016 1341453 449508 33.5 8.92e5 66.5 631686 47.1 709767 52.9
5 2017 1409314 473099 33.6 9.36e5 66.4 659698 46.8 749616 53.2
6 2018 1375398 276820 20.1 1.10e6 79.9 651572 47.4 723826 52.6
# … with abbreviated variable names ¹`MSShare (%)`, ²`OSShare (%)`,
# ³`FHShare (%)`, ⁴`SHShare (%)`
<- read_excel("data/Housesales.xls",
HousesalesMonth sheet = "Month")
head(HousesalesMonth)
# A tibble: 6 × 11
Year Month Total MSUnit MSSha…¹ OSUnit OSSha…² FHUnit FHSha…³ SHUnit SHSha…⁴
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2013 1 87444 36581 41.8 50863 58.2 39163 44.8 48281 55.2
2 2013 2 88519 37150 42.0 51369 58.0 39521 44.6 48998 55.4
3 2013 3 97956 41809 42.7 56147 57.3 43052 44.0 54904 56.0
4 2013 4 95381 40812 42.8 54569 57.2 41233 43.2 54148 56.8
5 2013 5 103261 45270 43.8 57991 56.2 46049 44.6 57212 55.4
6 2013 6 96424 43763 45.4 52661 54.6 43345 45.0 53079 55.0
# … with abbreviated variable names ¹`MSShare (%)`, ²`OSShare (%)`,
# ³`FHShare (%)`, ⁴`SHShare (%)`
2.0.3 House Sales by Provinces
<- "data/House Sales by Provinces.xls"
PATH
<- read_excel(PATH, range = cell_rows(14:129), col_names = names(read_excel(PATH, skip = 2))) data_xl
# Fill NA Values
%>%
data_xl fill(colnames(data_xl[1]), .direction = "downup")
# A tibble: 116 × 84
Yıl …¹ Ay\nM…² Topla…³ Adana Adıya…⁴ Afyon…⁵ Ağrı Aksaray Amasya Ankara
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2013 Ocak -… 87444 1548 261 475 105 329 245 11215
2 2013 Şubat … 88519 1667 421 506 198 269 290 11281
3 2013 Mart -… 97956 1826 310 563 94 301 316 12291
4 2013 Nisan … 95381 1875 281 458 164 315 302 11889
5 2013 Mayıs … 103261 1941 326 549 141 400 364 12638
6 2013 Hazira… 96424 1987 346 454 112 356 356 11692
7 2013 Temmuz… 106636 1986 404 563 130 401 463 12428
8 2013 Ağusto… 84480 1426 352 472 118 447 330 9636
9 2013 Eylül … 102280 1624 422 536 126 519 343 12206
10 2013 Ekim -… 76344 1232 285 490 103 299 242 8231
# … with 106 more rows, 74 more variables: Antalya <dbl>, Ardahan <dbl>,
# Artvin <dbl>, Aydın <dbl>, Balıkesir <dbl>, Bartın <dbl>, Batman <dbl>,
# Bayburt <dbl>, Bilecik <dbl>, Bingöl <dbl>, Bitlis <dbl>, Bolu <dbl>,
# Burdur <dbl>, Bursa <dbl>, Çanakkale <dbl>, Çankırı <dbl>, Çorum <dbl>,
# Denizli <dbl>, Diyarbakır <dbl>, Düzce <dbl>, Edirne <dbl>, Elazığ <dbl>,
# Erzincan <dbl>, Erzurum <dbl>, Eskişehir <dbl>, Gaziantep <dbl>,
# Giresun <dbl>, Gümüşhane <dbl>, Hakkari <chr>, Hatay <dbl>, Iğdır <dbl>, …
# Save data_xl as House Sales by Provinces.rds
saveRDS(data_xl, file = "House Sales by Provinces.rds")
# Restore the House Sales by Provinces.rds
head(readRDS(file = "House Sales by Provinces.rds"))
# A tibble: 6 × 84
Yıl …¹ Ay\nM…² Topla…³ Adana Adıya…⁴ Afyon…⁵ Ağrı Aksaray Amasya Ankara
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2013 Ocak -… 87444 1548 261 475 105 329 245 11215
2 NA Şubat … 88519 1667 421 506 198 269 290 11281
3 NA Mart -… 97956 1826 310 563 94 301 316 12291
4 NA Nisan … 95381 1875 281 458 164 315 302 11889
5 NA Mayıs … 103261 1941 326 549 141 400 364 12638
6 NA Hazira… 96424 1987 346 454 112 356 356 11692
# … with 74 more variables: Antalya <dbl>, Ardahan <dbl>, Artvin <dbl>,
# Aydın <dbl>, Balıkesir <dbl>, Bartın <dbl>, Batman <dbl>, Bayburt <dbl>,
# Bilecik <dbl>, Bingöl <dbl>, Bitlis <dbl>, Bolu <dbl>, Burdur <dbl>,
# Bursa <dbl>, Çanakkale <dbl>, Çankırı <dbl>, Çorum <dbl>, Denizli <dbl>,
# Diyarbakır <dbl>, Düzce <dbl>, Edirne <dbl>, Elazığ <dbl>, Erzincan <dbl>,
# Erzurum <dbl>, Eskişehir <dbl>, Gaziantep <dbl>, Giresun <dbl>,
# Gümüşhane <dbl>, Hakkari <chr>, Hatay <dbl>, Iğdır <dbl>, Isparta <dbl>, …
2.0.4 Building Permits and Rates of Change
<- "data/Building Permits and Rates of Change.xls"
PATH
<- read_excel(PATH, range = cell_rows(24:105), col_names = names(read_excel(PATH, skip = 2))) building_permits
New names:
• `Bina sayısı Number of building` -> `Bina sayısı Number of building...3`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...4`
• `` -> `...5`
• `Daire sayısı Number of dwelling unit` -> `Daire sayısı Number of dwelling
unit...6`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...7`
• `` -> `...8`
• `Yüzölçüm Floor area (m²)` -> `Yüzölçüm Floor area (m²)...9`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...10`
• `` -> `...11`
• `Bina sayısı Number of building` -> `Bina sayısı Number of building...12`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...13`
• `` -> `...14`
• `Daire sayısı Number of dwelling unit` -> `Daire sayısı Number of dwelling
unit...15`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...16`
• `` -> `...17`
• `Yüzölçüm Floor area (m²)` -> `Yüzölçüm Floor area (m²)...18`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...19`
# Fill NA Values
<- building_permits %>%
building_permits fill(colnames(building_permits)[1], .direction = "downup") %>%
select_if(~ !all(is.na(.)))
# CP: Construction Permits
# OP: Occupancy Permits
colnames(building_permits) <- c('Year',
'Quarter',
'CP - Number of Building',
'CP - NoB - Annual Change (%)',
'CP - Number of Dwelling Unit',
'CP - NoDU - Annual Change (%)',
'CP - Floor Area (m2)',
'CP - Floor Area - Annual Change (%)',
'OP - Number of Building',
'OP - NoB - Annual Change (%)',
'OP - Number of Dwelling Unit',
'OP - NoDU - Annual Change (%)',
'OP - Floor Area (m2)',
'OP - Floor Area - Annual Change (%)')
# Save building_permits as Building Permits and Rates of Change.rds
saveRDS(building_permits, file = "Building Permits and Rates of Change.rds")
# Restore the Building Permits and Rates of Change.RData
tail(readRDS(file = "Building Permits and Rates of Change.rds"))
# A tibble: 6 × 14
Year Quarter CP - N…¹ CP - …² CP - …³ CP - …⁴ CP - …⁵ CP - …⁶ OP - …⁷ OP - …⁸
<dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr>
1 2021 I 33622 137.30… 174741 116.3 3.44e7 95.099… 20979 12.800…
2 2021 II 30537 55.5 157526 14.300… 3.18e7 15.699… 18511 27.699…
3 2021 III 30451 20.199… 149415 5.9000… 3.33e7 19.600… 21954 4
4 2021 IV 43699 17.699… 239577 22.399… 5.13e7 29.100… 30738 29.899…
5 2022 I 26168 -22.19… 128157 -26.69… 2.71e7 -21.19… 22481 7.2000…
6 2022 II 28249 -7.5 133424 -15.30… 2.93e7 -7.799… 22691 22.600…
# … with 4 more variables: `OP - Number of Dwelling Unit` <dbl>,
# `OP - NoDU - Annual Change (%)` <chr>, `OP - Floor Area (m2)` <dbl>,
# `OP - Floor Area - Annual Change (%)` <chr>, and abbreviated variable names
# ¹`CP - Number of Building`, ²`CP - NoB - Annual Change (%)`,
# ³`CP - Number of Dwelling Unit`, ⁴`CP - NoDU - Annual Change (%)`,
# ⁵`CP - Floor Area (m2)`, ⁶`CP - Floor Area - Annual Change (%)`,
# ⁷`OP - Number of Building`, ⁸`OP - NoB - Annual Change (%)`
<- read_excel("data/House sales to foreigners by province.xls",
House_sales_to_foreigners_by_province sheet = "HouseSalesByProvince")
<- pivot_longer(House_sales_to_foreigners_by_province,3:14,names_to = "Month", values_to = "sales")
House_sales_to_foreigners_by_province head(House_sales_to_foreigners_by_province)
# A tibble: 6 × 4
Year Province Month sales
<dbl> <chr> <chr> <dbl>
1 2013 Antalya January 448
2 2013 Antalya February 357
3 2013 Antalya March 495
4 2013 Antalya April 445
5 2013 Antalya May 476
6 2013 Antalya June 358
2.0.5 House sales to foreigners by year
<- read_excel("data/House sales to foreigners.xls",
House_sales_to_foreigners_year sheet = "Year")
head(House_sales_to_foreigners_year)
# A tibble: 6 × 4
Year Total `Sales to foreigners` `Share (%)`
<dbl> <dbl> <dbl> <dbl>
1 2013 1157190 12181 1.05
2 2014 1165381 18959 1.63
3 2015 1289320 22830 1.77
4 2016 1341453 18189 1.36
5 2017 1409314 22234 1.58
6 2018 1375398 39663 2.88
2.0.6 House sales to foreigners by month
<- read_excel("data/House sales to foreigners.xls",
House_sales_to_foreigners_month sheet = "Month")
head(House_sales_to_foreigners_month)
# A tibble: 6 × 5
Year Month Total `Sales to foreigners` `Share (%)`
<dbl> <chr> <dbl> <dbl> <dbl>
1 2013 January 87444 888 1.02
2 2013 February 88519 716 0.809
3 2013 March 97956 962 0.982
4 2013 April 95381 947 0.993
5 2013 May 103261 938 0.908
6 2013 June 96424 783 0.812
2.0.7 House sales by gender
<- read_excel("data/HSBGAP.xls",
HSBGAP_province sheet = "GENDERS & PROVINCES")
head(HSBGAP_province)
# A tibble: 6 × 7
Year City Total Male Female JO Other
<dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
1 2014 Adana 20897 11809 6630 283 2175
2 2014 Adiyaman 4480 3262 970 23 225
3 2014 Afyonkarahisar 6260 3516 1790 590 364
4 2014 Ağri 1347 1098 220 5 24
5 2014 Aksaray 4397 2877 1280 93 147
6 2014 Amasya 3994 2436 1044 62 452
<- read_excel("data/HSBGAP.xls",
HSBGAP_year sheet = "YEARS (TURKEY TOTAL)")
head(HSBGAP_year)
# A tibble: 6 × 6
Year Total Male Female JO Other
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2014 1165381 695727 343209 27919 98526
2 2015 1289320 774874 382237 29708 102501
3 2016 1341453 801048 391334 29088 119983
4 2017 1409314 837928 409453 29178 132755
5 2018 1375398 790006 421286 26877 137229
6 2019 1348729 775653 420276 24141 128659
<- read_excel("data/HSTFBN.xls",
HSTFBN_nation sheet = "FOREIGNERS BY NATIONALITIES")
<- pivot_longer(HSTFBN_nation,4:15,names_to = "Month", values_to = "sales")
HSTFBN_nation head(HSTFBN_nation)
# A tibble: 6 × 5
year country total Month sales
<dbl> <chr> <dbl> <chr> <dbl>
1 2015 Iraq 4228 january 240
2 2015 Iraq 4228 february 243
3 2015 Iraq 4228 march 297
4 2015 Iraq 4228 april 298
5 2015 Iraq 4228 may 276
6 2015 Iraq 4228 june 315
<- read_excel("data/HSTFBN.xls", sheet = "TOTAL SALES BY YEARS")
HSTFBN
head(HSTFBN)
# A tibble: 6 × 2
`\nYear` `\nTotal`
<dbl> <dbl>
1 2015 22991
2 2016 18391
3 2017 22428
4 2018 40044
5 2019 45967
6 2020 41298