2  Preprocessing

Published

November 22, 2022

# Required libraries
library(tidyverse)
library(dplyr)
library(readxl)
library(lubridate)
library(tidyr)
library(zoo)
library(janitor)
library(reactable)
library(data.table)

2.0.1 Construction Cost Index (2015=100) and Rate of Change

# First part of cleaning data done in excel then I use Pivot longer Function

ConsIndex <- read_excel("data/Constructioncost.xls", 
                             sheet = "Index")
ConsIndex <- pivot_longer(ConsIndex,2:13,names_to = "Month", values_to = "index")
ConsIndex$Month <- match(ConsIndex$Month, month.name)
head(ConsIndex) 
# A tibble: 6 × 3
   Year Month index
  <dbl> <int> <dbl>
1  2015     1  97.1
2  2015     2  97.6
3  2015     3  98.3
4  2015     4  99.1
5  2015     5 100. 
6  2015     6 100. 
ConsAnnualChange <- read_excel("data/Constructioncost.xls", 
                                    sheet = "Rate")
ConsAnnualChange <- pivot_longer(ConsAnnualChange,2:13,names_to = "Month", values_to = "index")
ConsAnnualChange$Month <- match(ConsAnnualChange$Month, month.name)
ConsAnnualChange$index <- as.numeric(as.character(ConsAnnualChange$index))
Warning: NAs introduced by coercion
head(ConsAnnualChange)
# A tibble: 6 × 3
   Year Month index
  <dbl> <int> <dbl>
1  2015     1    NA
2  2015     2    NA
3  2015     3    NA
4  2015     4    NA
5  2015     5    NA
6  2015     6    NA

2.0.2 House Sales by Type and State

# House Sales data was too dirty so I clean it up in excel
HousesalesYear <- read_excel("data/Housesales.xls", 
                                    sheet = "Year")

head(HousesalesYear) 
# A tibble: 6 × 10
   Year   Total MSUnit MSShare (%…¹ OSUnit OSSha…² FHUnit FHSha…³ SHUnit SHSha…⁴
  <dbl>   <dbl>  <dbl>        <dbl>  <dbl>   <dbl>  <dbl>   <dbl>  <dbl>   <dbl>
1  2013 1157190 460112         39.8 6.97e5    60.2 529129    45.7 628061    54.3
2  2014 1165381 389689         33.4 7.76e5    66.6 541554    46.5 623827    53.5
3  2015 1289320 434388         33.7 8.55e5    66.3 598667    46.4 690653    53.6
4  2016 1341453 449508         33.5 8.92e5    66.5 631686    47.1 709767    52.9
5  2017 1409314 473099         33.6 9.36e5    66.4 659698    46.8 749616    53.2
6  2018 1375398 276820         20.1 1.10e6    79.9 651572    47.4 723826    52.6
# … with abbreviated variable names ¹​`MSShare (%)`, ²​`OSShare (%)`,
#   ³​`FHShare (%)`, ⁴​`SHShare (%)`
HousesalesMonth <- read_excel("data/Housesales.xls", 
                                     sheet = "Month")
head(HousesalesMonth) 
# A tibble: 6 × 11
   Year Month  Total MSUnit MSSha…¹ OSUnit OSSha…² FHUnit FHSha…³ SHUnit SHSha…⁴
  <dbl> <dbl>  <dbl>  <dbl>   <dbl>  <dbl>   <dbl>  <dbl>   <dbl>  <dbl>   <dbl>
1  2013     1  87444  36581    41.8  50863    58.2  39163    44.8  48281    55.2
2  2013     2  88519  37150    42.0  51369    58.0  39521    44.6  48998    55.4
3  2013     3  97956  41809    42.7  56147    57.3  43052    44.0  54904    56.0
4  2013     4  95381  40812    42.8  54569    57.2  41233    43.2  54148    56.8
5  2013     5 103261  45270    43.8  57991    56.2  46049    44.6  57212    55.4
6  2013     6  96424  43763    45.4  52661    54.6  43345    45.0  53079    55.0
# … with abbreviated variable names ¹​`MSShare (%)`, ²​`OSShare (%)`,
#   ³​`FHShare (%)`, ⁴​`SHShare (%)`

2.0.3 House Sales by Provinces

PATH <- "data/House Sales by Provinces.xls"

data_xl <- read_excel(PATH, range = cell_rows(14:129), col_names = names(read_excel(PATH, skip = 2))) 
# Fill NA Values
data_xl %>%
    fill(colnames(data_xl[1]), .direction = "downup")
# A tibble: 116 × 84
   Yıl      …¹ Ay\nM…² Topla…³ Adana Adıya…⁴ Afyon…⁵  Ağrı Aksaray Amasya Ankara
         <dbl> <chr>     <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl>  <dbl>  <dbl>
 1        2013 Ocak -…   87444  1548     261     475   105     329    245  11215
 2        2013 Şubat …   88519  1667     421     506   198     269    290  11281
 3        2013 Mart -…   97956  1826     310     563    94     301    316  12291
 4        2013 Nisan …   95381  1875     281     458   164     315    302  11889
 5        2013 Mayıs …  103261  1941     326     549   141     400    364  12638
 6        2013 Hazira…   96424  1987     346     454   112     356    356  11692
 7        2013 Temmuz…  106636  1986     404     563   130     401    463  12428
 8        2013 Ağusto…   84480  1426     352     472   118     447    330   9636
 9        2013 Eylül …  102280  1624     422     536   126     519    343  12206
10        2013 Ekim -…   76344  1232     285     490   103     299    242   8231
# … with 106 more rows, 74 more variables: Antalya <dbl>, Ardahan <dbl>,
#   Artvin <dbl>, Aydın <dbl>, Balıkesir <dbl>, Bartın <dbl>, Batman <dbl>,
#   Bayburt <dbl>, Bilecik <dbl>, Bingöl <dbl>, Bitlis <dbl>, Bolu <dbl>,
#   Burdur <dbl>, Bursa <dbl>, Çanakkale <dbl>, Çankırı <dbl>, Çorum <dbl>,
#   Denizli <dbl>, Diyarbakır <dbl>, Düzce <dbl>, Edirne <dbl>, Elazığ <dbl>,
#   Erzincan <dbl>, Erzurum <dbl>, Eskişehir <dbl>, Gaziantep <dbl>,
#   Giresun <dbl>, Gümüşhane <dbl>, Hakkari <chr>, Hatay <dbl>, Iğdır <dbl>, …
# Save data_xl as House Sales by Provinces.rds
saveRDS(data_xl, file = "House Sales by Provinces.rds")
# Restore the House Sales by Provinces.rds
head(readRDS(file = "House Sales by Provinces.rds"))
# A tibble: 6 × 84
  Yıl       …¹ Ay\nM…² Topla…³ Adana Adıya…⁴ Afyon…⁵  Ağrı Aksaray Amasya Ankara
         <dbl> <chr>     <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl>  <dbl>  <dbl>
1         2013 Ocak -…   87444  1548     261     475   105     329    245  11215
2           NA Şubat …   88519  1667     421     506   198     269    290  11281
3           NA Mart -…   97956  1826     310     563    94     301    316  12291
4           NA Nisan …   95381  1875     281     458   164     315    302  11889
5           NA Mayıs …  103261  1941     326     549   141     400    364  12638
6           NA Hazira…   96424  1987     346     454   112     356    356  11692
# … with 74 more variables: Antalya <dbl>, Ardahan <dbl>, Artvin <dbl>,
#   Aydın <dbl>, Balıkesir <dbl>, Bartın <dbl>, Batman <dbl>, Bayburt <dbl>,
#   Bilecik <dbl>, Bingöl <dbl>, Bitlis <dbl>, Bolu <dbl>, Burdur <dbl>,
#   Bursa <dbl>, Çanakkale <dbl>, Çankırı <dbl>, Çorum <dbl>, Denizli <dbl>,
#   Diyarbakır <dbl>, Düzce <dbl>, Edirne <dbl>, Elazığ <dbl>, Erzincan <dbl>,
#   Erzurum <dbl>, Eskişehir <dbl>, Gaziantep <dbl>, Giresun <dbl>,
#   Gümüşhane <dbl>, Hakkari <chr>, Hatay <dbl>, Iğdır <dbl>, Isparta <dbl>, …

2.0.4 Building Permits and Rates of Change

PATH <- "data/Building Permits and Rates of Change.xls"

building_permits <- read_excel(PATH, range = cell_rows(24:105), col_names = names(read_excel(PATH, skip = 2))) 
New names:
• `Bina sayısı Number of building` -> `Bina sayısı Number of building...3`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...4`
• `` -> `...5`
• `Daire sayısı Number of dwelling unit` -> `Daire sayısı Number of dwelling
  unit...6`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...7`
• `` -> `...8`
• `Yüzölçüm Floor area (m²)` -> `Yüzölçüm Floor area (m²)...9`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...10`
• `` -> `...11`
• `Bina sayısı Number of building` -> `Bina sayısı Number of building...12`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...13`
• `` -> `...14`
• `Daire sayısı Number of dwelling unit` -> `Daire sayısı Number of dwelling
  unit...15`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...16`
• `` -> `...17`
• `Yüzölçüm Floor area (m²)` -> `Yüzölçüm Floor area (m²)...18`
• `Yıllık değişim Annual change (%)` -> `Yıllık değişim Annual change (%)...19`
# Fill NA Values
building_permits <- building_permits %>% 
    fill(colnames(building_permits)[1], .direction = "downup") %>%
    select_if(~ !all(is.na(.)))
# CP: Construction Permits
# OP: Occupancy Permits

colnames(building_permits) <- c('Year',
                                'Quarter',
                                'CP - Number of Building',
                                'CP - NoB - Annual Change (%)',
                                'CP - Number of Dwelling Unit',
                                'CP - NoDU - Annual Change (%)',
                                'CP - Floor Area (m2)',
                                'CP - Floor Area - Annual Change (%)',
                                'OP - Number of Building',
                                'OP - NoB - Annual Change (%)',
                                'OP - Number of Dwelling Unit',
                                'OP - NoDU - Annual Change (%)',
                                'OP - Floor Area (m2)',
                                'OP - Floor Area - Annual Change (%)')
# Save building_permits as Building Permits and Rates of Change.rds
saveRDS(building_permits, file = "Building Permits and Rates of Change.rds")
# Restore the Building Permits and Rates of Change.RData
tail(readRDS(file = "Building Permits and Rates of Change.rds"))
# A tibble: 6 × 14
   Year Quarter CP - N…¹ CP - …² CP - …³ CP - …⁴ CP - …⁵ CP - …⁶ OP - …⁷ OP - …⁸
  <dbl> <chr>      <dbl> <chr>     <dbl> <chr>     <dbl> <chr>     <dbl> <chr>  
1  2021 I          33622 137.30…  174741 116.3    3.44e7 95.099…   20979 12.800…
2  2021 II         30537 55.5     157526 14.300…  3.18e7 15.699…   18511 27.699…
3  2021 III        30451 20.199…  149415 5.9000…  3.33e7 19.600…   21954 4      
4  2021 IV         43699 17.699…  239577 22.399…  5.13e7 29.100…   30738 29.899…
5  2022 I          26168 -22.19…  128157 -26.69…  2.71e7 -21.19…   22481 7.2000…
6  2022 II         28249 -7.5     133424 -15.30…  2.93e7 -7.799…   22691 22.600…
# … with 4 more variables: `OP - Number of Dwelling Unit` <dbl>,
#   `OP - NoDU - Annual Change (%)` <chr>, `OP - Floor Area (m2)` <dbl>,
#   `OP - Floor Area - Annual Change (%)` <chr>, and abbreviated variable names
#   ¹​`CP - Number of Building`, ²​`CP - NoB - Annual Change (%)`,
#   ³​`CP - Number of Dwelling Unit`, ⁴​`CP - NoDU - Annual Change (%)`,
#   ⁵​`CP - Floor Area (m2)`, ⁶​`CP - Floor Area - Annual Change (%)`,
#   ⁷​`OP - Number of Building`, ⁸​`OP - NoB - Annual Change (%)`
House_sales_to_foreigners_by_province <- read_excel("data/House sales to foreigners by province.xls", 
                                                    sheet = "HouseSalesByProvince")
House_sales_to_foreigners_by_province <- pivot_longer(House_sales_to_foreigners_by_province,3:14,names_to = "Month", values_to = "sales")
head(House_sales_to_foreigners_by_province)
# A tibble: 6 × 4
   Year Province Month    sales
  <dbl> <chr>    <chr>    <dbl>
1  2013 Antalya  January    448
2  2013 Antalya  February   357
3  2013 Antalya  March      495
4  2013 Antalya  April      445
5  2013 Antalya  May        476
6  2013 Antalya  June       358

2.0.5 House sales to foreigners by year

 House_sales_to_foreigners_year <- read_excel("data/House sales to foreigners.xls", 
     sheet = "Year")
head(House_sales_to_foreigners_year)
# A tibble: 6 × 4
   Year   Total `Sales to foreigners` `Share (%)`
  <dbl>   <dbl>                 <dbl>       <dbl>
1  2013 1157190                 12181        1.05
2  2014 1165381                 18959        1.63
3  2015 1289320                 22830        1.77
4  2016 1341453                 18189        1.36
5  2017 1409314                 22234        1.58
6  2018 1375398                 39663        2.88

2.0.6 House sales to foreigners by month

House_sales_to_foreigners_month <- read_excel("data/House sales to foreigners.xls", 
     sheet = "Month")
head(House_sales_to_foreigners_month)
# A tibble: 6 × 5
   Year Month     Total `Sales to foreigners` `Share (%)`
  <dbl> <chr>     <dbl>                 <dbl>       <dbl>
1  2013 January   87444                   888       1.02 
2  2013 February  88519                   716       0.809
3  2013 March     97956                   962       0.982
4  2013 April     95381                   947       0.993
5  2013 May      103261                   938       0.908
6  2013 June      96424                   783       0.812

2.0.7 House sales by gender

HSBGAP_province <- read_excel("data/HSBGAP.xls", 
     sheet = "GENDERS  & PROVINCES")
head(HSBGAP_province)
# A tibble: 6 × 7
   Year City           Total  Male Female JO    Other
  <dbl> <chr>          <dbl> <dbl>  <dbl> <chr> <dbl>
1  2014 Adana          20897 11809   6630 283    2175
2  2014 Adiyaman        4480  3262    970 23      225
3  2014 Afyonkarahisar  6260  3516   1790 590     364
4  2014 Ağri            1347  1098    220 5        24
5  2014 Aksaray         4397  2877   1280 93      147
6  2014 Amasya          3994  2436   1044 62      452
HSBGAP_year <- read_excel("data/HSBGAP.xls", 
     sheet = "YEARS (TURKEY TOTAL)")
head(HSBGAP_year)
# A tibble: 6 × 6
   Year   Total   Male Female    JO  Other
  <dbl>   <dbl>  <dbl>  <dbl> <dbl>  <dbl>
1  2014 1165381 695727 343209 27919  98526
2  2015 1289320 774874 382237 29708 102501
3  2016 1341453 801048 391334 29088 119983
4  2017 1409314 837928 409453 29178 132755
5  2018 1375398 790006 421286 26877 137229
6  2019 1348729 775653 420276 24141 128659
HSTFBN_nation <- read_excel("data/HSTFBN.xls", 
     sheet = "FOREIGNERS BY NATIONALITIES")

HSTFBN_nation <- pivot_longer(HSTFBN_nation,4:15,names_to = "Month", values_to = "sales")
head(HSTFBN_nation)
# A tibble: 6 × 5
   year country total Month    sales
  <dbl> <chr>   <dbl> <chr>    <dbl>
1  2015 Iraq     4228 january    240
2  2015 Iraq     4228 february   243
3  2015 Iraq     4228 march      297
4  2015 Iraq     4228 april      298
5  2015 Iraq     4228 may        276
6  2015 Iraq     4228 june       315
HSTFBN <- read_excel("data/HSTFBN.xls", sheet = "TOTAL SALES BY YEARS")

head(HSTFBN)
# A tibble: 6 × 2
  `\nYear` `\nTotal`
     <dbl>     <dbl>
1     2015     22991
2     2016     18391
3     2017     22428
4     2018     40044
5     2019     45967
6     2020     41298