library(dplyr)
library(tidyverse)
library(ggplot2)
library(readxl)
library(rio)
library(stringr)
library(lubridate)
library(plotly)
library(kableExtra)
library(scales)
library(cowplot)
library(plotly)

Data Cleaning and Overview

Data set contains City Based Monthly Export/Import data of Turkey from 2013 to September 2020. Below code chunk imports data and makes necessary cleaning steps like creating Month and Date variables, translating the content, checking whether if data set contains any missing points.

#Import Data
data <- rio::import('https://github.com/pjournal/mef04g-rsizlar/blob/gh-pages/Data/Trading_Data.xlsx?raw=true')
head(data)
##   Year    Month          City TradingType  AmountUSD  AmountEUR    AmountTL
## 1 2014  03-Mart 34 - ISTANBUL     Ihracat 8407027918 6083477740 18670401956
## 2 2017  03-Mart 34 - ISTANBUL     Ihracat 7594486727 7112557872 27893924701
## 3 2015 04-Nisan 34 - ISTANBUL     Ihracat 7589250602 7041682782 20145325846
## 4 2013 11-Kasim 34 - ISTANBUL     Ihracat 7569704135 5602338497 15323250876
## 5 2017 11-Kasim 34 - ISTANBUL     Ihracat 7439085044 6344723625 28869310858
## 6 2017 05-Mayis 34 - ISTANBUL     Ihracat 7389840049 6692326366 26394860561
#Clean Month and City variables
data$Month<-str_sub(data$Month,start= 4)

data<-data %>% 
  separate(City, c("CityCode", "City"),sep = "( - )")

data<-data%>%
  mutate(Month=case_when(data$Month=="Ocak"~"01",
                        data$Month=="Subat"~"02",
                        data$Month=="Mart"~"03",
                        data$Month=="Nisan"~"04",
                        data$Month=="Mayis"~"05",
                        data$Month=="Haziran"~"06",
                        data$Month=="Temmuz"~"07",
                        data$Month=="Agustos"~"08",
                        data$Month=="Eylul"~"09",
                        data$Month=="Ekim"~"10",
                        data$Month=="Kasim"~"11",
                        data$Month=="Aralik"~"12"))

data<-data%>%
  mutate(TradingType=case_when(data$TradingType=="Ithalat"~"Import",
                         data$TradingType=="Ihracat"~"Export"))

#Create Date Variable from Year and Month
data$Date<-paste0(data$Year,as.character("-"),data$Month,as.character("-01"))
data$Date<-as.Date(data$Date, format="%Y-%m-%d")

sapply(data, function(x) sum(is.na(x)))
##        Year       Month    CityCode        City TradingType   AmountUSD 
##           0           0           0           0           0           0 
##   AmountEUR    AmountTL        Date 
##           0           0           0

Annual Export/Import Figures by Cities

Below code chunk creates 4 summary tables that shows the cities of Turkey with the maximum and minimum import/export figures on a yearly basis. As we are still in 2020, its figures will have data until September.

min_yearly_export<-data%>%
  select(Year,City,TradingType,AmountUSD)%>%
  filter(TradingType=="Export")%>%
  group_by(Year,City)%>%
  summarise("Yearly Amount"=sum(AmountUSD))%>%
  arrange(Year,`Yearly Amount`)%>%
  mutate(rn=row_number(Year))%>%
  filter(rn==1)%>%
  transmute(Year=as.character(Year),City,`Yearly Amount`)

max_yearly_export<-data%>%
  select(Year,City,TradingType,AmountUSD)%>%
  filter(TradingType=="Export")%>%
  group_by(Year,City)%>%
  summarise("Yearly Amount"=sum(AmountUSD))%>%
  arrange(Year,desc(`Yearly Amount`))%>%
  mutate(rn=row_number(Year))%>%
  filter(rn==1)%>%
  transmute(Year=as.character(Year),City,`Yearly Amount`)

min_yearly_import<-data%>%
  select(Year,City,TradingType,AmountUSD)%>%
  filter(TradingType=="Import")%>%
  group_by(Year,City)%>%
  summarise("Yearly Amount"=sum(AmountUSD))%>%
  arrange(Year,`Yearly Amount`)%>%
  mutate(rn=row_number(Year))%>%
  filter(rn==1)%>%
  transmute(Year=as.character(Year),City,`Yearly Amount`)

max_yearly_import<-data%>%
  select(Year,City,TradingType,AmountUSD)%>%
  filter(TradingType=="Import")%>%
  group_by(Year,City)%>%
  summarise("Yearly Amount"=sum(AmountUSD))%>%
  arrange(Year,desc(`Yearly Amount`))%>%
  mutate(rn=row_number(Year))%>%
  filter(rn==1)%>%
  transmute(Year=as.character(Year),City,`Yearly Amount`)

Below table shows the cities with the minimum import figures on a yearly basis. Ardahan occurs 3 times in the table and its figures are decreasing year by year. In 4 years, from 2013 to 2017, Ardahan’s import figures seem to be halved. Also, we can observe an interesting point: 6 out of 8 observations are from Black Sea region. High agricultural operations, small population and rural life may have led to lower import figures.

min_yearly_import %>%
  kable(caption = "Min Amount of Import by Cities",format.args = list(big.mark = ",")) %>%
  kable_classic(full_width = F, html_font = "Cambria")
Min Amount of Import by Cities
Year City Yearly Amount
2013 ARDAHAN 189,015
2014 TUNCELI 61,752
2015 ARDAHAN 155,837
2016 GUMUSHANE 158,883
2017 ARDAHAN 96,347
2018 TUNCELI 93,318
2019 BAYBURT 4,387
2020 BAYBURT 20,172

Below table shows the cities with the maximum import figures on a yearly basis. As expected, Istanbul has not left the first place for years. Thanks to being a big metropolis, business opportunities in the city, its consumption culture and population higher than most countries, Istanbul has had the best import figures for years.

max_yearly_import %>%
  kable(caption = "Max Amount of Import by Cities",format.args = list(big.mark = ",")) %>%
  kable_classic(full_width = F, html_font = "Cambria")
Max Amount of Import by Cities
Year City Yearly Amount
2013 ISTANBUL 146,056,466,645
2014 ISTANBUL 139,891,414,317
2015 ISTANBUL 121,178,388,613
2016 ISTANBUL 117,092,958,849
2017 ISTANBUL 135,791,868,999
2018 ISTANBUL 125,175,604,235
2019 ISTANBUL 109,280,925,933
2020 ISTANBUL 89,186,574,873

Below table shows the cities with the minimum export figures on a yearly basis. Gümüşhane occurs 4 times in the table. Even the enormous increase in 2014 was not enough get rid of from the last place of the list. An interesting point like the “Minimum Yearly Import Figures”: 5 out of 8 observations are from Black Sea region. Similar reasons like small population and rural life may be the reason for that.

min_yearly_export %>%
  kable(caption = "Min Amount of Export by Cities",format.args = list(big.mark = ",")) %>%
  kable_classic(full_width = F, html_font = "Cambria")
Min Amount of Export by Cities
Year City Yearly Amount
2013 GUMUSHANE 6,203
2014 GUMUSHANE 207,500
2015 GUMUSHANE 26,862
2016 GUMUSHANE 36,604
2017 BAYBURT 117,349
2018 KARS 208,046
2019 Belirsiz (Nonspecified) 21,163
2020 TUNCELI 130,312

Below table shows the cities with the maximum export figures on a yearly basis. As expected, again, Istanbul occurs in all observations when it comes to the maximum export figures. Due to huge population, high workforce, metropolis environment, Istanbul has had the best export figures for years.

max_yearly_export %>%
  kable(caption = "Max Amount of Export by Cities",format.args = list(big.mark = ",")) %>%
  kable_classic(full_width = F, html_font = "Cambria")
Max Amount of Export by Cities
Year City Yearly Amount
2013 ISTANBUL 81,576,600,102
2014 ISTANBUL 84,831,849,720
2015 ISTANBUL 79,274,722,710
2016 ISTANBUL 77,766,580,616
2017 ISTANBUL 83,448,092,342
2018 ISTANBUL 88,203,470,805
2019 ISTANBUL 88,827,639,699
2020 ISTANBUL 57,734,497,765

Boxplot of City Based Yearly Export/Import Figures

Below code chunk creates a boxplot to compare the distribution of Export/Import figures. Logarithmic normalization was used on AmountUSD variable to see distribution clearly. As seen from the below boxplot, export figures have very similar distribution year by year. There is not any drastic change. Import figures have wider distribution than export. This shows us that all cities import products/services and we are dependent on import. Minimum points of export figures seem to higher than import. There may be a domination on export. There is a limited potential for smaller cities and it’s not easy to exceed this potential.

data%>%
  select(Year,City,Month,TradingType,AmountUSD)%>%
ggplot(aes(x=as.factor(Year), y=log(AmountUSD),color=TradingType)) +
  geom_boxplot()+
  theme_test()+
  theme(legend.position="bottom")+
  labs(x="Year", y="log of Amount", title = "Yearly Export/Import Distribution Comparison")+ 
  scale_colour_discrete("Trading Type")

Yearly Overview of Export/Import Figures

Below code chunk helps us to visualize Export/Import figures monthly for last 2 years (from 2018 Sep to 2020 Sep). Bars show Balance which is calculated by the following notation \(Export-Import\). Similar trend in Export and Import can be observed easily by the lines. If we look at the last few months, we can see that both import and export figures have a rising trend but balance seems to be affected negatively. If we look at 2020 March, balance is reaching to a peak point (in negative way). This situation shows that Turkey’s export operations are adversely affected by Covid-19. Also, there is an interesting point: after a huge decrease in balance; it starts to decrease slowly month by month.

#p<-data%>%
#  filter(Date>"2018-08-01")%>%
#  select(Date,TradingType,AmountUSD)%>%
#  group_by(Date,TradingType)%>%
#  summarise(Total=sum(AmountUSD)/1000000000)%>%
#  pivot_wider(names_from = TradingType, values_from = Total)%>%
#  mutate(Balance = Export-Import)%>%
#  ggplot() +
#  geom_col(aes(x=Date, y=Balance),size = 0.5, color = "#C4961A", fill = "#FFDB6D")+
#  geom_line(aes(x=Date, y=Export),color="#F8766D",size=1) +
#  geom_line(aes(x=Date, y=Import),color="#00BFC4",size=1) +
#  theme_test() + 
#  labs(x="Date", y="Total Amount (billion dollars)", title = "Yearly Total Export/Import Comparison") + 
#  theme(legend.position="bottom")

cols<- c("Export"="#F8766D","Import"="#00BFC4","Balance"="#FFDB6D")

p<-data%>%
  filter(Date>"2018-08-01")%>%
  select(Date,TradingType,AmountUSD)%>%
  group_by(Date,TradingType)%>%
  summarise(Total=round(sum(AmountUSD)/1000000000,2))%>%
  pivot_wider(names_from = TradingType, values_from = Total)%>%
  mutate(Balance = Export-Import)%>%
  ggplot() +
  geom_col(aes(x=Date, y=Balance, fill = "Balance"),size = 0.5, color = "#C4961A")+
  geom_line(aes(x=Date, y=Export,color="Export"),size=1) +
  geom_line(aes(x=Date, y=Import,color="Import"),size=1) +
  scale_colour_manual(name="Lines",values=cols) + scale_fill_manual(name="Bar",values=cols)+
  theme_test() + 
  labs(x="Date", y="Total Amount (billion dollars)", title = "Yearly Total Export/Import Comparison")

ggplotly(p,tooltip = c("Balance","Export","Import","Date"))%>%
  layout(legend = list(orientation = "h", x = 0.25, y = -0.2))

Yearly Pie Charts of Export/Import Share

When we look at the total USD rates in the first 9 months for 2018, 2019 and 2020, it is seen that imports were 58% in 2018, 54% in 2019 and 57% in 2020. As a result, we notice that the export share in 2019 increased compared to 2018, but decreased again in 2020. The difference between import and export share in 2020 is seen as 14%. It has been observed that imports are more than exports in the last 3 years.

pie19 <- data%>%
  filter(Month < 10 & Year==2019)%>%
  select(TradingType,AmountUSD,Year,Month)%>%
  group_by(TradingType)%>%
  summarise(Amountusd = sum(AmountUSD))%>%
  mutate(share=Amountusd/sum(Amountusd)*100.0) %>%
  arrange(desc(Amountusd))

bpie19 <- ggplot(pie19, aes(x="", y= share, fill=TradingType)) +
  geom_bar(width = 1, size = 1, color = "white", stat = "identity") +
    coord_polar("y") +
    geom_text(aes(label = paste0(round(share), "%")), 
              position = position_stack(vjust = 0.5)) +
    labs(x = NULL, y = NULL, fill = NULL, 
         title = "Import vs Export for 1:9-2019") +
    guides(fill = guide_legend(reverse = TRUE)) +
    scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
    theme_classic() +
    theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))

pie20 <- data%>%
  filter(Month < 10 & Year==2020)%>%
  select(TradingType,AmountUSD,Year,Month)%>%
  group_by(TradingType)%>%
  summarise(Amountusd = sum(AmountUSD))%>%
  mutate(share=Amountusd/sum(Amountusd)*100.0) %>%
  arrange(desc(Amountusd))

bpie20 <- ggplot(pie20, aes(x="", y= share, fill=TradingType)) +
  geom_bar(width = 1, size = 1, color = "white", stat = "identity") +
    coord_polar("y") +
    geom_text(aes(label = paste0(round(share), "%")), 
              position = position_stack(vjust = 0.5)) +
    labs(x = NULL, y = NULL, fill = NULL, 
         title = "Import vs Export for 1:9-2020") +
    guides(fill = guide_legend(reverse = TRUE)) +
    scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
    theme_classic() +
    theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))

pie18 <- data%>%
  filter(Month < 10 & Year==2018)%>%
  select(TradingType,AmountUSD,Year,Month)%>%
  group_by(TradingType)%>%
  summarise(Amountusd = sum(AmountUSD))%>%
  mutate(share=Amountusd/sum(Amountusd)*100.0) %>%
  arrange(desc(Amountusd))

bpie18 <- ggplot(pie18, aes(x="", y= share, fill=TradingType)) +
  geom_bar(width = 1, size = 1, color = "white", stat = "identity") +
    coord_polar("y") +
    geom_text(aes(label = paste0(round(share), "%")), 
              position = position_stack(vjust = 0.5)) +
    labs(x = NULL, y = NULL, fill = NULL, 
         title = "Import vs Export for 1:9-2018") +
    guides(fill = guide_legend(reverse = TRUE)) +
    scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
    theme_classic() +
    theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))

pie17 <- data%>%
  filter(Month < 10 & Year==2017)%>%
  select(TradingType,AmountUSD,Year,Month)%>%
  group_by(TradingType)%>%
  summarise(Amountusd = sum(AmountUSD))%>%
  mutate(share=Amountusd/sum(Amountusd)*100.0) %>%
  arrange(desc(Amountusd))

bpie17 <- ggplot(pie17, aes(x="", y= share, fill=TradingType)) +
  geom_bar(width = 1, size = 1, color = "white", stat = "identity") +
    coord_polar("y") +
    geom_text(aes(label = paste0(round(share), "%")), 
              position = position_stack(vjust = 0.5)) +
    labs(x = NULL, y = NULL, fill = NULL, 
         title = "Import vs Export for 1:9-2017") +
    guides(fill = guide_legend(reverse = TRUE)) +
    scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
    theme_classic() +
    theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))



plot_grid(bpie17,bpie18,bpie19,bpie20)