road-runner_ODD-Assignment

road-runner 11/1/2019

1.Introduction and Preparation

ODD(Automotive Distributers’ Association) is an organization that has 29 members that represent 47 international brands worlwide. ODD has the vision of providing the continuity and improvement of automotive sector as having the international competitive capacity within the concept of modernity, environmental awareness and social responsibility

The dataset we used in this analysis from ODD includes retail sales of automobiles and commercial vehicles which are both domestic and imported in Turkey.

Column names and explanations are below

  1. brand_name: Brand of the car

  2. auto_dom: Total number of sales for domestic automobiles

  3. auto_imp: Total number of sales for imported automobiles

  4. auto_total: Total number of sales for both domestic and imported automobiles

  5. comm_dom: Total number of sales for domestic commercial vehicles

  6. comm_imp: Total number of sales for imported commercial vehicles

  7. comm_total: Total number of sales for both domestic and imported commercial vehicles

  8. total_dom: Total number of sales for both domestic automobiles and commercial vehicles

  9. total_imp: Total number of sales for both imported automobiles and commercial vehicles

  10. total_total: Total Total number of sales

  11. year

  12. month

raw_data_full <- ''

# Creating a list including all csv names to use in for loop
list_all <- c('201601','201602','201603','201604','201605','201606','201607','201608','201609','201610','201611','201612'
              ,'201701','201702','201703','201704','201705','201706','201707','201708','201709','201710','201711','201712'
              ,'201801','201802','201803','201804','201805','201806','201807','201808','201809','201810','201811','201812')

for (i in 1:length(list_all)) {
    
  file_url<- paste('https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/', list_all[i],'.csv?raw=true', sep='')
  
  print(file_url)
  
  # Reading csv from the url
  raw_data <- read.csv(file_url,sep=',',header=F)
  
  # Replacing NA values with 0 and label the time period with year and month, so when data is merged the data there won't be any confusion.
  raw_data <- 
    raw_data %>%
    mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% 
    mutate(yearmonth=list_all[i])
  
  # Appending raw_data_full into raw_data_full for each iteration
  raw_data_full<-rbind(raw_data_full,raw_data)
  
}
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201601.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201602.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201603.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201604.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201605.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201606.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201607.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201608.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201609.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201610.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201611.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201612.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201701.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201702.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201703.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201704.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201705.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201706.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201707.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201708.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201709.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201710.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201711.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201712.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201801.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201802.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201803.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201804.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201805.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201806.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201807.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201808.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201809.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201810.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201811.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201812.csv?raw=true"
# Adding column names to raw_data_full
colnames(raw_data_full) <- c('brand_name','auto_dom','auto_imp','auto_total','comm_dom','comm_imp','comm_total','total_dom','total_imp','total_total','yearmonth')

raw_data_full <- raw_data_full %>% slice(-c(1))

2.Analysis

2.1.Category

df_category <-
raw_data_full %>%
  select(yearmonth,auto_total,comm_total,total_total) %>%
  filter(substr(yearmonth,1,4) == 2018) %>%
  group_by(substr(yearmonth,1,4)) %>%
  summarise(auto = sum(as.numeric((gsub(",","",auto_total))),na.rm = T),
            comm = sum(as.numeric((gsub(",","",comm_total))),na.rm = T),
            total = sum(as.numeric((gsub(",","",total_total))),na.rm = T))

df_category
## # A tibble: 1 x 4
##   `substr(yearmonth, 1, 4)`   auto   comm  total
##   <chr>                      <dbl>  <dbl>  <dbl>
## 1 2018                      486321 134616 620937
#Creating a column Category to transpose the values
pie_category <- data.frame(Category = c("auto","commercial"), freq=c(df_category$auto,df_category$comm))

ggplot(pie_category, aes (x="", y = freq, fill = factor(Category))) + 
  geom_col(position = 'stack', width = 1) +
  geom_text(aes(label = paste(round(freq / sum(freq) * 100, 1), "%"), x = 1.3),
            position = position_stack(vjust = 0.5)) +
  theme_classic() +
  theme(plot.title = element_text(hjust=0.5),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  labs(fill = "Category",
       x = NULL,
       y = NULL,
       title = "Distribution of Retail Sales Auto/ Light Commercial Vehicles for 2018") + 
  coord_polar("y")

2.2.Locality

df_locality <-
  raw_data_full %>% 
  select(yearmonth,total_dom,total_imp,total_total) %>%
  filter(substr(yearmonth,1,4) == 2018) %>%
  group_by(substr(yearmonth,1,4)) %>%
  summarise(domestic = sum(as.numeric((gsub(",","",total_dom))),na.rm = T),
          import = sum(as.numeric((gsub(",","",total_imp))),na.rm = T),
          total = sum(as.numeric((gsub(",","",total_total))),na.rm = T))

df_locality
## # A tibble: 1 x 4
##   `substr(yearmonth, 1, 4)` domestic import  total
##   <chr>                        <dbl>  <dbl>  <dbl>
## 1 2018                        230522 390415 620937
#Creating a column Locality to transpose the values
pie_locality <- data.frame(Locality = c("domestic","import"), freq=c(df_locality$domestic,df_locality$import))

ggplot(pie_locality, aes (x="", y = freq, fill = factor(Locality))) + 
  geom_col(position = 'stack', width = 1) +
  geom_text(aes(label = paste(round(freq / sum(freq) * 100, 1), "%"), x = 1.3),
            position = position_stack(vjust = 0.5)) +
  theme_classic() +
  theme(plot.title = element_text(hjust=0.5),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  labs(fill = "Locality",
       x = NULL,
       y = NULL,
       title = "Distribution of Retail Sales Domestic/Imported Vehicles for 2018") + 
  coord_polar("y")

2.3.Top5

df_top5 <-
  raw_data_full %>%
  group_by(substr(yearmonth,1,4),brand_name) %>%
  filter(substr(yearmonth,1,4) == 2017) %>%
  summarise(total = sum(as.numeric((gsub(",","",total_total))),na.rm = T)) %>%
  arrange(desc(total)) %>%
  mutate(rwn = row_number()) %>%
  filter(rwn < 6)

df_top5
## # A tibble: 5 x 4
## # Groups:   substr(yearmonth, 1, 4) [1]
##   `substr(yearmonth, 1, 4)` brand_name  total   rwn
##   <chr>                     <fct>       <dbl> <int>
## 1 2017                      RENAULT    130276     1
## 2 2017                      FIAT       120049     2
## 3 2017                      VOLKSWAGEN 117481     3
## 4 2017                      FORD       111064     4
## 5 2017                      HYUNDAI     52265     5
ggplot(df_top5, aes(x=brand_name, y=total, fill=brand_name))+
  geom_bar(stat="identity")+
  geom_text(aes(label=total), vjust=1.6, color="black", size=3.5)+
  theme_minimal()+
  labs(x="Brand Name",y="# of Sales",title="5 Most Selling Car Brands for 2017",fill="Brand")+
  theme(axis.text.x = element_text(angle=30))