road-runner 11/1/2019
ODD(Automotive Distributers’ Association) is an organization that has 29 members that represent 47 international brands worlwide. ODD has the vision of providing the continuity and improvement of automotive sector as having the international competitive capacity within the concept of modernity, environmental awareness and social responsibility
The dataset we used in this analysis from ODD includes retail sales of automobiles and commercial vehicles which are both domestic and imported in Turkey.
Column names and explanations are below
brand_name: Brand of the car
auto_dom: Total number of sales for domestic automobiles
auto_imp: Total number of sales for imported automobiles
auto_total: Total number of sales for both domestic and imported automobiles
comm_dom: Total number of sales for domestic commercial vehicles
comm_imp: Total number of sales for imported commercial vehicles
comm_total: Total number of sales for both domestic and imported commercial vehicles
total_dom: Total number of sales for both domestic automobiles and commercial vehicles
total_imp: Total number of sales for both imported automobiles and commercial vehicles
total_total: Total Total number of sales
year
month
raw_data_full <- ''
# Creating a list including all csv names to use in for loop
list_all <- c('201601','201602','201603','201604','201605','201606','201607','201608','201609','201610','201611','201612'
,'201701','201702','201703','201704','201705','201706','201707','201708','201709','201710','201711','201712'
,'201801','201802','201803','201804','201805','201806','201807','201808','201809','201810','201811','201812')
for (i in 1:length(list_all)) {
file_url<- paste('https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/', list_all[i],'.csv?raw=true', sep='')
print(file_url)
# Reading csv from the url
raw_data <- read.csv(file_url,sep=',',header=F)
# Replacing NA values with 0 and label the time period with year and month, so when data is merged the data there won't be any confusion.
raw_data <-
raw_data %>%
mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>%
mutate(yearmonth=list_all[i])
# Appending raw_data_full into raw_data_full for each iteration
raw_data_full<-rbind(raw_data_full,raw_data)
}
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201601.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201602.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201603.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201604.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201605.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201606.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201607.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201608.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201609.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201610.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201611.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201612.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201701.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201702.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201703.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201704.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201705.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201706.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201707.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201708.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201709.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201710.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201711.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201712.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201801.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201802.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201803.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201804.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201805.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201806.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201807.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201808.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201809.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201810.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201811.csv?raw=true"
## [1] "https://github.com/pjournal/mef03g-road-runner/blob/master/ODD-Data/201812.csv?raw=true"
# Adding column names to raw_data_full
colnames(raw_data_full) <- c('brand_name','auto_dom','auto_imp','auto_total','comm_dom','comm_imp','comm_total','total_dom','total_imp','total_total','yearmonth')
raw_data_full <- raw_data_full %>% slice(-c(1))
df_category <-
raw_data_full %>%
select(yearmonth,auto_total,comm_total,total_total) %>%
filter(substr(yearmonth,1,4) == 2018) %>%
group_by(substr(yearmonth,1,4)) %>%
summarise(auto = sum(as.numeric((gsub(",","",auto_total))),na.rm = T),
comm = sum(as.numeric((gsub(",","",comm_total))),na.rm = T),
total = sum(as.numeric((gsub(",","",total_total))),na.rm = T))
df_category
## # A tibble: 1 x 4
## `substr(yearmonth, 1, 4)` auto comm total
## <chr> <dbl> <dbl> <dbl>
## 1 2018 486321 134616 620937
#Creating a column Category to transpose the values
pie_category <- data.frame(Category = c("auto","commercial"), freq=c(df_category$auto,df_category$comm))
ggplot(pie_category, aes (x="", y = freq, fill = factor(Category))) +
geom_col(position = 'stack', width = 1) +
geom_text(aes(label = paste(round(freq / sum(freq) * 100, 1), "%"), x = 1.3),
position = position_stack(vjust = 0.5)) +
theme_classic() +
theme(plot.title = element_text(hjust=0.5),
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(fill = "Category",
x = NULL,
y = NULL,
title = "Distribution of Retail Sales Auto/ Light Commercial Vehicles for 2018") +
coord_polar("y")
df_locality <-
raw_data_full %>%
select(yearmonth,total_dom,total_imp,total_total) %>%
filter(substr(yearmonth,1,4) == 2018) %>%
group_by(substr(yearmonth,1,4)) %>%
summarise(domestic = sum(as.numeric((gsub(",","",total_dom))),na.rm = T),
import = sum(as.numeric((gsub(",","",total_imp))),na.rm = T),
total = sum(as.numeric((gsub(",","",total_total))),na.rm = T))
df_locality
## # A tibble: 1 x 4
## `substr(yearmonth, 1, 4)` domestic import total
## <chr> <dbl> <dbl> <dbl>
## 1 2018 230522 390415 620937
#Creating a column Locality to transpose the values
pie_locality <- data.frame(Locality = c("domestic","import"), freq=c(df_locality$domestic,df_locality$import))
ggplot(pie_locality, aes (x="", y = freq, fill = factor(Locality))) +
geom_col(position = 'stack', width = 1) +
geom_text(aes(label = paste(round(freq / sum(freq) * 100, 1), "%"), x = 1.3),
position = position_stack(vjust = 0.5)) +
theme_classic() +
theme(plot.title = element_text(hjust=0.5),
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(fill = "Locality",
x = NULL,
y = NULL,
title = "Distribution of Retail Sales Domestic/Imported Vehicles for 2018") +
coord_polar("y")
df_top5 <-
raw_data_full %>%
group_by(substr(yearmonth,1,4),brand_name) %>%
filter(substr(yearmonth,1,4) == 2017) %>%
summarise(total = sum(as.numeric((gsub(",","",total_total))),na.rm = T)) %>%
arrange(desc(total)) %>%
mutate(rwn = row_number()) %>%
filter(rwn < 6)
df_top5
## # A tibble: 5 x 4
## # Groups: substr(yearmonth, 1, 4) [1]
## `substr(yearmonth, 1, 4)` brand_name total rwn
## <chr> <fct> <dbl> <int>
## 1 2017 RENAULT 130276 1
## 2 2017 FIAT 120049 2
## 3 2017 VOLKSWAGEN 117481 3
## 4 2017 FORD 111064 4
## 5 2017 HYUNDAI 52265 5
ggplot(df_top5, aes(x=brand_name, y=total, fill=brand_name))+
geom_bar(stat="identity")+
geom_text(aes(label=total), vjust=1.6, color="black", size=3.5)+
theme_minimal()+
labs(x="Brand Name",y="# of Sales",title="5 Most Selling Car Brands for 2017",fill="Brand")+
theme(axis.text.x = element_text(angle=30))