####ODD Homework Update* |
---|
title: “EGM_Group_Assignment” |
author: “Mehtap Colak, Tugce Uncu, Bulent Buyuk, Oguz Colak” |
date: “07 11 2019” |
output: html_document |
# Create a temporary file
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggrepel)
#Create a temporary file
tmp=tempfile(fileext=".xlsx")
#Download file from repository to the temp file
download.file("https://github.com/pjournal/mef03-tugceu/blob/master/CarSalesAll.xlsx?raw=true", destfile=tmp,mode='wb')
#Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=2,col_names=FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * … and 7 more problems
# Remove the temp file
file.remove(tmp)
## [1] TRUE
# Let's see our raw data
head(raw_data)
## # A tibble: 6 x 12
## ...1 ...2 ...3 ...4 ...5 ...6 ...7 ...8 ...9 ...10 ...11 ...12
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA R… 0 12 12 0 0 0 0 12 12 1 2016
## 2 ASTON … 0 2 2 0 0 0 0 2 2 1 2016
## 3 AUDI 0 911 911 0 0 0 0 911 911 1 2016
## 4 BENTLEY 0 0 0 0 0 0 0 0 0 1 2016
## 5 BMW 0 496 496 0 0 0 0 496 496 1 2016
## 6 CHERY 0 30 30 0 0 0 0 30 30 1 2016
# Use the same column names in your data.
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total","month","year")
# Now we replace NA values with 0 and label the time period with year and month, so when we merge the data we won't be confused.
car_data <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
print(car_data,width=Inf)
## # A tibble: 2,001 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 12 12 0 0 0
## 2 ASTON MARTİN 0 2 2 0 0 0
## 3 AUDI 0 911 911 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 496 496 0 0 0
## 6 CHERY 0 30 30 0 0 0
## 7 CITROEN 0 394 394 41 207 248
## 8 DACIA 0 1235 1235 0 221 221
## 9 DS 0 8 8 0 0 0
## 10 FERRARI 0 3 3 0 0 0
## total_dom total_imp total_total month year
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 12 12 1 2016
## 2 0 2 2 1 2016
## 3 0 911 911 1 2016
## 4 0 0 0 1 2016
## 5 0 496 496 1 2016
## 6 0 30 30 1 2016
## 7 41 601 642 1 2016
## 8 0 1456 1456 1 2016
## 9 0 8 8 1 2016
## 10 0 3 3 1 2016
## # … with 1,991 more rows
####Analysis
###TOP 5 Best and Worst Performing Brands
best_sellers <- car_data%>%
select(brand_name,total_total)%>%
group_by(brand_name)%>%
summarise(total_total=sum(total_total))%>%
arrange(desc(total_total))%>%
filter(row_number()<6)
worst_sellers <- car_data%>%
select(brand_name,total_total)%>%
group_by(brand_name)%>%
summarise(total_total=sum(total_total))%>%
arrange(total_total)%>%
filter(row_number()<6)
print(best_sellers, width = Inf)
## # A tibble: 5 x 2
## brand_name total_total
## <chr> <dbl>
## 1 RENAULT 380614
## 2 VOLKSWAGEN 347657
## 3 FIAT 338887
## 4 FORD 312159
## 5 HYUNDAI 151963
print(worst_sellers, width=Inf)
## # A tibble: 5 x 2
## brand_name total_total
## <chr> <dbl>
## 1 GEELY 0
## 2 LANCIA 0
## 3 OTOKAR 0
## 4 ASTON MARTİN 2
## 5 TATA 9
###Top 5 Best Performing Brands Total Sales Change Over The Years
yearly_comparison <- car_data%>%
select(brand_name,total_total,year)%>%
group_by(brand_name,year)%>%
summarise(total_total=sum(total_total))%>%
filter(brand_name %in% c("RENAULT","VOLKSWAGEN","FIAT","FORD","HYUNDAI"))%>%
filter(year %in% c(2016,2017,2018))
ggplot(yearly_comparison, aes(x=year, y=total_total, color=brand_name)) +
geom_line() + labs(title="Top 5 Best Performing Brands Total Sales Change Over The Years")
###Market Leaders’ Domestic Market Share
domestic_market <- car_data%>%
select(brand_name, total_dom)%>%
group_by(brand_name)%>%
summarise(total_dom=sum(total_dom))%>%
mutate(domestic_market_share=total_dom/sum(total_dom)*100)%>%
arrange(desc(domestic_market_share))%>%
filter(row_number()<6)
ggplot(domestic_market, aes(x="",y=domestic_market_share, fill=brand_name))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0)+
theme_void()+
theme(legend.position="none") +
geom_text(aes(y = domestic_market_share, label = brand_name),position = position_stack(vjust = 0.5), color = "white", size=6) +
scale_fill_brewer(palette="Set1")
###Top Ten Brands’ auto vs commercial sales
auto_vs_commercial <- car_data%>%
select(brand_name,auto_total,comm_total,total_total)%>%
group_by(brand_name)%>%
summarise(auto_total=sum(auto_total),comm_total=sum(comm_total),total_total=sum(total_total))%>%
arrange(desc(total_total))%>%
filter(row_number()<11)
auto_vs_commercial
## # A tibble: 10 x 4
## brand_name auto_total comm_total total_total
## <chr> <dbl> <dbl> <dbl>
## 1 RENAULT 336976 43638 380614
## 2 VOLKSWAGEN 264524 83133 347657
## 3 FIAT 188504 150383 338887
## 4 FORD 112680 199479 312159
## 5 HYUNDAI 144507 7456 151963
## 6 TOYOTA 134536 13979 148515
## 7 DACIA 118889 17114 136003
## 8 OPEL 130202 504 130706
## 9 PEUGEOT 92366 29545 121911
## 10 MERCEDES-BENZ 79591 28127 107718
auto_vs_commercial_plot <- ggplot(auto_vs_commercial)+
geom_point(aes(auto_total, comm_total), color='#00AFBB',size=1)+
geom_label_repel(aes(auto_total, comm_total,label=auto_vs_commercial$brand_name),size=2)+
theme_classic(base_size = 10)+
coord_fixed(ratio = 1.2)
auto_vs_commercial_plot