####ODD Homework Update*
title: “EGM_Group_Assignment”
author: “Mehtap Colak, Tugce Uncu, Bulent Buyuk, Oguz Colak”
date: “07 11 2019”
output: html_document
# Create a temporary file
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)

library(ggrepel)
#Create a temporary file
tmp=tempfile(fileext=".xlsx")
#Download file from repository to the temp file
download.file("https://github.com/pjournal/mef03-tugceu/blob/master/CarSalesAll.xlsx?raw=true", destfile=tmp,mode='wb')
#Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=2,col_names=FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * … and 7 more problems
# Remove the temp file
file.remove(tmp)
## [1] TRUE
# Let's see our raw data
head(raw_data)
## # A tibble: 6 x 12
##   ...1     ...2  ...3  ...4  ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12
##   <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA R…     0    12    12     0     0     0     0    12    12     1  2016
## 2 ASTON …     0     2     2     0     0     0     0     2     2     1  2016
## 3 AUDI        0   911   911     0     0     0     0   911   911     1  2016
## 4 BENTLEY     0     0     0     0     0     0     0     0     0     1  2016
## 5 BMW         0   496   496     0     0     0     0   496   496     1  2016
## 6 CHERY       0    30    30     0     0     0     0    30    30     1  2016
# Use the same column names in your data.
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total","month","year")
# Now we replace NA values with 0 and label the time period with year and month, so when we merge the data we won't be confused.
car_data <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) 
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
print(car_data,width=Inf)
## # A tibble: 2,001 x 12
##    brand_name   auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>           <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO          0       12         12        0        0          0
##  2 ASTON MARTİN        0        2          2        0        0          0
##  3 AUDI                0      911        911        0        0          0
##  4 BENTLEY             0        0          0        0        0          0
##  5 BMW                 0      496        496        0        0          0
##  6 CHERY               0       30         30        0        0          0
##  7 CITROEN             0      394        394       41      207        248
##  8 DACIA               0     1235       1235        0      221        221
##  9 DS                  0        8          8        0        0          0
## 10 FERRARI             0        3          3        0        0          0
##    total_dom total_imp total_total month  year
##        <dbl>     <dbl>       <dbl> <dbl> <dbl>
##  1         0        12          12     1  2016
##  2         0         2           2     1  2016
##  3         0       911         911     1  2016
##  4         0         0           0     1  2016
##  5         0       496         496     1  2016
##  6         0        30          30     1  2016
##  7        41       601         642     1  2016
##  8         0      1456        1456     1  2016
##  9         0         8           8     1  2016
## 10         0         3           3     1  2016
## # … with 1,991 more rows

####Analysis

###TOP 5 Best and Worst Performing Brands

best_sellers <- car_data%>%
  select(brand_name,total_total)%>%
  group_by(brand_name)%>%
  summarise(total_total=sum(total_total))%>%
  arrange(desc(total_total))%>%
  filter(row_number()<6)
worst_sellers <- car_data%>%
  select(brand_name,total_total)%>%
  group_by(brand_name)%>%
  summarise(total_total=sum(total_total))%>%
  arrange(total_total)%>%
  filter(row_number()<6)
print(best_sellers, width = Inf)
## # A tibble: 5 x 2
##   brand_name total_total
##   <chr>            <dbl>
## 1 RENAULT         380614
## 2 VOLKSWAGEN      347657
## 3 FIAT            338887
## 4 FORD            312159
## 5 HYUNDAI         151963
print(worst_sellers, width=Inf)
## # A tibble: 5 x 2
##   brand_name   total_total
##   <chr>              <dbl>
## 1 GEELY                  0
## 2 LANCIA                 0
## 3 OTOKAR                 0
## 4 ASTON MARTİN           2
## 5 TATA                   9

###Top 5 Best Performing Brands Total Sales Change Over The Years

yearly_comparison <- car_data%>%
  select(brand_name,total_total,year)%>%
  group_by(brand_name,year)%>%
  summarise(total_total=sum(total_total))%>%
  filter(brand_name %in% c("RENAULT","VOLKSWAGEN","FIAT","FORD","HYUNDAI"))%>%
  filter(year  %in% c(2016,2017,2018))

ggplot(yearly_comparison, aes(x=year, y=total_total, color=brand_name))  +
  geom_line() + labs(title="Top 5 Best Performing Brands Total Sales Change Over The Years")

###Market Leaders’ Domestic Market Share

domestic_market <- car_data%>%
  select(brand_name, total_dom)%>%
  group_by(brand_name)%>%
  summarise(total_dom=sum(total_dom))%>%
  mutate(domestic_market_share=total_dom/sum(total_dom)*100)%>%
  arrange(desc(domestic_market_share))%>%
  filter(row_number()<6)
ggplot(domestic_market, aes(x="",y=domestic_market_share, fill=brand_name))+
  geom_bar(width = 1, stat = "identity")+
  coord_polar("y", start=0)+
  theme_void()+
  theme(legend.position="none") +
  
  geom_text(aes(y = domestic_market_share, label = brand_name),position = position_stack(vjust = 0.5), color = "white", size=6) +
  scale_fill_brewer(palette="Set1")

###Top Ten Brands’ auto vs commercial sales

auto_vs_commercial <- car_data%>%
  select(brand_name,auto_total,comm_total,total_total)%>%
  group_by(brand_name)%>%
  summarise(auto_total=sum(auto_total),comm_total=sum(comm_total),total_total=sum(total_total))%>%
  arrange(desc(total_total))%>%
  filter(row_number()<11)
auto_vs_commercial
## # A tibble: 10 x 4
##    brand_name    auto_total comm_total total_total
##    <chr>              <dbl>      <dbl>       <dbl>
##  1 RENAULT           336976      43638      380614
##  2 VOLKSWAGEN        264524      83133      347657
##  3 FIAT              188504     150383      338887
##  4 FORD              112680     199479      312159
##  5 HYUNDAI           144507       7456      151963
##  6 TOYOTA            134536      13979      148515
##  7 DACIA             118889      17114      136003
##  8 OPEL              130202        504      130706
##  9 PEUGEOT            92366      29545      121911
## 10 MERCEDES-BENZ      79591      28127      107718
auto_vs_commercial_plot <- ggplot(auto_vs_commercial)+
  geom_point(aes(auto_total, comm_total), color='#00AFBB',size=1)+
  geom_label_repel(aes(auto_total, comm_total,label=auto_vs_commercial$brand_name),size=2)+
  theme_classic(base_size = 10)+
  coord_fixed(ratio = 1.2)
auto_vs_commercial_plot