ODD Assignment

Group Mujde ‘R’

1.Introduction

First we find the data on Otomotiv Distibütörleri Derneği website. Then we will find yearly sales data from 2016 to 2019. We combined this data in Konsolide_ODD_datasi.xlsx. We will try to analyse this data and make some conclusions.

Raw Data Column Descriptions

1. Year (year)
2. Brand (brand)
3. Domestic Car Sales (car_dom)
4. Imported Car Sales (car_imp)
5. Total Car Sales (car_total)
6. Domestic Light Commercial Sales (lcv_dom)
7. Imported Light Commercial Sales (lcv_imp)
8. Total Light Commercial Sales (lcv_total)
9. Domestic Total Sales (all_dom)
10. Imported Total Sales (all_imp)
11. All Sales (all_total)

2.Download Raw Data

Our raw excel file is in our repository. We can automatically download that file and put it in a temporary file. Then we can read that excel document into R and remove the temp file.

## Our temp file
tmp<-tempfile(fileext=".xlsx")
## Download file from repository
download.file("https://github.com/pjournal/mef03g-mujde-r/blob/master/Konsolide_ODD_datasi.xlsx?raw=true",destfile=tmp, mode='wb')
## Read that excel file using readxl library. col_names not included and first two rows skipped
raw_data<-readxl::read_excel(tmp,col_names = FALSE, skip=2)

## Remove the temp file
file.remove(tmp)

## [1] TRUE

# Let's see our data
head(raw_data)

## # A tibble: 6 x 11
##    ...1 ...2          ...3  ...4  ...5  ...6  ...7  ...8  ...9 ...10 ...11
##   <dbl> <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  2016 ALFA ROMEO       0   634   634     0     0     0     0   634   634
## 2  2016 ASTON MARTIN     0    29    29     0     0     0     0    29    29
## 3  2016 AUDI             0 22005 22005     0     0     0     0 22005 22005
## 4  2016 BENTLEY          0     9     9     0     0     0     0     9     9
## 5  2016 BMW              0 27166 27166     0     0     0     0 27166 27166
## 6  2016 CHERY            0   144   144     0     0     0     0   144   144

3.Organize Data

## rename colnames
colnames(raw_data) <- c("year","brand","car_dom","car_imp","car_total","lcv_dom","lcv_imp","lcv_total","all_dom","all_imp","all_total")
## replace NA values with 0
car_data <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))
## let's see our data again
print(head(car_data),width=Inf)

## # A tibble: 6 x 11
##    year brand        car_dom car_imp car_total lcv_dom lcv_imp lcv_total
##   <dbl> <chr>          <dbl>   <dbl>     <dbl>   <dbl>   <dbl>     <dbl>
## 1  2016 ALFA ROMEO         0     634       634       0       0         0
## 2  2016 ASTON MARTIN       0      29        29       0       0         0
## 3  2016 AUDI               0   22005     22005       0       0         0
## 4  2016 BENTLEY            0       9         9       0       0         0
## 5  2016 BMW                0   27166     27166       0       0         0
## 6  2016 CHERY              0     144       144       0       0         0
##   all_dom all_imp all_total
##     <dbl>   <dbl>     <dbl>
## 1       0     634       634
## 2       0      29        29
## 3       0   22005     22005
## 4       0       9         9
## 5       0   27166     27166
## 6       0     144       144

4.Analysis

4.1 Best Seller Brands

## remove data which total_sales are 0
best_seller <- car_data %>% filter(car_total > 0 & lcv_total > 0) 
## group by brand and sum total_sales
best_seller <- best_seller %>% select(brand,all_total) %>% group_by(brand) %>% summarise(all_total = sum(all_total))
## arrange by all_total desc
best_seller <- best_seller %>% arrange(desc(all_total))
## best_seller 6 brands
best_seller <- best_seller %>% filter(row_number() < 7)
print(best_seller, width=Inf)

## # A tibble: 6 x 2
##   brand      all_total
##   <chr>          <dbl>
## 1 RENAULT       383624
## 2 VOLKSWAGEN    354062
## 3 FIAT          344592
## 4 FORD          317304
## 5 HYUNDAI       154569
## 6 TOYOTA        151026

fancy_scientific <- function(l) {
     # turn in to character string in scientific notation
     l <- format(l, scientific = TRUE)
     # quote the part before the exponent to keep all the digits
     l <- gsub("^(.*)e", "'\\1'e", l)
     # turn the 'e+' into plotmath format
     l <- gsub("e", "%*%10^", l)
     # return this as an expression
     parse(text=l)
}
ggplot(best_seller, aes(x=brand, y=all_total, fill=brand)) + geom_bar(alpha=0.6,stat="identity") + geom_point() + scale_y_continuous(labels=fancy_scientific) + labs(x="Brand Name",y="Sales Count",title="6 Best Seller Brands",fill="Brand")

4.2 Total Sales Over Years

## remove data which total_sales are 0
total_sales_over_years <- car_data %>% filter(car_total > 0 & lcv_total > 0) 
## group by year and sum total_sales
total_sales_over_years <- total_sales_over_years %>% select(year,all_total) %>% group_by(year) %>% summarise(all_total = sum(all_total))
## arrange by year desc
total_sales_over_years <- total_sales_over_years %>% arrange(desc(year))
## remove last year because the year is not finished
total_sales_over_years <- total_sales_over_years %>% slice(-1)
print(total_sales_over_years, width=Inf)

## # A tibble: 3 x 2
##    year all_total
##   <dbl>     <dbl>
## 1  2018    490991
## 2  2017    776801
## 3  2016    781695

## let's convert sales by year data to pie category
percentage_data <- mutate(total_sales_over_years, sales_percentage = all_total / sum(all_total) * 100)
## we can see that there is a big decline in sales of 2018
percentage_data

## # A tibble: 3 x 3
##    year all_total sales_percentage
##   <dbl>     <dbl>            <dbl>
## 1  2018    490991             24.0
## 2  2017    776801             37.9
## 3  2016    781695             38.1

ggplot(percentage_data, aes(x="", y=sales_percentage, fill=factor(year))) + geom_bar(stat="identity", width=1) + coord_polar("y", start=0) +geom_text(aes(label = paste0(round(sales_percentage), "%")), position = position_stack(vjust = 0.5)) + scale_fill_manual(values=c("#55DDE0", "#33658A", "#2F4858")) + labs(x = NULL, y = NULL, fill = NULL, title = "Sales Percentage Over Year") + theme_classic() + theme(axis.line = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.title = element_text(hjust = 0.5, color = "#666666"))

4.3 Light Commercial Vs Automobile Sales By Brands

## remove data which total_sales are 0
total_sales_brands_by_type <- car_data %>% filter(car_total > 0 & lcv_total > 0)
## group by year and sum total_sales
total_sales_brands_by_type <- total_sales_brands_by_type %>% select(brand, year,lcv_total, car_total) %>% group_by(brand) %>% summarise(lcv_total = sum(lcv_total), car_total = sum(car_total)) %>% select (brand, lcv_total, car_total)
## reshape our dataframe which lcv_total and car_total values are different rows by brand
total_sales_brands_by_type <- reshape(total_sales_brands_by_type, varying = c("lcv_total", "car_total"), timevar="sales_type", v.names="sales_count", times = c("lcv_total", "car_total"),direction="long")

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## create flipped stack bar chart with lcv_total - car_total data by brands
ggplot(total_sales_brands_by_type, aes(x=reorder(brand,sales_count), y=sales_count/1000)) + geom_bar(aes(fill = sales_type), stat = "identity",position ="stack") + coord_flip() + geom_point() + labs(x="Brand Name",y="Sales Count K (divided by 1000",title="Car / Commercial Sales By Brands",fill="Brand")