library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ readr   1.3.1
## ✔ tibble  2.1.3     ✔ purrr   0.3.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ ggplot2 3.2.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readxl)

##Step I: Download Raw Data from Repository

#Create a temporary file
tmp<-tempfile(fileext="odd_car_sales_2019_09.xlsx")
#Download
download.file("https://github.com/pjournal/mef03-arslnbatu/raw/master/odd_car_sales_2019_09.xlsx?raw=true",destfile=tmp,mode="wb")
#Read excel
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * … and 5 more problems
#Remove
file.remove(tmp)
## [1] TRUE

##Step II: Remove the last two rows:

# Remove the last two rows
raw_data <- raw_data %>% slice(-c(43,44))
#First 5 raw data
head(raw_data)
## # A tibble: 6 x 10
##   ...1          ...2  ...3  ...4  ...5  ...6  ...7  ...8  ...9 ...10
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO      NA     9     9    NA    NA     0     0     9     9
## 2 ASTON MARTIN    NA     3     3    NA    NA     0     0     3     3
## 3 AUDI            NA  1191  1191    NA    NA     0     0  1191  1191
## 4 BENTLEY         NA     0     0    NA    NA     0     0     0     0
## 5 BMW             NA   737   737    NA    NA     0     0   737   737
## 6 CITROEN         NA   694   694    NA   363   363     0  1057  1057

##Step III: Civilized the Data

# Replace NA values with 0 and label the time period with year and month.
car_data_sep_19 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2019,month=9)
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
# Change the column names
colnames(car_data_sep_19) <- c("brand","local_auto","imp_auto","total_auto","local_comm","imp_comm","total_comm","local_total","imp_total","all_total","year","months")
print(car_data_sep_19,width=Inf)
## # A tibble: 42 x 12
##    brand        local_auto imp_auto total_auto local_comm imp_comm
##    <chr>             <dbl>    <dbl>      <dbl>      <dbl>    <dbl>
##  1 ALFA ROMEO            0        9          9          0        0
##  2 ASTON MARTIN          0        3          3          0        0
##  3 AUDI                  0     1191       1191          0        0
##  4 BENTLEY               0        0          0          0        0
##  5 BMW                   0      737        737          0        0
##  6 CITROEN               0      694        694          0      363
##  7 DACIA                 0      998        998          0      201
##  8 DS                    0       15         15          0        0
##  9 FERRARI               0        2          2          0        0
## 10 FIAT               4904       77       4981        956      186
##    total_comm local_total imp_total all_total  year months
##         <dbl>       <dbl>     <dbl>     <dbl> <dbl>  <dbl>
##  1          0           0         9         9  2019      9
##  2          0           0         3         3  2019      9
##  3          0           0      1191      1191  2019      9
##  4          0           0         0         0  2019      9
##  5          0           0       737       737  2019      9
##  6        363           0      1057      1057  2019      9
##  7        201           0      1199      1199  2019      9
##  8          0           0        15        15  2019      9
##  9          0           0         2         2  2019      9
## 10       1142        5860       263      6123  2019      9
## # … with 32 more rows

##Step IV: Save the Data

saveRDS(car_data_sep_19,file="C:\\Users\\HP\\Downloads\\odd_car_sales_2019_09.rds")

##Step V: Make Some Analysis

#calculate local and import point
data <- car_data_sep_19 %>% 
  filter(local_total > 0 & imp_total > 0 & brand != "TOPLAM:") %>%
  select(brand,local_total,all_total,imp_total) %>%
  mutate(local_point = round(local_total/all_total,2), imp_point = round(imp_total/all_total,2)) %>%
  arrange(desc(local_point))
data
## # A tibble: 7 x 6
##   brand   local_total all_total imp_total local_point imp_point
##   <chr>         <dbl>     <dbl>     <dbl>       <dbl>     <dbl>
## 1 ISUZU            62        63         1        0.98      0.02
## 2 FIAT           5860      6123       263        0.96      0.04
## 3 TOYOTA         2387      2573       186        0.93      0.07
## 4 HONDA          1500      1720       220        0.87      0.13
## 5 RENAULT        6247      7296      1049        0.86      0.14
## 6 FORD           2447      3652      1205        0.67      0.33
## 7 HYUNDAI         564      1604      1040        0.35      0.65
# List of all total sales for all vehicles
car_data_sep_19 %>% 
  filter(total_auto > 0 & total_comm > 0 & brand != "TOPLAM:") %>%
  select(brand,all_total) %>%
  arrange(desc(all_total))
## # A tibble: 15 x 2
##    brand         all_total
##    <chr>             <dbl>
##  1 RENAULT            7296
##  2 FIAT               6123
##  3 VOLKSWAGEN         4428
##  4 FORD               3652
##  5 TOYOTA             2573
##  6 PEUGEOT            1938
##  7 HYUNDAI            1604
##  8 OPEL               1548
##  9 MERCEDES-BENZ      1401
## 10 DACIA              1199
## 11 CITROEN            1057
## 12 NISSAN             1019
## 13 KIA                 647
## 14 MITSUBISHI          141
## 15 SSANGYONG            20
#Plot between local point and all total sales for brands
ggplot(data, aes(local_point, all_total, colour = brand)) + 
  geom_point(size=6)