Imported dataset by using read.csv()
function.
Looked at first 5 rows by using head()
library(tidyverse)
library(rmarkdown)
balik_hal_fiyatlari <- read.csv("C:/Users/tceme/Desktop/Data/R projects/balik_hal_fiyatlari.csv", stringsAsFactors = FALSE, header = TRUE,sep = ";", encoding="UTF-8")
head(balik_hal_fiyatlari)
## TARIH MAL_TURU MAL_ADI BIRIM ASGARI_UCRET AZAMI_UCRET
## 1 2021-01-02 00:00:00 BALIK TIRSI (DENIZ) KG 5.83 12.5
## 2 2021-01-02 00:00:00 BALIK KIRLANGIÇ (DENIZ) KG 3.00 80.0
## 3 2021-01-02 00:00:00 BALIK ÇIMÇIM (DENIZ) KG 3.50 8.0
## 4 2021-01-02 00:00:00 BALIK HANOS ( DENIZ ) KG 2.50 5.0
## 5 2021-01-02 00:00:00 BALIK KILIÇ (DENIZ) KG 45.00 45.0
## 6 2021-01-02 00:00:00 BALIK LÜFER ( DENIZ) KG 130.00 130.0
Structure of data checked by using str()
function.
str(balik_hal_fiyatlari)
## 'data.frame': 18230 obs. of 6 variables:
## $ TARIH : chr "2021-01-02 00:00:00" "2021-01-02 00:00:00" "2021-01-02 00:00:00" "2021-01-02 00:00:00" ...
## $ MAL_TURU : chr "BALIK" "BALIK" "BALIK" "BALIK" ...
## $ MAL_ADI : chr "TIRSI (DENIZ)" "KIRLANGIÇ (DENIZ)" "ÇIMÇIM (DENIZ)" "HANOS ( DENIZ )" ...
## $ BIRIM : chr "KG" "KG" "KG" "KG" ...
## $ ASGARI_UCRET: num 5.83 3 3.5 2.5 45 130 38 25 10 2.5 ...
## $ AZAMI_UCRET : num 12.5 80 8 5 45 130 38 55 10 6.67 ...
‘TARIH’ column converted to date type.
balik_hal_fiyatlari$TARIH <- as.Date(balik_hal_fiyatlari$TARIH)
str(balik_hal_fiyatlari)
## 'data.frame': 18230 obs. of 6 variables:
## $ TARIH : Date, format: "2021-01-02" "2021-01-02" ...
## $ MAL_TURU : chr "BALIK" "BALIK" "BALIK" "BALIK" ...
## $ MAL_ADI : chr "TIRSI (DENIZ)" "KIRLANGIÇ (DENIZ)" "ÇIMÇIM (DENIZ)" "HANOS ( DENIZ )" ...
## $ BIRIM : chr "KG" "KG" "KG" "KG" ...
## $ ASGARI_UCRET: num 5.83 3 3.5 2.5 45 130 38 25 10 2.5 ...
## $ AZAMI_UCRET : num 12.5 80 8 5 45 130 38 55 10 6.67 ...
Extracted how many different types of seafood exist in the data by using count()
and unique()
function.
There are 126 unique seafood and 4 types of unit.
count(unique(balik_hal_fiyatlari['MAL_ADI']))
## n
## 1 126
count(unique(balik_hal_fiyatlari['MAL_TURU']))
## n
## 1 4
Calculated the percentage of product’s type with the code below.
YUZDE <- select(balik_hal_fiyatlari, TARIH, MAL_TURU, MAL_ADI, BIRIM, ASGARI_UCRET, AZAMI_UCRET) %>%
group_by(MAL_TURU) %>%
summarise(n = n()) %>%
mutate(PERCENTAGE_OF_UNITS = (n / 18230) * 100)
YUZDE
## # A tibble: 4 x 3
## MAL_TURU n PERCENTAGE_OF_UNITS
## <chr> <int> <dbl>
## 1 BALIK 16212 88.9
## 2 ITHAL (DONUK) 386 2.12
## 3 KÜLTÜR 558 3.06
## 4 TATLI SU 1074 5.89
p <- YUZDE %>% ggplot( aes(x = "", y = PERCENTAGE_OF_UNITS, fill = MAL_TURU)) +
geom_bar(stat="identity", width=1, color="white") +
coord_polar("y", start=0) +
xlab("")+
ylab("Percantage of Units")+
theme_void()+
scale_fill_brewer(palette="Set1")
p + theme(legend.title = element_blank())
Grouped the data up to 1st quarter of the year by using group_by()
funciton and calculated the price difference as percentage by using mutate()
function.
I used lubridate()
function to divide ‘TARIH’ column to DAY - MONTH - YEAR features.
ILK_CEYREK_FIYAT <- select(balik_hal_fiyatlari, TARIH, MAL_ADI, ASGARI_UCRET, AZAMI_UCRET) %>%
group_by(YEAR = lubridate::year(TARIH), MONTH = lubridate::month(TARIH), DAY = lubridate::day(TARIH)) %>%
group_by(MAL_ADI) %>%
filter(MONTH < 4) %>%
summarise(MIN_ASGARI_UCRET = min(ASGARI_UCRET), MAX_AZAMI_UCRET = max(AZAMI_UCRET),
ORTALAMA_ASGARI_UCRET = mean(ASGARI_UCRET), ORTALAMA_AZAMI_UCRET = mean(AZAMI_UCRET)) %>%
mutate(DIFF_PERCENTAGE = (ORTALAMA_AZAMI_UCRET - ORTALAMA_ASGARI_UCRET) / ORTALAMA_AZAMI_UCRET * 100) %>%
arrange(desc(ORTALAMA_AZAMI_UCRET))
x <- head(ILK_CEYREK_FIYAT, n=10)
x
## # A tibble: 10 x 6
## MAL_ADI MIN_ASGARI_UCRET MAX_AZAMI_UCRET ORTALAMA_ASGARI~ ORTALAMA_AZAMI_~
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ISTAKO~ 3 500 163. 294.
## 2 KARIDE~ 10 300 70.0 194.
## 3 KALKAN~ 12 230 124. 171.
## 4 SINARI~ 12 225 101. 165
## 5 FANGRI~ 50 200 127. 162.
## 6 MERCAN~ 2 230 8.16 160.
## 7 DIL 3 200 23.2 152.
## 8 BARBUN~ 8 210 67.7 149.
## 9 TRANÇA~ 60 200 127. 149.
## 10 BARBUN~ 2 250 9.85 146.
## # ... with 1 more variable: DIFF_PERCENTAGE <dbl>
Visualized the average most valuable seafoods in 1st quarter with geom_bar()
Used coord_flip()
to swap the axises.
q <- x %>%
mutate(MAL_ADI = fct_reorder(MAL_ADI, ORTALAMA_AZAMI_UCRET)) %>%
ggplot( aes(x=MAL_ADI, y=ORTALAMA_AZAMI_UCRET)) +
geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
ggtitle("10 Most Expensive Seafood in the 1st quarter")+
coord_flip() +
xlab("Seafood") +
ylab("Average Max Price") +
theme_classic()
q
Calculated monthly average price of Dil Baligi and Barbun(Kaya) by using group_by
function.
DIL_BALIGI <- select(balik_hal_fiyatlari, TARIH, MAL_TURU, MAL_ADI, BIRIM, ASGARI_UCRET, AZAMI_UCRET) %>%
group_by(YEAR = lubridate::year(TARIH), MONTH = lubridate::month(TARIH), DAY = lubridate::day(TARIH)) %>%
group_by(MAL_ADI, MONTH) %>%
filter(MAL_ADI == "DIL") %>%
summarise(MIN_ASGARI_UCRET = min(ASGARI_UCRET), MAX_AZAMI_UCRET = max(AZAMI_UCRET),
ORTALAMA_ASGARI_UCRET = mean(ASGARI_UCRET), ORTALAMA_AZAMI_UCRET = mean(AZAMI_UCRET))
DIL_BALIGI
## # A tibble: 10 x 6
## # Groups: MAL_ADI [1]
## MAL_ADI MONTH MIN_ASGARI_UCRET MAX_AZAMI_UCRET ORTALAMA_ASGARI~
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 DIL 1 10 180 22.0
## 2 DIL 2 8 180 23.7
## 3 DIL 3 3 200 24.1
## 4 DIL 4 5 220 22.6
## 5 DIL 5 5 180 32.0
## 6 DIL 6 10 200 32.4
## 7 DIL 7 4 300 32.4
## 8 DIL 8 10 300 38.2
## 9 DIL 9 5 300 31.6
## 10 DIL 10 10 350 28.9
## # ... with 1 more variable: ORTALAMA_AZAMI_UCRET <dbl>
BARBUN_KAYA <- select(balik_hal_fiyatlari, TARIH, MAL_TURU, MAL_ADI, BIRIM, ASGARI_UCRET, AZAMI_UCRET) %>%
group_by(YEAR = lubridate::year(TARIH), MONTH = lubridate::month(TARIH), DAY = lubridate::day(TARIH)) %>%
group_by(MAL_ADI, MONTH) %>%
filter(MAL_ADI == "BARBUN(KAYA)") %>%
summarise(MIN_ASGARI_UCRET = min(ASGARI_UCRET), MAX_AZAMI_UCRET = max(AZAMI_UCRET),
ORTALAMA_ASGARI_UCRET = mean(ASGARI_UCRET), ORTALAMA_AZAMI_UCRET = mean(AZAMI_UCRET))
BARBUN_KAYA
## # A tibble: 10 x 6
## # Groups: MAL_ADI [1]
## MAL_ADI MONTH MIN_ASGARI_UCRET MAX_AZAMI_UCRET ORTALAMA_ASGARI~
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 BARBUN~ 1 8 170 64.1
## 2 BARBUN~ 2 50 170 69.7
## 3 BARBUN~ 3 8 210 68.5
## 4 BARBUN~ 4 8 200 76.7
## 5 BARBUN~ 5 70 200 108.
## 6 BARBUN~ 6 30 350 151.
## 7 BARBUN~ 7 70 400 134.
## 8 BARBUN~ 8 12 350 104.
## 9 BARBUN~ 9 60 350 98.6
## 10 BARBUN~ 10 70 350 99.3
## # ... with 1 more variable: ORTALAMA_AZAMI_UCRET <dbl>
As clearly seen from line plots of both fishes below, when hunting restriction begins, prices are rising.
library(hrbrthemes)
library(ggthemes)
COMPARE <- DIL_BALIGI %>%
ggplot( aes(x=MONTH, y=ORTALAMA_AZAMI_UCRET)) +
geom_line(data = DIL_BALIGI, aes(x = MONTH, y = ORTALAMA_AZAMI_UCRET, color = "Dil Baligi"), size = 1) +
geom_point(data = DIL_BALIGI, shape=21, color="black", fill="#69b3a2", size=3) +
geom_line(data = BARBUN_KAYA, aes(x = MONTH, y = ORTALAMA_AZAMI_UCRET, color = "Barbun(Kaya)"), size = 1) +
geom_point(data = BARBUN_KAYA, shape=8, color="black", fill="#EE0000", size=3) +
scale_x_discrete(label = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October"), "Months", limits=c(1:10))+
scale_y_continuous(name = "Average Max Price")+
labs(color = '')+
ggtitle("Monthly Average Maximum Price Comparison of Dil Baligi and Barbun(Kaya)")+
theme_solarized(base_size = 10)
COMPARE + theme(
plot.title = element_text(color = "Purple", size = 10, face = "italic"))