Imported dataset by using read.csv() function.

Looked at first 5 rows by using head()

library(tidyverse)
library(rmarkdown)

balik_hal_fiyatlari <- read.csv("C:/Users/tceme/Desktop/Data/R projects/balik_hal_fiyatlari.csv", stringsAsFactors = FALSE, header = TRUE,sep = ";", encoding="UTF-8")

head(balik_hal_fiyatlari)
##                 TARIH MAL_TURU           MAL_ADI BIRIM ASGARI_UCRET AZAMI_UCRET
## 1 2021-01-02 00:00:00    BALIK     TIRSI (DENIZ)    KG         5.83        12.5
## 2 2021-01-02 00:00:00    BALIK KIRLANGIÇ (DENIZ)    KG         3.00        80.0
## 3 2021-01-02 00:00:00    BALIK    ÇIMÇIM (DENIZ)    KG         3.50         8.0
## 4 2021-01-02 00:00:00    BALIK   HANOS ( DENIZ )    KG         2.50         5.0
## 5 2021-01-02 00:00:00    BALIK     KILIÇ (DENIZ)    KG        45.00        45.0
## 6 2021-01-02 00:00:00    BALIK    LÜFER ( DENIZ)    KG       130.00       130.0

Structure of data checked by using str() function.

str(balik_hal_fiyatlari)
## 'data.frame':    18230 obs. of  6 variables:
##  $ TARIH       : chr  "2021-01-02 00:00:00" "2021-01-02 00:00:00" "2021-01-02 00:00:00" "2021-01-02 00:00:00" ...
##  $ MAL_TURU    : chr  "BALIK" "BALIK" "BALIK" "BALIK" ...
##  $ MAL_ADI     : chr  "TIRSI (DENIZ)" "KIRLANGIÇ (DENIZ)" "ÇIMÇIM (DENIZ)" "HANOS ( DENIZ )" ...
##  $ BIRIM       : chr  "KG" "KG" "KG" "KG" ...
##  $ ASGARI_UCRET: num  5.83 3 3.5 2.5 45 130 38 25 10 2.5 ...
##  $ AZAMI_UCRET : num  12.5 80 8 5 45 130 38 55 10 6.67 ...

‘TARIH’ column converted to date type.

balik_hal_fiyatlari$TARIH <- as.Date(balik_hal_fiyatlari$TARIH)

str(balik_hal_fiyatlari)
## 'data.frame':    18230 obs. of  6 variables:
##  $ TARIH       : Date, format: "2021-01-02" "2021-01-02" ...
##  $ MAL_TURU    : chr  "BALIK" "BALIK" "BALIK" "BALIK" ...
##  $ MAL_ADI     : chr  "TIRSI (DENIZ)" "KIRLANGIÇ (DENIZ)" "ÇIMÇIM (DENIZ)" "HANOS ( DENIZ )" ...
##  $ BIRIM       : chr  "KG" "KG" "KG" "KG" ...
##  $ ASGARI_UCRET: num  5.83 3 3.5 2.5 45 130 38 25 10 2.5 ...
##  $ AZAMI_UCRET : num  12.5 80 8 5 45 130 38 55 10 6.67 ...

Extracted how many different types of seafood exist in the data by using count() and unique() function.

There are 126 unique seafood and 4 types of unit.

count(unique(balik_hal_fiyatlari['MAL_ADI']))
##     n
## 1 126
count(unique(balik_hal_fiyatlari['MAL_TURU']))
##   n
## 1 4

Calculated the percentage of product’s type with the code below.

YUZDE <- select(balik_hal_fiyatlari, TARIH, MAL_TURU, MAL_ADI, BIRIM, ASGARI_UCRET, AZAMI_UCRET) %>%
  group_by(MAL_TURU) %>%
  summarise(n = n()) %>%  
  mutate(PERCENTAGE_OF_UNITS = (n / 18230) * 100)
  
YUZDE
## # A tibble: 4 x 3
##   MAL_TURU          n PERCENTAGE_OF_UNITS
##   <chr>         <int>               <dbl>
## 1 BALIK         16212               88.9 
## 2 ITHAL (DONUK)   386                2.12
## 3 KÜLTÜR          558                3.06
## 4 TATLI SU       1074                5.89
p <- YUZDE %>% ggplot( aes(x = "", y = PERCENTAGE_OF_UNITS, fill = MAL_TURU)) + 
  geom_bar(stat="identity", width=1, color="white") + 
  coord_polar("y", start=0) +
  xlab("")+
  ylab("Percantage of Units")+
  theme_void()+
  scale_fill_brewer(palette="Set1")

p + theme(legend.title = element_blank())

Grouped the data up to 1st quarter of the year by using group_by() funciton and calculated the price difference as percentage by using mutate() function.

I used lubridate() function to divide ‘TARIH’ column to DAY - MONTH - YEAR features.

ILK_CEYREK_FIYAT <- select(balik_hal_fiyatlari, TARIH, MAL_ADI, ASGARI_UCRET, AZAMI_UCRET) %>%
  group_by(YEAR = lubridate::year(TARIH), MONTH = lubridate::month(TARIH), DAY = lubridate::day(TARIH)) %>%
  group_by(MAL_ADI) %>%
  filter(MONTH < 4) %>%
  summarise(MIN_ASGARI_UCRET = min(ASGARI_UCRET), MAX_AZAMI_UCRET = max(AZAMI_UCRET), 
            ORTALAMA_ASGARI_UCRET = mean(ASGARI_UCRET), ORTALAMA_AZAMI_UCRET = mean(AZAMI_UCRET)) %>%
  mutate(DIFF_PERCENTAGE = (ORTALAMA_AZAMI_UCRET - ORTALAMA_ASGARI_UCRET) / ORTALAMA_AZAMI_UCRET * 100) %>%
  arrange(desc(ORTALAMA_AZAMI_UCRET)) 

x <- head(ILK_CEYREK_FIYAT, n=10)

x
## # A tibble: 10 x 6
##    MAL_ADI MIN_ASGARI_UCRET MAX_AZAMI_UCRET ORTALAMA_ASGARI~ ORTALAMA_AZAMI_~
##    <chr>              <dbl>           <dbl>            <dbl>            <dbl>
##  1 ISTAKO~                3             500           163.               294.
##  2 KARIDE~               10             300            70.0              194.
##  3 KALKAN~               12             230           124.               171.
##  4 SINARI~               12             225           101.               165 
##  5 FANGRI~               50             200           127.               162.
##  6 MERCAN~                2             230             8.16             160.
##  7 DIL                    3             200            23.2              152.
##  8 BARBUN~                8             210            67.7              149.
##  9 TRANÇA~               60             200           127.               149.
## 10 BARBUN~                2             250             9.85             146.
## # ... with 1 more variable: DIFF_PERCENTAGE <dbl>

Visualized the average most valuable seafoods in 1st quarter with geom_bar()

Used coord_flip() to swap the axises.

q <- x %>%
  mutate(MAL_ADI = fct_reorder(MAL_ADI, ORTALAMA_AZAMI_UCRET)) %>%
  ggplot( aes(x=MAL_ADI, y=ORTALAMA_AZAMI_UCRET)) +
  geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
  ggtitle("10 Most Expensive Seafood in the 1st quarter")+
  coord_flip() +
  xlab("Seafood") +
  ylab("Average Max Price") +
  theme_classic()

q

Calculated monthly average price of Dil Baligi and Barbun(Kaya) by using group_by function.

DIL_BALIGI <- select(balik_hal_fiyatlari, TARIH, MAL_TURU, MAL_ADI, BIRIM, ASGARI_UCRET, AZAMI_UCRET) %>%
  group_by(YEAR = lubridate::year(TARIH), MONTH = lubridate::month(TARIH), DAY = lubridate::day(TARIH)) %>%
  group_by(MAL_ADI, MONTH) %>%
  filter(MAL_ADI == "DIL") %>%
  summarise(MIN_ASGARI_UCRET = min(ASGARI_UCRET), MAX_AZAMI_UCRET = max(AZAMI_UCRET), 
            ORTALAMA_ASGARI_UCRET = mean(ASGARI_UCRET), ORTALAMA_AZAMI_UCRET = mean(AZAMI_UCRET))

DIL_BALIGI
## # A tibble: 10 x 6
## # Groups:   MAL_ADI [1]
##    MAL_ADI MONTH MIN_ASGARI_UCRET MAX_AZAMI_UCRET ORTALAMA_ASGARI~
##    <chr>   <dbl>            <dbl>           <dbl>            <dbl>
##  1 DIL         1               10             180             22.0
##  2 DIL         2                8             180             23.7
##  3 DIL         3                3             200             24.1
##  4 DIL         4                5             220             22.6
##  5 DIL         5                5             180             32.0
##  6 DIL         6               10             200             32.4
##  7 DIL         7                4             300             32.4
##  8 DIL         8               10             300             38.2
##  9 DIL         9                5             300             31.6
## 10 DIL        10               10             350             28.9
## # ... with 1 more variable: ORTALAMA_AZAMI_UCRET <dbl>
BARBUN_KAYA <- select(balik_hal_fiyatlari, TARIH, MAL_TURU, MAL_ADI, BIRIM, ASGARI_UCRET, AZAMI_UCRET) %>%
  group_by(YEAR = lubridate::year(TARIH), MONTH = lubridate::month(TARIH), DAY = lubridate::day(TARIH)) %>%
  group_by(MAL_ADI, MONTH) %>%
  filter(MAL_ADI == "BARBUN(KAYA)") %>%
  summarise(MIN_ASGARI_UCRET = min(ASGARI_UCRET), MAX_AZAMI_UCRET = max(AZAMI_UCRET), 
            ORTALAMA_ASGARI_UCRET = mean(ASGARI_UCRET), ORTALAMA_AZAMI_UCRET = mean(AZAMI_UCRET))

BARBUN_KAYA
## # A tibble: 10 x 6
## # Groups:   MAL_ADI [1]
##    MAL_ADI MONTH MIN_ASGARI_UCRET MAX_AZAMI_UCRET ORTALAMA_ASGARI~
##    <chr>   <dbl>            <dbl>           <dbl>            <dbl>
##  1 BARBUN~     1                8             170             64.1
##  2 BARBUN~     2               50             170             69.7
##  3 BARBUN~     3                8             210             68.5
##  4 BARBUN~     4                8             200             76.7
##  5 BARBUN~     5               70             200            108. 
##  6 BARBUN~     6               30             350            151. 
##  7 BARBUN~     7               70             400            134. 
##  8 BARBUN~     8               12             350            104. 
##  9 BARBUN~     9               60             350             98.6
## 10 BARBUN~    10               70             350             99.3
## # ... with 1 more variable: ORTALAMA_AZAMI_UCRET <dbl>

As clearly seen from line plots of both fishes below, when hunting restriction begins, prices are rising.

library(hrbrthemes)
library(ggthemes)

COMPARE <- DIL_BALIGI %>%
  ggplot( aes(x=MONTH, y=ORTALAMA_AZAMI_UCRET)) +
  geom_line(data = DIL_BALIGI, aes(x = MONTH, y = ORTALAMA_AZAMI_UCRET, color = "Dil Baligi"), size = 1) +
  geom_point(data = DIL_BALIGI, shape=21, color="black", fill="#69b3a2", size=3) +
  geom_line(data = BARBUN_KAYA, aes(x = MONTH, y = ORTALAMA_AZAMI_UCRET, color = "Barbun(Kaya)"), size = 1) +
  geom_point(data = BARBUN_KAYA, shape=8, color="black", fill="#EE0000", size=3) +
  scale_x_discrete(label = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October"), "Months", limits=c(1:10))+
  scale_y_continuous(name = "Average Max Price")+
  labs(color = '')+
  ggtitle("Monthly Average Maximum Price Comparison of Dil Baligi and Barbun(Kaya)")+
  theme_solarized(base_size = 10)

COMPARE + theme(
  plot.title = element_text(color = "Purple", size = 10, face = "italic"))