About Data

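This dataset contains hourly electricity market records for September 2020, with six columns: Date, MCP, SMP, PositiveImbalancePrice (TL/MWh), NegativeImbalancePrice (TL/MWh), and SMPDirection.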

You can find the data here

Data Analysis

library(dplyr)
library(ggplot2)
library(readxl)
library(lubridate)
# Read the ptf-smf.xlsx workbook from its absolute, machine-specific path
# and print a per-column summary as a first sanity check.
energy_data <- read_excel(
  path = "C:/Users/unalt/Desktop/MEF/Data Analytics Essentials/week3/ptf-smf.xlsx"
)
summary(energy_data)
##       Date                          MCP             SMP        
##  Min.   :2020-09-01 00:00:00   Min.   :198.4   Min.   : 129.0  
##  1st Qu.:2020-09-08 11:45:00   1st Qu.:292.7   1st Qu.: 275.0  
##  Median :2020-09-15 23:30:00   Median :305.1   Median : 320.0  
##  Mean   :2020-09-15 23:30:00   Mean   :308.2   Mean   : 323.4  
##  3rd Qu.:2020-09-23 11:15:00   3rd Qu.:314.9   3rd Qu.: 351.3  
##  Max.   :2020-09-30 23:00:00   Max.   :982.0   Max.   :2000.0  
##  PositiveImbalancePrice(TL/MWh) NegativeImbalancePrice(TL/MWh)
##  Min.   :125.1                  Min.   : 204.7                
##  1st Qu.:258.0                  1st Qu.: 305.4                
##  Median :293.8                  Median : 330.9                
##  Mean   :286.8                  Mean   : 346.1                
##  3rd Qu.:304.8                  3rd Qu.: 365.0                
##  Max.   :952.5                  Max.   :2060.0                
##  SMPDirection      
##  Length:720        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Data Manipulations

# Derive calendar and price-spread helper columns from the raw data.
#   New_Date     - calendar date of the timestamp
#   Day          - weekday name as returned by weekdays()
#   SMP_MCP_Diff - SMP minus MCP
#   Time         - clock time of the timestamp formatted as "%H:%M:%S" text
#   Hour         - hour of day extracted from the timestamp
new_energy_data <- energy_data %>%
  # transform() is kept on purpose: besides converting Date it rewrites the
  # column names into syntactic form (e.g. "PositiveImbalancePrice.TL.MWh.").
  # NOTE(review): the format string presumably only matters when Date arrives
  # as text; confirm the type read_excel() delivers for this column.
  transform(Date = as.POSIXct(Date, format = "%d.%m.%y %H:%M")) %>%
  mutate(
    New_Date = as.Date(Date),
    Day = weekdays(Date),
    SMP_MCP_Diff = SMP - MCP,
    # Use the piped Date column instead of reaching back into energy_data, so
    # Time always lines up with the rows actually flowing through the pipe.
    Time = format(Date, "%H:%M:%S"),
    Hour = lubridate::hour(Date)
  )

summary(new_energy_data)
##       Date                          MCP             SMP        
##  Min.   :2020-09-01 00:00:00   Min.   :198.4   Min.   : 129.0  
##  1st Qu.:2020-09-08 11:45:00   1st Qu.:292.7   1st Qu.: 275.0  
##  Median :2020-09-15 23:30:00   Median :305.1   Median : 320.0  
##  Mean   :2020-09-15 23:30:00   Mean   :308.2   Mean   : 323.4  
##  3rd Qu.:2020-09-23 11:15:00   3rd Qu.:314.9   3rd Qu.: 351.3  
##  Max.   :2020-09-30 23:00:00   Max.   :982.0   Max.   :2000.0  
##  PositiveImbalancePrice.TL.MWh. NegativeImbalancePrice.TL.MWh.
##  Min.   :125.1                  Min.   : 204.7                
##  1st Qu.:258.0                  1st Qu.: 305.4                
##  Median :293.8                  Median : 330.9                
##  Mean   :286.8                  Mean   : 346.1                
##  3rd Qu.:304.8                  3rd Qu.: 365.0                
##  Max.   :952.5                  Max.   :2060.0                
##  SMPDirection          New_Date              Day             SMP_MCP_Diff    
##  Length:720         Min.   :2020-09-01   Length:720         Min.   :-180.99  
##  Class :character   1st Qu.:2020-09-08   Class :character   1st Qu.:   0.00  
##  Mode  :character   Median :2020-09-15   Mode  :character   Median :  18.25  
##                     Mean   :2020-09-15                      Mean   :  15.21  
##                     3rd Qu.:2020-09-23                      3rd Qu.:  40.00  
##                     Max.   :2020-09-30                      Max.   :1570.00  
##      Time                Hour      
##  Length:720         Min.   : 0.00  
##  Class :character   1st Qu.: 5.75  
##  Mode  :character   Median :11.50  
##                     Mean   :11.50  
##                     3rd Qu.:17.25  
##                     Max.   :23.00

Data Visualization

# Per-hour price profile: mean, minimum and maximum of MCP and of SMP,
# computed over all rows that share the same Hour value.
geom_graph <- summarise(
  group_by(new_energy_data, Hour),
  Avg_MCP = mean(MCP),
  Min_MCP = min(MCP),
  Max_MCP = max(MCP),
  Avg_SMP = mean(SMP),
  Min_SMP = min(SMP),
  Max_SMP = max(SMP)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of the average, minimum and maximum MCP per hour.
# Each line maps colour to a constant label so it gets its own legend entry.
hourly_mcp <- ggplot(data = geom_graph, mapping = aes(x = Hour)) +
  geom_line(
    mapping = aes(y = Avg_MCP, color = "Average Hourly MCP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Min_MCP, color = "Minimum Hourly MCP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Max_MCP, color = "Maximum Hourly MCP"),
    size = 0.8
  ) +
  theme_minimal() +
  labs(x = "Hour", y = "MCP (TL)", title = "Hourly MCP Trend")

# Render the chart with a bold, dark-red title.
hourly_mcp +
  theme(plot.title = element_text(color = "darkred", size = 14, face = "bold"))

Although the MCP value deviates during the day, the average MCP value shows that the highest energy demand occurs between 1 pm and 4 pm.

# Line chart of the average, minimum and maximum SMP per hour.
# Each line maps colour to a constant label so it gets its own legend entry.
hourly_smp <- ggplot(data = geom_graph, mapping = aes(x = Hour)) +
  geom_line(
    mapping = aes(y = Avg_SMP, color = "Average Hourly SMP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Min_SMP, color = "Minimum Hourly SMP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Max_SMP, color = "Maximum Hourly SMP"),
    size = 0.8
  ) +
  theme_minimal() +
  labs(x = "Hour", y = "SMP (TL)", title = "Hourly SMP Trend")

# Render the chart with a bold, dark-red title.
hourly_smp +
  theme(plot.title = element_text(color = "darkred", size = 14, face = "bold"))

The SMP values move in the same direction as the MCP values shown in the previous graph. In particular, between 4 pm and 5 pm the energy requirement is approximately four times the normal energy requirement.

# Scatter plot of SMP against MCP, coloured by weekday, with a fitted
# linear-model smoother drawn per colour group.
cor <- ggplot(
  data = new_energy_data,
  mapping = aes(x = MCP, y = SMP, color = Day)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Day-Based Correlation Between SMP and MCP")

# Render the chart with a bold, dark-red title.
cor +
  theme(plot.title = element_text(color = "darkred", size = 14, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'

Looking at the day-based correlation of the SMP and MCP data, the energy need at the weekend is, as expected, lower than on weekdays. The highest energy requirement occurred on Thursday.

# Keep only the columns needed for the deficit/surplus chart.
# No group_by() here: nothing is aggregated afterwards, so grouping by Date
# only left a grouped data frame behind without changing the selected rows.
col_graphs <- new_energy_data %>%
  select(Date, SMP_MCP_Diff, SMPDirection)


# Column chart of the SMP - MCP spread over time, filled by SMPDirection.
# NOTE(review): the three fill colours are unnamed, so they are assigned to
# the SMPDirection values in scale order - confirm the intended pairing.
diff_smp_mcp <- ggplot(
  data = col_graphs,
  mapping = aes(x = Date, y = SMP_MCP_Diff, fill = SMPDirection)
) +
  geom_col() +
  scale_fill_manual(
    name = "SMPDirection",
    values = c("#FF9900", "#CC0000", "#009E73")
  ) +
  theme_light() +
  labs(
    x = "Date",
    y = "SMP - MCP",
    title = "Energy Deficit or Surplus Trend",
    subtitle = "September 2020 Electricity Market Prices"
  )

# Render the chart with a bold dark-red title and a smaller dark-red subtitle.
diff_smp_mcp +
  theme(
    plot.title = element_text(color = "darkred", size = 14, face = "bold"),
    plot.subtitle = element_text(color = "darkred", size = 11)
  )

Looking at the overall data for September, the balance between production and consumption was never achieved, and days with an energy deficit are in the majority.