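This dataset contains 720 hourly records for September 2020, each with six variables: Date, MCP, SMP, PositiveImbalancePrice (TL/MWh), NegativeImbalancePrice (TL/MWh) and SMPDirection.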
You can find the data here
library(dplyr)
library(ggplot2)
library(readxl)
library(lubridate)
# Read the ptf-smf.xlsx workbook into `energy_data`, then print a
# per-column summary of what was loaded.
energy_data <- readxl::read_excel(
  path = "C:/Users/unalt/Desktop/MEF/Data Analytics Essentials/week3/ptf-smf.xlsx"
)
summary(object = energy_data)
## Date MCP SMP
## Min. :2020-09-01 00:00:00 Min. :198.4 Min. : 129.0
## 1st Qu.:2020-09-08 11:45:00 1st Qu.:292.7 1st Qu.: 275.0
## Median :2020-09-15 23:30:00 Median :305.1 Median : 320.0
## Mean :2020-09-15 23:30:00 Mean :308.2 Mean : 323.4
## 3rd Qu.:2020-09-23 11:15:00 3rd Qu.:314.9 3rd Qu.: 351.3
## Max. :2020-09-30 23:00:00 Max. :982.0 Max. :2000.0
## PositiveImbalancePrice(TL/MWh) NegativeImbalancePrice(TL/MWh)
## Min. :125.1 Min. : 204.7
## 1st Qu.:258.0 1st Qu.: 305.4
## Median :293.8 Median : 330.9
## Mean :286.8 Mean : 346.1
## 3rd Qu.:304.8 3rd Qu.: 365.0
## Max. :952.5 Max. :2060.0
## SMPDirection
## Length:720
## Class :character
## Mode :character
##
##
##
# Build `new_energy_data`: the raw table plus the calendar and price-spread
# columns derived from `Date`, `SMP` and `MCP`.
#
# `transform()` is kept for the first step on purpose: it hands back a base
# data.frame whose column names are made syntactic, so switching it to
# `mutate()` would change the names and class of the result.
# NOTE(review): if `Date` is already a date-time when it is read, the
# `format` string is presumably only a fallback for text input -- confirm.
new_energy_data <- energy_data %>%
  transform(Date = as.POSIXct(Date, format = "%d.%m.%y %H:%M")) %>%
  mutate(
    # Calendar date without the time-of-day part.
    New_Date = as.Date(Date),
    # Weekday name; the spelling depends on the session locale.
    Day = weekdays(Date),
    # Spread between the two prices (SMP minus MCP).
    SMP_MCP_Diff = SMP - MCP,
    # Read the piped `Date` column rather than `energy_data$Date`, so the
    # value always comes from the rows that are actually being transformed.
    Time = format(Date, "%H:%M:%S"),
    # Hour of day as a number, used as the grouping key for hourly plots.
    Hour = lubridate::hour(Date)
  )
summary(new_energy_data)
## Date MCP SMP
## Min. :2020-09-01 00:00:00 Min. :198.4 Min. : 129.0
## 1st Qu.:2020-09-08 11:45:00 1st Qu.:292.7 1st Qu.: 275.0
## Median :2020-09-15 23:30:00 Median :305.1 Median : 320.0
## Mean :2020-09-15 23:30:00 Mean :308.2 Mean : 323.4
## 3rd Qu.:2020-09-23 11:15:00 3rd Qu.:314.9 3rd Qu.: 351.3
## Max. :2020-09-30 23:00:00 Max. :982.0 Max. :2000.0
## PositiveImbalancePrice.TL.MWh. NegativeImbalancePrice.TL.MWh.
## Min. :125.1 Min. : 204.7
## 1st Qu.:258.0 1st Qu.: 305.4
## Median :293.8 Median : 330.9
## Mean :286.8 Mean : 346.1
## 3rd Qu.:304.8 3rd Qu.: 365.0
## Max. :952.5 Max. :2060.0
## SMPDirection New_Date Day SMP_MCP_Diff
## Length:720 Min. :2020-09-01 Length:720 Min. :-180.99
## Class :character 1st Qu.:2020-09-08 Class :character 1st Qu.: 0.00
## Mode :character Median :2020-09-15 Mode :character Median : 18.25
## Mean :2020-09-15 Mean : 15.21
## 3rd Qu.:2020-09-23 3rd Qu.: 40.00
## Max. :2020-09-30 Max. :1570.00
## Time Hour
## Length:720 Min. : 0.00
## Class :character 1st Qu.: 5.75
## Mode :character Median :11.50
## Mean :11.50
## 3rd Qu.:17.25
## Max. :23.00
# One row per `Hour` holding the mean, minimum and maximum of MCP and SMP.
geom_graph <- new_energy_data %>%
  group_by(Hour) %>%
  summarise(
    Avg_MCP = mean(MCP),
    Min_MCP = min(MCP),
    Max_MCP = max(MCP),
    Avg_SMP = mean(SMP),
    Min_SMP = min(SMP),
    Max_SMP = max(SMP)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Line chart of the hourly average, minimum and maximum MCP taken from
# `geom_graph`. The constant strings inside aes() become the legend entries.
hourly_mcp <- ggplot(data = geom_graph, mapping = aes(x = Hour)) +
  geom_line(
    mapping = aes(y = Avg_MCP, color = "Average Hourly MCP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Min_MCP, color = "Minimum Hourly MCP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Max_MCP, color = "Maximum Hourly MCP"),
    size = 0.8
  ) +
  theme_minimal() +
  labs(title = "Hourly MCP Trend", x = "Hour", y = "MCP (TL)")

# Draw the chart with a bold dark-red title; `hourly_mcp` itself is not
# reassigned, so the stored plot keeps the default title style.
hourly_mcp +
  theme(
    plot.title = element_text(color = "darkred", size = 14, face = "bold")
  )
Although the MCP fluctuates during the day, the average hourly MCP indicates that energy demand is highest between 1 pm and 4 pm.
# Line chart of the hourly average, minimum and maximum SMP taken from
# `geom_graph`. The constant strings inside aes() become the legend entries.
hourly_smp <- ggplot(data = geom_graph, mapping = aes(x = Hour)) +
  geom_line(
    mapping = aes(y = Avg_SMP, color = "Average Hourly SMP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Min_SMP, color = "Minimum Hourly SMP"),
    size = 0.8
  ) +
  geom_line(
    mapping = aes(y = Max_SMP, color = "Maximum Hourly SMP"),
    size = 0.8
  ) +
  theme_minimal() +
  labs(title = "Hourly SMP Trend", x = "Hour", y = "SMP (TL)")

# Draw the chart with a bold dark-red title; `hourly_smp` itself is not
# reassigned, so the stored plot keeps the default title style.
hourly_smp +
  theme(
    plot.title = element_text(color = "darkred", size = 14, face = "bold")
  )
The SMP values move in the same direction as the MCP values shown in the previous graph. In particular, between 4 pm and 5 pm the energy requirement is approximately four times the normal level.
# Scatter plot of SMP against MCP with points colored by `Day`; because the
# color mapping is set at the plot level, geom_smooth() draws a separate
# linear fit for each day.
# NOTE(review): this variable shares its name with R's built-in `cor()`
# function; the name is kept so later references still resolve.
cor <- ggplot(
  data = new_energy_data,
  mapping = aes(x = MCP, y = SMP, color = Day)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Day-Based Correlation Between SMP and MCP")

# Draw the chart with a bold dark-red title; `cor` itself is not reassigned.
cor +
  theme(
    plot.title = element_text(color = "darkred", size = 14, face = "bold")
  )
## `geom_smooth()` using formula 'y ~ x'
Looking at the SMP and MCP data by day of the week, energy demand is, as expected, lower at the weekend than on weekdays. The highest energy requirement occurred on Thursday.
# Keep only the three columns the bar chart needs. No grouping is applied:
# nothing is aggregated here, so a group_by(Date) would only leave a
# grouped frame behind with no effect on the plot.
col_graphs <- new_energy_data %>%
  select(Date, SMP_MCP_Diff, SMPDirection)

# Bar chart of the SMP - MCP spread over time, filled by `SMPDirection`.
# The three fill colors are matched to the SMPDirection values by position.
# NOTE(review): the values of SMPDirection are not visible here -- confirm
# that each color lands on the intended category.
diff_smp_mcp <- ggplot(
  col_graphs,
  aes(x = Date, y = SMP_MCP_Diff, fill = SMPDirection)
) +
  geom_col() +
  scale_fill_manual(
    "SMPDirection",
    values = c("#FF9900", "#CC0000", "#009E73")
  ) +
  labs(x = "Date", y = "SMP - MCP") +
  theme_light() +
  ggtitle(
    "Energy Deficit or Surplus Trend",
    "September 2020 Electricity Market Prices"
  )

# Draw the chart with dark-red title and subtitle; `diff_smp_mcp` itself is
# not reassigned.
diff_smp_mcp +
  theme(
    plot.title = element_text(color = "darkred", size = 14, face = "bold"),
    plot.subtitle = element_text(color = "darkred", size = 11)
  )
Looking at the September data as a whole, production and consumption were never in balance, and days with an energy deficit were in the majority.