raw_df = read.csv("C:\\data\\EPIAS_EXIST_MCP_SMP\\ptf-smf.csv",header=TRUE,dec=",")
knitr::kable(raw_df[1:5,], format="markdown", align='cc')
Tarih | PTF | SMF | Pozitif.Dengesizlik.Fiyatı..TL.MWh. | Negatif.Dengesizlik.Fiyatı..TL.MWh. | SMF.Yön |
---|---|---|---|---|---|
01.09.20 00:00 | 302.39 | 332,39 | 293.32 | 342,36 | ? Enerji Açığı |
01.09.20 01:00 | 300.25 | 325,25 | 291.24 | 335,01 | ? Enerji Açığı |
01.09.20 02:00 | 292.64 | 317,64 | 283.86 | 327,17 | ? Enerji Açığı |
01.09.20 03:00 | 290.00 | 320,00 | 281.30 | 329,60 | ? Enerji Açığı |
01.09.20 04:00 | 290.00 | 330,00 | 281.30 | 339,90 | ? Enerji Açığı |
Pulling data from our file system with read_csv function.
char_to_numeric = function(char_string){
numeric_value = as.numeric(sub(',', '.',sub('.', '', char_string, fixed = TRUE)))
return(numeric_value)
}
clean_names = c("DATE_TIME","MARKET_CLEARING_PRICE","SYSTEM_MARGINAL_PRICE","POSITIVE_IMBALANCE_PRICE","NEGATIVE_IMBALANCE_PRICE","SYSTEM_MARGINAL_PRICE_COURSE")
clean_df = setNames(raw_df, clean_names) %>% transform(DATE_TIME = as.POSIXct(DATE_TIME,format='%d.%m.%y %H:%M'), SYSTEM_MARGINAL_PRICE = char_to_numeric(SYSTEM_MARGINAL_PRICE), NEGATIVE_IMBALANCE_PRICE = char_to_numeric(NEGATIVE_IMBALANCE_PRICE))
knitr::kable(clean_df[1:5,], format="markdown", align='cc')
DATE_TIME | MARKET_CLEARING_PRICE | SYSTEM_MARGINAL_PRICE | POSITIVE_IMBALANCE_PRICE | NEGATIVE_IMBALANCE_PRICE | SYSTEM_MARGINAL_PRICE_COURSE |
---|---|---|---|---|---|
2020-09-01 00:00:00 | 302.39 | 332.39 | 293.32 | 342.36 | ? Enerji Açığı |
2020-09-01 01:00:00 | 300.25 | 325.25 | 291.24 | 335.01 | ? Enerji Açığı |
2020-09-01 02:00:00 | 292.64 | 317.64 | 283.86 | 327.17 | ? Enerji Açığı |
2020-09-01 03:00:00 | 290.00 | 320.00 | 281.30 | 329.60 | ? Enerji Açığı |
2020-09-01 04:00:00 | 290.00 | 330.00 | 281.30 | 339.90 | ? Enerji Açığı |
Cleaned our data here also defined a function to convert our data types to numeric(instead of char) so we can analyze it further.
working_time = function(week_day, hour){
isItWorkingTime = ifelse(between(week_day,2,6) & between(hour,8,18), "Working Time", "Free Time")
return(isItWorkingTime)
}
analytical_df = clean_df[1:3] %>% transmute(DATE_TIME, HOUR = hour(DATE_TIME), WEEK_DAY = wday(DATE_TIME, label = TRUE, abbr=FALSE, locale='UK', week_start = 1), WORKING_TIME = working_time(wday(DATE_TIME),hour(DATE_TIME)), MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE)
knitr::kable(analytical_df[1:5,], format="markdown", align='cc')
DATE_TIME | HOUR | WEEK_DAY | WORKING_TIME | MARKET_CLEARING_PRICE | SYSTEM_MARGINAL_PRICE |
---|---|---|---|---|---|
2020-09-01 00:00:00 | 0 | Tuesday | Free Time | 302.39 | 332.39 |
2020-09-01 01:00:00 | 1 | Tuesday | Free Time | 300.25 | 325.25 |
2020-09-01 02:00:00 | 2 | Tuesday | Free Time | 292.64 | 317.64 |
2020-09-01 03:00:00 | 3 | Tuesday | Free Time | 290.00 | 320.00 |
2020-09-01 04:00:00 | 4 | Tuesday | Free Time | 290.00 | 330.00 |
ggplot(analytical_df, aes(x=DATE_TIME, group=1)) +
geom_line(aes(y = MARKET_CLEARING_PRICE, color = "Market Clearing Price") , size = 1) +
geom_line(aes(y = SYSTEM_MARGINAL_PRICE, color = "System Marginal Price"), size = 1) +
theme_minimal() +
labs(x = '', y = 'Price in TRY/MWh', color = "Price Category", title = "MCP/SMP", subtitle = "Electricity market prices in September 2020")
Here we can see that there are spiking in System Marginal Prices in 03/09/2020, 07/09/2020 and 17/09/2020 but no national anomaly detected in news search. There must be a hidden variable we can’t see.
analytical_df %>%
select(WORKING_TIME, MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE) %>%
group_by(WORKING_TIME) %>%
summarize(MARKET_CLEARING_PRICE = mean(MARKET_CLEARING_PRICE), SYSTEM_MARGINAL_PRICE = mean(SYSTEM_MARGINAL_PRICE), .groups = 'drop') %>% ggplot(., aes(x="", y=SYSTEM_MARGINAL_PRICE, fill=WORKING_TIME)) +
geom_bar(stat="identity") +
coord_polar("y") +
theme_void() +
labs(x = '', y = '', color = "Work Hours/Free Time", title = "SMP in Work Hours", subtitle = "System marginal price comparison between work hours and free time") +
scale_fill_discrete(name = "Work Hours/Free Time")
EPIAS dataset indicates there is more electric consumption in work hours.
analytical_df %>%
select(WEEK_DAY, MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE) %>%
group_by(WEEK_DAY) %>%
summarize('Market Clearing Price' = mean(MARKET_CLEARING_PRICE), 'System Marginal Price' = mean(SYSTEM_MARGINAL_PRICE), .groups = 'drop') %>%
pivot_longer(.,cols=c('Market Clearing Price','System Marginal Price'),names_to='PRICE_CATEGORY',values_to='VALUE') %>%
ggplot(., aes(x=WEEK_DAY, y=VALUE, fill=PRICE_CATEGORY)) +
geom_bar(stat="identity", position = 'dodge') +
theme_minimal() +
labs(x = 'Day of the Week', y = 'Price in TRY/MWh', title = "MCP/SMP in Days of the Week", subtitle = "Electricity market prices comparison between days of the week") +
scale_fill_discrete(name = "Price Category")
Here we can see that electrical consumption peaks at thursday and we get lowest consumption in weekends.
analytical_df %>%
select(HOUR, MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE) %>%
group_by(HOUR) %>%
summarize('Market Clearing Price' = mean(MARKET_CLEARING_PRICE), 'System Marginal Price' = mean(SYSTEM_MARGINAL_PRICE), .groups = 'drop') %>%
pivot_longer(.,cols=c('Market Clearing Price','System Marginal Price'),names_to='PRICE_CATEGORY',values_to='VALUE') %>%
ggplot(., aes(x=factor(HOUR), y=VALUE, fill=PRICE_CATEGORY)) +
geom_bar(stat="identity", position = 'dodge') +
theme_minimal() +
labs(x = 'Hours', y = 'Price in TRY/MWh', title = "MCP/SMP in Hours", subtitle = "Electricity market prices comparison between hours") +
scale_fill_discrete(name = "Price Category")
Here we can see that energy consumption peaks between 16:00 and 17:00 and we see it is lowest at 06:00.
I think it might be because 16:00 and 17:00 is at work hours and evening so population can use it way much, and at 06:00 it is already day and most people are not awake.