Preparing data for analysis

raw_df = read.csv("C:\\data\\EPIAS_EXIST_MCP_SMP\\ptf-smf.csv",header=TRUE,dec=",")
knitr::kable(raw_df[1:5,], format="markdown", align='cc')
Tarih PTF SMF Pozitif.Dengesizlik.Fiyatı..TL.MWh. Negatif.Dengesizlik.Fiyatı..TL.MWh. SMF.Yön
01.09.20 00:00 302.39 332,39 293.32 342,36 ? Enerji Açığı
01.09.20 01:00 300.25 325,25 291.24 335,01 ? Enerji Açığı
01.09.20 02:00 292.64 317,64 283.86 327,17 ? Enerji Açığı
01.09.20 03:00 290.00 320,00 281.30 329,60 ? Enerji Açığı
01.09.20 04:00 290.00 330,00 281.30 339,90 ? Enerji Açığı

Pulling data from our file system with read_csv function.

char_to_numeric = function(char_string){
  numeric_value = as.numeric(sub(',', '.',sub('.', '', char_string, fixed = TRUE)))
  return(numeric_value)
}

clean_names = c("DATE_TIME","MARKET_CLEARING_PRICE","SYSTEM_MARGINAL_PRICE","POSITIVE_IMBALANCE_PRICE","NEGATIVE_IMBALANCE_PRICE","SYSTEM_MARGINAL_PRICE_COURSE")
clean_df = setNames(raw_df, clean_names) %>% transform(DATE_TIME = as.POSIXct(DATE_TIME,format='%d.%m.%y %H:%M'), SYSTEM_MARGINAL_PRICE = char_to_numeric(SYSTEM_MARGINAL_PRICE), NEGATIVE_IMBALANCE_PRICE = char_to_numeric(NEGATIVE_IMBALANCE_PRICE))
knitr::kable(clean_df[1:5,], format="markdown", align='cc')
DATE_TIME MARKET_CLEARING_PRICE SYSTEM_MARGINAL_PRICE POSITIVE_IMBALANCE_PRICE NEGATIVE_IMBALANCE_PRICE SYSTEM_MARGINAL_PRICE_COURSE
2020-09-01 00:00:00 302.39 332.39 293.32 342.36 ? Enerji Açığı
2020-09-01 01:00:00 300.25 325.25 291.24 335.01 ? Enerji Açığı
2020-09-01 02:00:00 292.64 317.64 283.86 327.17 ? Enerji Açığı
2020-09-01 03:00:00 290.00 320.00 281.30 329.60 ? Enerji Açığı
2020-09-01 04:00:00 290.00 330.00 281.30 339.90 ? Enerji Açığı

Cleaned our data here also defined a function to convert our data types to numeric(instead of char) so we can analyze it further.

working_time = function(week_day, hour){
    isItWorkingTime = ifelse(between(week_day,2,6) & between(hour,8,18), "Working Time", "Free Time")
    return(isItWorkingTime)
  }


analytical_df = clean_df[1:3] %>% transmute(DATE_TIME, HOUR = hour(DATE_TIME), WEEK_DAY = wday(DATE_TIME, label = TRUE, abbr=FALSE, locale='UK', week_start = 1), WORKING_TIME = working_time(wday(DATE_TIME),hour(DATE_TIME)), MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE)
knitr::kable(analytical_df[1:5,], format="markdown", align='cc')
DATE_TIME HOUR WEEK_DAY WORKING_TIME MARKET_CLEARING_PRICE SYSTEM_MARGINAL_PRICE
2020-09-01 00:00:00 0 Tuesday Free Time 302.39 332.39
2020-09-01 01:00:00 1 Tuesday Free Time 300.25 325.25
2020-09-01 02:00:00 2 Tuesday Free Time 292.64 317.64
2020-09-01 03:00:00 3 Tuesday Free Time 290.00 320.00
2020-09-01 04:00:00 4 Tuesday Free Time 290.00 330.00

Visualizing the data

ggplot(analytical_df, aes(x=DATE_TIME, group=1)) +
  geom_line(aes(y = MARKET_CLEARING_PRICE, color = "Market Clearing Price") , size = 1) +
  geom_line(aes(y = SYSTEM_MARGINAL_PRICE, color = "System Marginal Price"), size = 1) +
  theme_minimal() + 
  labs(x = '', y = 'Price in TRY/MWh', color = "Price Category", title = "MCP/SMP", subtitle = "Electricity market prices in September 2020")

Here we can see that there are spiking in System Marginal Prices in 03/09/2020, 07/09/2020 and 17/09/2020 but no national anomaly detected in news search. There must be a hidden variable we can’t see.

analytical_df %>%
  select(WORKING_TIME, MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE) %>%
  group_by(WORKING_TIME) %>%
  summarize(MARKET_CLEARING_PRICE = mean(MARKET_CLEARING_PRICE), SYSTEM_MARGINAL_PRICE = mean(SYSTEM_MARGINAL_PRICE), .groups = 'drop') %>% ggplot(., aes(x="", y=SYSTEM_MARGINAL_PRICE, fill=WORKING_TIME)) +
  geom_bar(stat="identity") +
  coord_polar("y") + 
  theme_void() + 
    labs(x = '', y = '', color = "Work Hours/Free Time", title = "SMP in Work Hours", subtitle = "System marginal price comparison between work hours and free time") +
  scale_fill_discrete(name = "Work Hours/Free Time")

EPIAS dataset indicates there is more electric consumption in work hours.

analytical_df %>%
  select(WEEK_DAY, MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE) %>%
  group_by(WEEK_DAY) %>%
  summarize('Market Clearing Price' = mean(MARKET_CLEARING_PRICE), 'System Marginal Price' = mean(SYSTEM_MARGINAL_PRICE), .groups = 'drop') %>% 
  pivot_longer(.,cols=c('Market Clearing Price','System Marginal Price'),names_to='PRICE_CATEGORY',values_to='VALUE') %>%
  ggplot(., aes(x=WEEK_DAY, y=VALUE, fill=PRICE_CATEGORY)) +
  geom_bar(stat="identity", position = 'dodge') +
  theme_minimal() + 
    labs(x = 'Day of the Week', y = 'Price in TRY/MWh', title = "MCP/SMP in Days of the Week", subtitle = "Electricity market prices comparison between days of the week") +
  scale_fill_discrete(name = "Price Category")

Here we can see that electrical consumption peaks at thursday and we get lowest consumption in weekends.

analytical_df %>%
  select(HOUR, MARKET_CLEARING_PRICE, SYSTEM_MARGINAL_PRICE) %>%
  group_by(HOUR) %>%
  summarize('Market Clearing Price' = mean(MARKET_CLEARING_PRICE), 'System Marginal Price' = mean(SYSTEM_MARGINAL_PRICE), .groups = 'drop') %>%
  pivot_longer(.,cols=c('Market Clearing Price','System Marginal Price'),names_to='PRICE_CATEGORY',values_to='VALUE') %>%
  ggplot(., aes(x=factor(HOUR), y=VALUE, fill=PRICE_CATEGORY)) +
  geom_bar(stat="identity", position = 'dodge') +
  theme_minimal() + 
    labs(x = 'Hours', y = 'Price in TRY/MWh', title = "MCP/SMP in Hours", subtitle = "Electricity market prices comparison between hours") +
  scale_fill_discrete(name = "Price Category")

Here we can see that energy consumption peaks between 16:00 and 17:00 and we see it is lowest at 06:00.

I think it might be because 16:00 and 17:00 is at work hours and evening so population can use it way much, and at 06:00 it is already day and most people are not awake.