0

I have the follwing Excel inputs from an electricity provider...

  1. column: Excel's time differences between values in 2. column
  2. column: Datetimes given by the Swiss electricity provider obviously in CET/CEST format
  3. column: datetimes in Excel format
  4. column: the resulting datetimes from column 3
  5. column and following: values

0.010416667; 25.10.2015 00:00; 42302.00; 25.10.2015 00:00; 1353500.886 0.010416667; 25.10.2015 00:15; 42302.01; 25.10.2015 00:15; 1358036.992 0.010416667; 25.10.2015 00:30; 42302.02; 25.10.2015 00:30; 1336141.202 0.010416667; 25.10.2015 00:45; 42302.03; 25.10.2015 00:45; 1339588.722 0.010416667; 25.10.2015 01:00; 42302.04; 25.10.2015 01:00; 1309298.517 0.010416667; 25.10.2015 01:15; 42302.05; 25.10.2015 01:15; 1312979.406 0.010416667; 25.10.2015 01:30; 42302.06; 25.10.2015 01:30; 1263912.381 0.010416667; 25.10.2015 01:45; 42302.07; 25.10.2015 01:45; 1263811.518 0.010416667; 25.10.2015 02:00; 42302.08; 25.10.2015 02:00; 1232290.791 0.010416667; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1258868.764 0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1237087.155 0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1231009.94 0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1234285.809 -0.03125; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1220952.562 0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1184250.718 0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1169521.089 0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1149841.273 0.010416667; 25.10.2015 03:15; 42302.14; 25.10.2015 03:15; 1157376.444 0.010416667; 25.10.2015 03:30; 42302.15; 25.10.2015 03:30; 1159568.082 0.010416667; 25.10.2015 03:45; 42302.16; 25.10.2015 03:45; 1176631.296 0.010416667; 25.10.2015 04:00; 42302.17; 25.10.2015 04:00; 1170186.402 0.010416667; 25.10.2015 04:15; 42302.18; 25.10.2015 04:15; 1174296.3 0.010416667; 25.10.2015 04:30; 42302.19; 25.10.2015 04:30; 1170491.442 0.010416667; 25.10.2015 04:45; 42302.20; 25.10.2015 04:45; 1168962.826 0.010416667; 25.10.2015 05:00; 42302.21; 25.10.2015 05:00; 1155760.463 0.010416667; 25.10.2015 05:15; 42302.22; 25.10.2015 05:15; 1184186.085 0.010416667; 25.10.2015 05:30; 42302.23; 25.10.2015 05:30; 1188887.774

Full Excel data can be downloaded from Swissgrid homepage: Swissgrid Data

The data is recorded during time shift of daylight saving. I read out the data with following code:

read_SwissgridExcel_v2015 <- function(path,sheet=3){
wb1 <- read_excel(path, sheet=sheet, col_names=TRUE, skip=1)
names(wb1)[1] <-"Col_1"
wb1$Col_1 <- as.POSIXct(round.POSIXt(wb1$Col_1),tz="Europe/Berlin",usetz=TRUE)
SwissgridDat <- wb1
attr(SwissgridDat,"description")<-names(SwissgridDat)
names(SwissgridDat) <- c("Col_1",
                       "Cons_Total_Enduse_Swiss_ControlBlock",
                       "Prod_Total_Swiss_ControlBlock",
                       "Cons_Total_Swiss_ControlBlock",
                       "Net_Outflow_Swiss_Transm_Grid",
                       "Grid_FeedIn_Swiss_Transm_Grid",
                       "Control_Energy_Pos_Sec",
                       "Control_Energy_Neg_Sec",
                       "Control_Energy_Pos_Ter",
                       "Control_Energy_Neg_Ter",
                       "Cross_Border_Exchange_CH_AT",
                       "Cross_Border_Exchange_AT_CH",
                       "Cross_Border_Exchange_CH_DE",
                       "Cross_Border_Exchange_DE_CH",
                       "Cross_Border_Exchange_CH_FR",
                       "Cross_Border_Exchange_FR_CH",
                       "Cross_Border_Exchange_CH_IT",
                       "Cross_Border_Exchange_IT_CH",
                       "Transit",
                       "Import",
                       "Export",
                       "Prices_Avg_Pos_Sec_Control_Energy",
                       "Prices_Avg_Neg_Sec_Control_Energy",
                       "Prices_Avg_Pos_Ter_Control_Energy",
                       "Prices_Avg_Neg_Ter_Control_Energy",
                       "Prod_Canton_AG",
                       "Cons_Canton_AG",
                       "Prod_Canton_FR",
                       "Cons_Canton_FR",
                       "Prod_Canton_GL",
                       "Cons_Canton_GL",
                       "Prod_Canton_GR",
                       "Cons_Canton_GR",
                       "Prod_Canton_LU",
                       "Cons_Canton_LU",
                       "Prod_Canton_NE",
                       "Cons_Canton_NE",
                       "Prod_Canton_SO",
                       "Cons_Canton_SO",
                       "Prod_Canton_SG",
                       "Cons_Canton_SG",
                       "Prod_Canton_TI",
                       "Cons_Canton_TI",
                       "Prod_Canton_TG",
                       "Cons_Canton_TG",
                       "Prod_Canton_VS",
                       "Cons_Canton_VS",
                       "Prod_Canton_AI_AR",
                       "Cons_Canton_AI_AR",
                       "Prod_Canton_BL_BS",
                       "Cons_Canton_BL_BS",
                       "Prod_Canton_BE_JU",
                       "Cons_Canton_BE_JU",
                       "Prod_Canton_SZ_ZG",
                       "Cons_Canton_SZ_ZG",
                       "Prod_Canton_OW_NW_UR",
                       "Cons_Canton_OW_NW_UR",
                       "Prod_Canton_GE_VD",
                       "Cons_Canton_GE_VD",
                       "Prod_Canton_SH_ZH",
                       "Cons_Canton_SH_ZH",
                       "Prod_Cantons",
                       "Cons_Cantons",
                       "Prod_Foreign_Territories_SwissControlZone",
                       "Cons_Foreign_Territories_SwissControlZone"
)
SwissgridDat <- SwissgridDat %>% transform(SelfProdEndUse=pmin(Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock))
SwissgridDat <- SwissgridDat %>% transform(ImportEndUse= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0))
SwissgridDat <- SwissgridDat %>% transform(ImportNonEndUsed= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock & 
                                                                    Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
                                                                  Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0) + 
                                           ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
                                                    Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
                                                  Cons_Total_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0)
)
SwissgridDat <- SwissgridDat %>% transform(Export2= ifelse(Prod_Total_Swiss_ControlBlock >= Cons_Total_Swiss_ControlBlock,
                                                         Prod_Total_Swiss_ControlBlock-Cons_Total_Swiss_ControlBlock,0)
)

SwissgridDat <- SwissgridDat %>% transform(SelfProdOwnUse=
                                           ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
                                                    Prod_Total_Swiss_ControlBlock<=Cons_Total_Swiss_ControlBlock,
                                                  Prod_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)+
                                           ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
                                                    Prod_Total_Swiss_ControlBlock>Cons_Total_Swiss_ControlBlock,
                                                  Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)
)
return(SwissgridDat)
}
DataPath <- getwd()
SwissgridDat <- read_SwissgridExcel_v2015(paste(DataPath, "/EnergieUebersichtCH_2015.xlsx", sep = ""))

Printing out the timestamps with following

SwissgridDat$Col_1[28510:28525]

results in

[1] "2015-10-25 00:30:00 CEST" "2015-10-25 00:45:00 CEST" "2015-10-25 01:00:00 CEST" "2015-10-25 01:15:00 CEST" "2015-10-25 01:30:00 CEST" "2015-10-25 01:45:00 CEST"
[7] "2015-10-25 02:00:00 CET"  "2015-10-25 02:15:00 CET"  "2015-10-25 02:30:00 CET"  "2015-10-25 02:45:00 CET"  "2015-10-25 03:00:00 CET"  "2015-10-25 02:15:00 CET" 
[13] "2015-10-25 02:30:00 CET"  "2015-10-25 02:45:00 CET"  "2015-10-25 03:00:00 CET"  "2015-10-25 03:15:00 CET"

Problem: as you can see the shift hour is both printed in CET time, although the logic says that the first hour 02:00 to 03:00 has to be CEST and then the second hour from 02:15 to 03:00 in CET format.

Did I do something wrong or is this a bug? How would you do the readout?

My goal is to transform everything in UTC anyway, but data is unfortunately not in UTC format.

Thx a lot for your help on this.

NanisTe
  • 13
  • 4
  • Nope, you did not do something wrong. Posixct cannot know the DST hour without further info. Here is the strategy is usually employ in this case: 1) find duplicate timestamps 2) average time stamp between the previous fitting entries and later one. Here s snippet for hourly timestamps: `target <- which(duplicated(df.scenarios$time)) df.scenarios$time[target] <- df.scenarios$time[target] + (df.scenarios$time[target + 1] - df.scenarios$time[target - 1])/2` – Buggy Oct 11 '16 at 15:45
  • Could use use the approach I've outlined in this answer? http://stackoverflow.com/a/39799050/1412059 – Roland Oct 11 '16 at 15:45
  • Also I might add: maybe you can reduced the amount of code in your question to the bare minimum and make a small test out of your data. – Buggy Oct 11 '16 at 15:50
  • @user3293236 as you can see POSIXct has tried to find the hour but did put it wrong. So why do you say it can not find the hour? It definitely did something wrong because I gave the info of the time zone. – NanisTe Oct 11 '16 at 16:20
  • @user3293236 I do not really understand, what you are suggesting with your code above. I will try to reduce code sections next time. Thx – NanisTe Oct 11 '16 at 16:26
  • It is not a bug because the conversion to POSIXct only recognises the timestamp and not the difference to the previous time stamp entry. Why should it interpret the first 02:00 differently than the second 02:00? It is the same string. BTW try the answer below. That gives you a nice time regular timestamp – Buggy Oct 11 '16 at 16:31

1 Answers1

0

I thought of a simpler approach since you want UTC only:

start_posix <- as.POSIXct("2014-01-01 00:00:00", tz="Europe/Zurich") #this is your first time stamp change as appropriate (e.g SwissgridDat$Col_1[1])
attr(start_posix, "tzone")<-"UTC"
len_out<-nrow(SwissgridDat$Col_1) 
len_out<-1000 # remove this line in your case
seq.POSIXt(from=start_posix,by=15*60,length.out = len_out)

That will give you a nice regular 15min UTC timestamp as long as no data is missing

EDIT:

Here's just the correction for the DLS hours. Feel free to generalize to several years of data, if needed...

Test data:

25.10.2015 00:00;42302
25.10.2015 00:15;42302.01
25.10.2015 00:30;42302.02
25.10.2015 00:45;42302.03
25.10.2015 01:00;42302.04
25.10.2015 01:15;42302.05
25.10.2015 01:30;42302.06
25.10.2015 01:45;42302.07
25.10.2015 02:00;42302.08
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 03:15;42302.14
25.10.2015 03:30;42302.15
25.10.2015 03:45;42302.16
25.10.2015 04:00;42302.17
25.10.2015 04:15;42302.18
25.10.2015 04:30;42302.19
25.10.2015 04:45;42302.2
25.10.2015 05:00;42302.21
25.10.2015 05:15;42302.22
25.10.2015 05:30;42302.23

Code:

data <- read.delim("test.csv", sep=";", dec=".", header =F, col.names = c("time", "somedata"))
data$time <- as.POSIXct(data$time, "%d.%m.%Y %H:%M", tz="Europe/Zurich")
target <- which(duplicated(data$time))
len_out <- length(target)*2-1 #need to run over duplicated and originals
last_target <- max(target)

for (i in seq(0,len_out)){

  diff_increment <- as.numeric(difftime(data$time[last_target-i+2],data$time[last_target-i+1], units="secs")) #get correct increment from a little further out
  data$time[last_target-i]<-data$time[last_target-i+1]-diff_increment
}

This snippet will correct the day light saving hours in your data after the read-in.

Buggy
  • 2,050
  • 21
  • 27
  • Thx for this suggestion. But this procedure should read out the datetime values from the Excel file. The idea was to only convert those time stamps to UTC values accurately by knowing the original time zone. I did not expect that the time stamps belonging to CEST are sampled as CET values. I do not want to assume time stamps which could be different or have missing values. – NanisTe Oct 11 '16 at 17:30
  • @Nixdae The new snippet should do the trick. It is basically just a generalization of the stuff I posted in the comment section. It looks for duplicated values as caused by the DLS conversion to Posixct and corrects it by running only over those values. This should be what you are looking for. – Buggy Oct 12 '16 at 06:00
  • Thx a lot this seems be what I was looking for. I will implement it and give a comment on that. Thumbs up. – NanisTe Oct 12 '16 at 08:50