0

I would like to convert a list object to zoo and then apply rollapply on the zoo object. A short example is reproduced below (I have 90,000 such files to process, on UNIX). Assume my list has two data frames.

1) I would like to convert the date in each of the dataframes to this format:

# Intended date construction: paste year, month and day into "Y-m-d" strings
# and parse them as Date values.
# NOTE(review): `mylist` is built elsewhere in this post as a list of data
# frames whose columns are named Year/Month/Day, so `mylist$year` etc. would
# have to be evaluated per data frame -- treat this as a sketch of the goal.
dates <- as.Date(paste0(mylist$year, "-", mylist$month, "-", mylist$day), format="%Y-%m-%d")

# Intended conversion to a zoo series ordered by the first column.
# NOTE(review): same caveat -- `mylist[,1]` indexes the list, not a data frame.
z <- zoo(mylist, order.by=mylist[,1])

I understand lapply can do this but I tried without success.

Once I get my zoo object, I would like to use rollapply:

library(hydroTSM) # for daily2annual(); aggregate() can do the same job

# Rolling 3-day total of every column of `zooobject`, with the window centred
# on each observation (partial = TRUE lets the windows at both ends use the
# observations that are available).
# The result is stored as `z.3max` so that it is the object summarised below;
# the original assigned it to `x.3max` and then read an undefined `z.3max`.
z.3max <- rollapply(data = zooobject, width = 3, FUN = sum, fill = NA,
                    partial = TRUE, align = "center")
# Maximum value per year of 3-day total rainfall for each one of the simulations
z.3max.annual <- daily2annual(z.3max, FUN = max, na.rm = TRUE) # dates=1

The code above centers a 3-day window on each column of the data frames in zooobject and sums the values. Then, the maximum per year of the 3-day total is extracted.

      # Build the two-element example list from the site table `a` and keep, in
      # each element, only the rows whose Month is December, January or February
      # (DJF) -- one filtered data frame per site.
      mylist <- lapply(list(a, a), function(site_df) {
        is_djf <- site_df[["Month"]] %in% c(12, 1, 2)
        site_df[is_djf, ]
      })


    library(zoo)
       # Example site table `a` (dput output): a 6-row data.frame with integer
       # Year (all 1975) and Month (all 1), a one-level factor Site ("G100"),
       # Day 1:6, and twenty numeric simulation columns sim01 ... sim20.
       a= structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L
), Month = c(1L, 1L, 1L, 1L, 1L, 1L), Site = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = "G100", class = "factor"), Day = 1:6, 
    sim01 = c(28.49, 29.04, 27.62, 28.43, 28.69, 29.16), sim02 = c(29.49, 
    30.04, 28.62, 29.43, 29.69, 30.16), sim03 = c(30.49, 31.04, 
    29.62, 30.43, 30.69, 31.16), sim04 = c(31.49, 32.04, 30.62, 
    31.43, 31.69, 32.16), sim05 = c(32.49, 33.04, 31.62, 32.43, 
    32.69, 33.16), sim06 = c(33.49, 34.04, 32.62, 33.43, 33.69, 
    34.16), sim07 = c(34.49, 35.04, 33.62, 34.43, 34.69, 35.16
    ), sim08 = c(35.49, 36.04, 34.62, 35.43, 35.69, 36.16), sim09 = c(36.49, 
    37.04, 35.62, 36.43, 36.69, 37.16), sim10 = c(37.49, 38.04, 
    36.62, 37.43, 37.69, 38.16), sim11 = c(38.49, 39.04, 37.62, 
    38.43, 38.69, 39.16), sim12 = c(39.49, 40.04, 38.62, 39.43, 
    39.69, 40.16), sim13 = c(40.49, 41.04, 39.62, 40.43, 40.69, 
    41.16), sim14 = c(41.49, 42.04, 40.62, 41.43, 41.69, 42.16
    ), sim15 = c(42.49, 43.04, 41.62, 42.43, 42.69, 43.16), sim16 = c(43.49, 
    44.04, 42.62, 43.43, 43.69, 44.16), sim17 = c(44.49, 45.04, 
    43.62, 44.43, 44.69, 45.16), sim18 = c(45.49, 46.04, 44.62, 
    45.43, 45.69, 46.16), sim19 = c(46.49, 47.04, 45.62, 46.43, 
    46.69, 47.16), sim20 = c(47.49, 48.04, 46.62, 47.43, 47.69, 
    48.16)), .Names = c("Year", "Month", "Site", "Day", "sim01", 
"sim02", "sim03", "sim04", "sim05", "sim06", "sim07", "sim08", 
"sim09", "sim10", "sim11", "sim12", "sim13", "sim14", "sim15", 
"sim16", "sim17", "sim18", "sim19", "sim20"), row.names = c(NA, 
6L), class = "data.frame")

Output should be similar to:

Year Site Sim01... 
1975 G100 ...
1976 G100 ...
1977 G100 ...

Only the values in the months c(12,1,2) are needed.

code123
  • 2,082
  • 4
  • 30
  • 53

2 Answers2

1

This produces a list of zoo objects, Lz, and then performs rollapply on each component of the list giving L2. Finally L3 aggregates over year taking the max of each column.

library(zoo)

mylist <- list(a, a) # a is given at bottom of question

# One zoo series per data frame. The non-numeric Site column is dropped first:
# in `a` it sits in column 3, so without this step columns 1:3 would be
# Year/Month/Site instead of the Year/Month/Day date parts, and a zoo object
# cannot mix character and numeric columns anyway. The three index columns are
# pasted together and parsed with the "%Y %m %d" format.
Lz <- lapply(mylist, function(x) {
  read.zoo(x[!(names(x) %in% "Site")], index.column = 1:3, format = "%Y %m %d")
})
# 3-day rolling sum of every column; partial = TRUE also sums the shorter
# windows at both ends of each series.
L2 <- lapply(Lz, rollapply, 3, sum, partial = TRUE)
# Maximum of the rolling sums per calendar year (year taken from the index).
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))

giving:

> L3

[[1]]
     sim01 sim02 sim03 sim04 sim05  sim06  sim07  sim08  sim09  sim10  sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
      sim12  sim13  sim14  sim15  sim16  sim17  sim18  sim19  sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28

[[2]]
     sim01 sim02 sim03 sim04 sim05  sim06  sim07  sim08  sim09  sim10  sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
      sim12  sim13  sim14  sim15  sim16  sim17  sim18  sim19  sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28
G. Grothendieck
  • 254,981
  • 17
  • 203
  • 341
  • thanks for helping. It works well. I just edited the data colnames to represent my real data. How can I do this: Lz <- lapply(a, read.zoo, index = 1:3, format = "%Y %m %d") lst3<-lapply(Lz, rollapply, 3, sum, partial = TRUE,align="center") lst4 <-lapply(lst3, function(x) aggregate(x[,5:ncol(x)], x[c(Year, Site)], FUN = max)) # I would like to get the max for each Year by Site. I get an error when using aggregate bcause of "Site" column. – code123 Mar 04 '15 at 23:30
  • I have added max per year using the data in the question. – G. Grothendieck Mar 04 '15 at 23:37
  • We are almost there. I would like L3 to have colnames<-c("Year","Site",...). You omitted the "Site" (my fault, I did not specify) in L3. I have edited the question again. I need the dataframe colnames to be Year, Site, Sim01... so that I can write them conveniently to a folder with lapply(L3,function(x) write.table(x,file=paste(getwd(),"mydir",paste0(unique(x$Site),".csv"), sep="/"),row.names=FALSE,quote=FALSE)) – code123 Mar 05 '15 at 00:58
  • You can't have a mixture of character columns and numeric columns in a zoo object because zoo object are vectors or matrices with a time index. – G. Grothendieck Mar 05 '15 at 01:50
  • I can remove the "Site" column from mylist using lapply(mylist, function(x) x[!(names(x) %in% c("Site"))]). How can I rename the dataframes in L3 using the Site code? E.g. instead of [[1]] and [[2]], I would like to have [[G100]], [[G100]] as defined by Site in each dataframe (I have 90000 such dataframes in a list and there are 90000 different Site codes). I have to keep the list names and will write the output to a folder using lapply(L3,function(x) write.table(x,file=paste(getwd(),"mydir",paste0(unique(x$Site),".csv"), sep="/"),row.names=FALSE,quote=FALSE)) – code123 Mar 05 '15 at 02:15
  • thanks for the solution. It works great now. Finally, i am trying to write L3 to a folder using the code below. I would like to name the files using L3 names. How can I do this? lapply(L3,function(x) + write.table(x,file=paste(getwd(),"WINTER-HISTO",paste0(unique(names(L3)),".csv"), + sep="/"),row.names=FALSE,quote=FALSE)) ### create a folder – code123 Mar 05 '15 at 03:24
0

Solved

# Batch version of the workflow: read every CSV in the working directory, keep
# one season, compute the yearly maximum of the 3-day rolling totals, and write
# one result file per input file.

# List the inputs once so the data and the names attached to them cannot get
# out of step. The pattern is an anchored regular expression: a bare ".csv"
# would match any character followed by "csv" anywhere in a file name.
csv_files <- list.files(pattern = "\\.csv$")
if (length(csv_files) == 0) {
  stop("no .csv files found in ", getwd(), call. = FALSE)
}

# Read all files (whitespace-separated, with a header row).
# NOTE(review): the original comment mentioned replacing -999.9 with NA, but no
# such replacement is performed here -- confirm whether `na.strings` is needed.
lst1 <- lapply(csv_files, function(x) read.table(x, header = TRUE, sep = ""))

# Keep only months 6, 7, 8 (June-August) for each site.
# NOTE(review): the original comment said DJF, which would be c(12, 1, 2).
lst2 <- lapply(lst1, function(x) x[x[["Month"]] %in% c(6, 7, 8), ])
names(lst2) <- csv_files
lapply(lst2, tail, 4) # quick look at the last four rows of each table

# Drop the Site column; columns 1:3 are then used as the date parts
# (presumably Year, Month, Day as in the example data -- verify against the files).
lst3 <- lapply(lst2, function(x) x[!(names(x) %in% c("Site"))])
Lz <- lapply(lst3, read.zoo, index.column = 1:3, format = "%Y %m %d")

# 3-day rolling sum per column, then the maximum per calendar year.
L2 <- lapply(Lz, rollapply, 3, sum, partial = TRUE)
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))

# Write one file per list element, keeping the list names as file names.
# NOTE(review): the names already end in ".csv", so the outputs are named
# "<input>.csv.csv"; this is kept so the inputs are not overwritten, but the
# outputs land in the same directory and would be read as inputs on a re-run.
# NOTE(review): with row.names = FALSE the year index of each zoo object is
# not written; zoo's write.zoo() may be preferable if the year is needed.
invisible(mapply(
  write.table,
  x = L3, file = paste(names(L3), "csv", sep = "."),
  MoreArgs = list(row.names = FALSE, sep = ",")
))
code123
  • 2,082
  • 4
  • 30
  • 53