0

I need to calculate bout lengths for a very large dataset (65,400 rows). When we collected the data, we recorded what our focal animal was doing in each minute and second during a 5-minute session. My sample data is as follows:

 structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = "02/04/2015", class = "factor"), minute = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), sec = 1:11, activity = structure(c(2L, 
2L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("N", "S", 
"U"), class = "factor"), day_time = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "14:45", class = "factor")), .Names = c("date", 
"minute", "sec", "activity", "day_time"), class = "data.frame", row.names = c(NA, 
-11L)) 

df
         date minute sec activity day_time
1  02/04/2015      1   1        S    14:45
2  02/04/2015      1   2        S    14:45
3  02/04/2015      1   3        S    14:45
4  02/04/2015      1   4        N    14:45
5  02/04/2015      1   5        U    14:45
6  02/04/2015      1   6        U    14:45
7  02/04/2015      1   7        U    14:45
8  02/04/2015      1   8        S    14:45
9  02/04/2015      1   9        S    14:45
10 02/04/2015      1  10        S    14:45
11 02/04/2015      1  11        S    14:45

What we need to calculate is the length of each bout (in seconds), keeping the 5-minute sessions separate. In this case, our desired output would be:

structure(list(date = structure(c(1L, 1L, 1L, 1L), .Label = "02/04/2015", class = "factor"), 
        minute = c(1L, 1L, 1L, 1L), activity = structure(c(2L, 1L, 
        3L, 2L), .Label = c("N", "S", "U"), class = "factor"), day_time = structure(c(1L, 
        1L, 1L, 1L), .Label = "14:45", class = "factor"), bout_length = c(3L, 
        1L, 3L, 4L)), .Names = c("date", "minute", "activity", "day_time", 
    "bout_length"), class = "data.frame", row.names = c(NA, -4L))

desired output

            date minute activity day_time bout_length
    1 02/04/2015      1        S    14:45           3
    2 02/04/2015      1        N    14:45           1
    3 02/04/2015      1        U    14:45           3
    4 02/04/2015      1        S    14:45           4

I have tried `rle()` without success, since I have to keep the different minutes and sessions separate. Thank you for helping out.

Taw
  • 53
  • 6

1 Answer

0

I finally found a way around it. It would be nice if there were a shorter way to do it.

# Compute the length (in rows, i.e. seconds) of every run of identical
# `activity` values, without letting a run continue across two sessions.
# Input : `df`, one row per observed second, with an `activity` column.
# Output: `new.bouts`, one row per bout (the last row of that bout) plus a
#         `bout.len` column holding the bout length.

# Convert every column to character so that a literal "empty" separator row
# can be appended without creating invalid factor levels.
bouts1 <- as.data.frame(lapply(df, as.character), stringsAsFactors = FALSE)

# Append an all-"empty" row after each session; the separator forces a bout
# break at every session boundary. head(..., -1) drops the trailing separator.
# NOTE(review): assumes `df` has a `session` column; the sample data shown
# only has date/minute/sec/activity/day_time -- confirm or build it first.
bouts1 <- head(do.call(rbind, by(bouts1, df$session, rbind, "empty")), -1)
rownames(bouts1) <- seq_len(nrow(bouts1))

# TRUE at position i when row i + 1 has a different activity than row i,
# i.e. row i is the last row of a bout.
n_obs <- nrow(bouts1)
diffs <- bouts1$activity[-1L] != bouts1$activity[-n_obs]

# Row index of the last observation of every bout; the final row always
# closes the last bout, so it is added explicitly.
idx <- c(which(diffs), n_obs)

# Distance between consecutive bout ends is the bout length.
bout.len <- diff(c(0L, idx))

# Take the closing row of each bout directly by index (no row-name matching
# and no hard-coded "last row" lookup needed).
new.bouts <- bouts1[idx, ]
new.bouts$bout.len <- bout.len

# Drop the separator rows. Every column of a separator holds "empty", so
# filtering on `activity` works regardless of how the date column is named.
new.bouts <- new.bouts[new.bouts$activity != "empty", ]
Taw
  • 53
  • 6