0

I have a list of times:

> sapply(copy, class)
$timefact
[1] "POSIXlt" "POSIXt" 

$timefact_hour
[1] "integer"

> head(copy)

             timefact timefact_hour
1 2016-04-07 23:42:00            23
2 2016-04-07 23:37:00            23
3 2016-04-07 23:31:00            23
4 2016-04-07 23:27:00            23
5 2016-04-07 23:19:00            23
6 2016-04-07 23:17:00            23

My objective is to create a third column which will be an integer value that represents the number of minutes difference between the given row and the row above it.

The output therefore should look something like this:

> output
             timefact timefact_hour timediff
1 2016-04-07 23:42:00            23       NA
2 2016-04-07 23:37:00            23        5
3 2016-04-07 23:31:00            23        6
4 2016-04-07 23:27:00            23        4
5 2016-04-07 23:19:00            23        8
6 2016-04-07 23:17:00            23        2

However, I want to exclude all cases where the hours are different and only find the differences of rows that share the same hour as the row above. For example:

> output
              timefact timefact_hour timediff
90 2016-04-07 12:14:00            12        6
91 2016-04-07 12:04:00            12       10
92 2016-04-07 11:56:00            11       NA
93 2016-04-07 11:49:00            11        7
94 2016-04-07 11:42:00            11        7
95 2016-04-07 11:36:00            11        6

So far, I have come up with the following code,

# Print the gap, in minutes, between each row and the row directly above it.
# difftime(a, b) returns a - b, so with the timestamps sorted newest-first (as
# in the sample data) every printed value is negative. The results are only
# printed; nothing is stored back into `copy`.
for(i in 2:nrow(copy)) {
   print(difftime(copy[i,"timefact"], copy[i-1,"timefact"], tz = "EST", units = "mins"))
  }

Which seems to work but I am not sure how to a) add it as a third column to the dataframe and b) skip over rows where the row above is not the same hour. Any advice would be appreciated! I have included a sample of the data below:

> dput(copy)
structure(list(timefact = structure(list(sec = c(0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(42L, 37L, 31L, 27L, 
19L, 17L, 10L, 6L, 1L, 56L, 50L, 45L, 34L, 27L, 18L, 4L, 58L, 
53L, 50L, 44L, 44L, 37L, 34L, 28L, 23L, 16L, 12L, 4L, 59L, 50L, 
19L, 13L, 46L, 26L, 26L, 19L, 11L, 8L, 2L, 55L, 51L, 44L, 37L, 
31L, 9L, 0L, 48L, 43L, 34L, 30L, 10L, 6L, 57L, 52L, 44L, 39L, 
30L, 23L, 23L, 1L, 1L, 54L, 48L, 32L, 23L, 16L, 12L, 5L, 1L, 
48L, 44L, 37L, 27L, 18L, 13L, 6L, 0L, 39L, 31L, 23L, 17L, 4L, 
54L, 49L, 44L, 38L, 33L, 24L, 20L, 14L, 4L, 56L, 49L, 42L, 36L, 
48L, 36L, 31L, 22L, 14L), hour = c(23L, 23L, 23L, 23L, 23L, 23L, 
23L, 23L, 23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 17L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 10L, 10L, 
10L, 10L, 10L), mday = c(7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), mon = c(3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L), year = c(116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L), isdst = c(0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", 
"mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"
), class = c("POSIXlt", "POSIXt"), tzone = c("EST", "EST", "   "
)), timefact_hour = c(23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 
23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 
17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 10L, 10L, 10L, 10L, 
10L)), .Names = c("timefact", "timefact_hour"), row.names = c(NA, 
100L), class = "data.frame")
iskandarblue
  • 7,208
  • 15
  • 60
  • 130
  • Can you please share expected output either based on your `dput` or the example in the post? – mtoto May 17 '16 at 08:18
  • I did, both output examples above are taken directly from the data in `dput`. `copy` contains 100 rows but I do not have that many rows of example output. I selected only two small samples to show my objective. – iskandarblue May 17 '16 at 08:19
  • except you shared different rows, which is confusing. – mtoto May 17 '16 at 08:21
  • Yes, but that is only because I wanted to explain the condition of excluding rows where the row above contains a different hour. Sorry for the confusion – iskandarblue May 17 '16 at 08:23
  • why is row 90 `NA` ? – mtoto May 17 '16 at 08:27
  • that's my mistake, it should not be. I fixed it above – iskandarblue May 17 '16 at 08:30
  • This is exactly why you should be sharing expected output based on your example data, see answer below. – mtoto May 17 '16 at 08:31
  • Thanks. I guess using this code, the output is a negative value for all of the rows where the one above belongs to a different hour, but that works. – iskandarblue May 17 '16 at 08:36
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/112127/discussion-between-mtoto-and-the-darkside). – mtoto May 17 '16 at 08:37

1 Answers1

1

First convert timefact column to class POSIXct

# The dput above shows timefact stored as POSIXlt, a list-based class that is
# poorly suited to data-frame columns; store it as POSIXct before grouping.
copy$timefact <- as.POSIXct(copy$timefact)

Then, using lubridate and dplyr:

library(lubridate)  # minute()
library(dplyr)      # %>%, group_by(), mutate(), lag(), lead()

# Within each hour, minute(t) - lead(minute(t)) is the gap between a row and
# the row below it; lag() then shifts that gap down one row, so every row
# holds the number of minutes between it and the row above. The first row of
# each hour has no predecessor inside its group and therefore gets NA.
copy %>%
  group_by(timefact_hour) %>%
  mutate(timediff = lag(minute(timefact) - lead(minute(timefact))))

Or data.table:

library(data.table)

# setDT() turns `copy` into a data.table by reference and `:=` adds the
# timediff column in place. Per hour group, the inner shift(type = "lead")
# fetches the next row's minute, the subtraction gives the gap to the row
# below, and the outer shift() (a lag by default) moves that gap down one row
# so each row holds the minutes between it and the row above (NA for the first
# row of every hour).
setDT(copy)[, timediff := shift(minute(timefact) - 
                                  shift(minute(timefact), type = "lead")), 
            by = timefact_hour]
mtoto
  • 23,919
  • 4
  • 58
  • 71