0

I have a column of times in timefact

> head(foo)
  cnrd_marsh            timefact timefact_hour
1         БЧ 2016-04-07 14:34:00            14
2         БЧ 2016-04-07 14:15:00            14
3         БЧ 2016-04-07 14:10:00            14
4         БЧ 2016-04-07 13:58:00            13
5         БЧ 2016-04-07 13:57:00            13
6         БЧ 2016-04-07 13:39:00            13

My objective is to create a fourth column containing integer values that represent the number of minutes difference (difftime) between the given row and the row above it. In addition, I need to exclude all cases where the timefact_houris different from the row above OR cnrd_marsh is different from the row above. In other words, I only need to find the differences of rows that share the same hour and same cnrd_marsh as the row above.

The following code appears to work at first аs NA values are in the right places, however, further down, what should be NA values become seemingly random negative numbers.

library(data.table)
setDT(foo)[, timefact_diff := shift(minute(timefact) - 
                                  shift(minute(timefact), type = "lead")), 
            by = timefact_hour]

   > head(foo)
  cnrd_marsh            timefact timefact_hour timefact_diff
1         БЧ 2016-04-07 14:34:00            14            NA
2         БЧ 2016-04-07 14:15:00            14            19
3         БЧ 2016-04-07 14:10:00            14             5
4         БЧ 2016-04-07 13:58:00            13            NA
5         БЧ 2016-04-07 13:57:00            13             1
6         БЧ 2016-04-07 13:39:00            13            18
> tail(foo)
    cnrd_marsh            timefact timefact_hour timefact_diff
95          БЧ 2016-04-07 15:23:00            15             3
96          БЧ 2016-04-07 14:58:00            14           -24
97          БЧ 2016-04-07 14:53:00            14             5
98          БЧ 2016-04-07 14:44:00            14             9
99          БЧ 2016-04-07 14:43:00            14             1
100         БЧ 2016-04-07 14:27:00            14            16

Why does this happen and how can I apply the rule that the given row's values should match to the one above in both timefact_hour and cnrd_marsh?

Here is the sample data:

> dput(foo)
structure(list(cnrd_marsh = structure(c(91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L), .Label = c("", "1", "10", "11", "11К", 
"12", "13", "14", "15", "16", "17", "18", "19", "1К", "2", "20", 
"21", "22", "23", "24", "26", "27", "28", "29", "3", "30", "31", 
"32", "33", "33К", "34", "34К", "35", "36", "37", "38", "39", 
"4", "40", "41", "42", "43", "43К", "44", "45", "47", "48", 
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", 
"6", "60", "61", "62", "63", "63К", "64", "65", "66", "67", 
"7", "70", "70К", "71", "72", "73", "74", "76", "77", "78", 
"79", "8", "80", "81", "82", "83", "84", "85", "86", "88", "9", 
"БК", "БЧ"), class = "factor"), timefact = structure(c(1460057640, 
1460056500, 1460056200, 1460055480, 1460055420, 1460054340, 1460052480, 
1460052360, 1460051340, 1460051040, 1460050440, 1460050380, 1460049360, 
1460048160, 1460048040, 1460046960, 1460046720, 1460046300, 1460045340, 
1460045160, 1460043540, 1460042880, 1460042520, 1460041920, 1460041140, 
1460040120, 1460039760, 1460038860, 1460038620, 1460038080, 1460038020, 
1460037240, 1460036280, 1460035800, 1460034960, 1460034780, 1460034120, 
1460034060, 1460033340, 1460032500, 1460032140, 1460031300, 1460031000, 
1460030340, 1460030340, 1460029800, 1460029200, 1460028300, 1460025720, 
1460090700, 1460088660, 1460088600, 1460087760, 1460087580, 1460086980, 
1460086980, 1460086320, 1460085300, 1460085180, 1460084340, 1460084160, 
1460083620, 1460083620, 1460082780, 1460081820, 1460081760, 1460080740, 
1460080500, 1460080200, 1460079360, 1460079180, 1460077260, 1460076840, 
1460076600, 1460076360, 1460075820, 1460074740, 1460070660, 1460070480, 
1460069100, 1460068800, 1460068140, 1460068140, 1460067060, 1460063880, 
1460063460, 1460062380, 1460062080, 1460061360, 1460061300, 1460059920, 
1460057880, 1460057640, 1460060760, 1460060580, 1460059080, 1460058780, 
1460058240, 1460058180, 1460057220), class = c("POSIXct", "POSIXt"
), tzone = "EST"), timefact_hour = c(14L, 14L, 14L, 13L, 13L, 
13L, 13L, 13L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 10L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 
5L, 23L, 23L, 23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 19L, 19L, 18L, 18L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 15L, 
15L, 15L, 15L, 15L, 14L, 14L, 15L, 15L, 14L, 14L, 14L, 14L, 14L
)), .Names = c("cnrd_marsh", "timefact", "timefact_hour"), row.names = c(NA, 
-100L), class = "data.frame")
zx8754
  • 52,746
  • 12
  • 114
  • 209
iskandarblue
  • 7,208
  • 15
  • 60
  • 130

1 Answers1

0

You could lag the timefact_hour and cnrd_marsh, and then test your condition :

foo[, previous_hour := shift(timefact_hour, type = "lag")]
foo[, previous_cnrd := shift(cnrd_marsh, type = "lag")]
foo = foo[!(previous_hour != timefact_hour | previous_cnrd != cnrd_marsh)]
FlxPo
  • 36
  • 3