3

(I initially posted a question here, but it didn't fully cover my issue)

I have a data frame with a 'date' column and a measure of precipitation (rainfall):

  date precip
1    1    0.0
2    2    0.0
3    3   12.4
4    4   10.2
5    5    0.0
6    6   13.6

I want to create a column "event" with a counter (ID) for each consecutive period of rainfall. A rainfall event can be defined as consecutive runs with precipitation larger than e.g. 0.

If we don't allow any short gaps of zero rain, the 'event' would look like this, with a counter for non-0 periods, and NA for periods with no rain.

  date precip event
1    1    0.0    NA
2    2    0.0    NA
3    3   12.4     1
4    4   10.2     1
5    5    0.0    NA
6    6   13.6     2

In addition, I want to able to allow for shorter periods with no rain, e.g. of size n = 1 day, within each run of non-0.

For example, in the data frame above, if we allow for 1 day with 0 rain within a contiguous period of rain, e.g. day 5, then day 3 to 6 can be defined as one rainfall event:

  date precip event
1    1    0.0    NA
2    2    0.0    NA
3    3   12.4     1
4    4   10.2     1
5    5    0.0     1 # <- gap of 1 day with no rain: OK
6    6   13.6     1

A slightly larger toy data set:

structure(list(date = 1:31, precip = c(0, 0, 12.3999996185303, 
10.1999998092651, 0, 13.6000003814697, 16.6000003814697, 21.5, 
7.59999990463257, 0, 0, 0, 0.699999988079071, 0, 0, 0, 5.40000009536743, 
0, 1, 35.4000015258789, 11.5, 16.7000007629395, 13.5, 13.1000003814697, 
11.8000001907349, 1.70000004768372, 0, 15.1000003814697, 12.8999996185303, 
3.70000004768372, 24.2999992370605)), row.names = c(NA, -31L), class = "data.frame")

Now I'm really stuck. I tried some strange things like the one below (just a start), but I think I will not figure it out by myself and would be super grateful for any help

# this is far from being any helpful, but just to show the direction I was heading...
# the threshold could be 0 to mirror the example above...

rainfall_event = function(df,
                          daily_thresh = .2,
                          n = 1) {
  for (i in 1:nrow(df)) {
    zero_index = 1
    
    if (df[i,]$precip < daily_thresh) {
      # every time you encounter a value below the threshold count the 0s
      zero_counter = 0
      
      while (df[i,]$precip < daily_thresh) {

        zero_counter = zero_counter + 1
        
        if (i != nrow(df)) {
          i = i + 1
          zero_index = zero_index + 1
        } else{
          break
        }
      }
      
      if (zero_counter > n) {
        df[zero_index:zero_index + zero_counter,][["event"]] = NA
      }
      
    } else{
      event_counter = 1
      
      while (df[i, ]$precip > daily_thresh) {

        df[["event"]] = event_counter
        if (i != nrow(rainfall_one_slide)) {
          i = i + 1
        } else{
          break
        }
      }
      
    }
  }
  
}
Henrik
  • 65,555
  • 14
  • 143
  • 159
Robin Kohrs
  • 655
  • 7
  • 17

3 Answers3

4

An rle alternative:

# limit of n days with precip = 0 to be allowed in runs of non-zero
n = 1

# rle of precip == 0
r = rle(d$precip == 0)

# replace the values of precip = 0 & length > limit with NA
r$values[r$values & r$lengths > n] = NA

# reconstruct the vector from the updated runs
ir = inverse.rle(r)

# rle of "is NA"
r2 = rle(is.na(ir))

# replace length of NA runs with 0
r2$lengths[r2$values] = 0

# replace values of non-NA runs with a sequence
r2$values[!r2$values] = seq_along(r2$values[!r2$values])

# create event column
d[!is.na(ir), "event"] = inverse.rle(r2)

   date precip event
1     1    0.0    NA
2     2    0.0    NA
3     3   12.4     1
4     4   10.2     1
5     5    0.0     1
6     6   13.6     1
7     7   16.6     1
8     8   21.5     1
9     9    7.6     1
10   10    0.0    NA
11   11    0.0    NA
12   12    0.0    NA
13   13    0.7     2
14   14    0.0    NA
15   15    0.0    NA
16   16    0.0    NA
17   17    5.4     3
18   18    0.0     3
19   19    1.0     3
20   20   35.4     3
21   21   11.5     3
22   22   16.7     3
23   23   13.5     3
24   24   13.1     3
25   25   11.8     3
26   26    1.7     3
27   27    0.0     3
28   28   15.1     3
29   29   12.9     3
30   30    3.7     3
31   31   24.3     3
Henrik
  • 65,555
  • 14
  • 143
  • 159
2

Using data.table with rleid

library(data.table)

f1 <- function(dat, n) {

 tmp <- as.data.table(dat)[,
      grp := rleid(precip != 0)][precip != 0, 
       event := .GRP,
        grp][, event_fill := nafill(nafill(event, 'locf'),
      'nocb')]
 tmp[, event := fifelse(.N <= n & precip == 0,
     fcoalesce(event, event_fill), event), grp][, 
         c("grp", "event_fill") := NULL][]

 }

-testing

f1(df1, 0)
     date precip event
 1:    1    0.0    NA
 2:    2    0.0    NA
 3:    3   12.4     1
 4:    4   10.2     1
 5:    5    0.0    NA
 6:    6   13.6     2
 7:    7   16.6     2
 8:    8   21.5     2
 9:    9    7.6     2
10:   10    0.0    NA
11:   11    0.0    NA
12:   12    0.0    NA
13:   13    0.7     3
14:   14    0.0    NA
15:   15    0.0    NA
16:   16    0.0    NA
17:   17    5.4     4
18:   18    0.0    NA
19:   19    1.0     5
20:   20   35.4     5
21:   21   11.5     5
22:   22   16.7     5
23:   23   13.5     5
24:   24   13.1     5
25:   25   11.8     5
26:   26    1.7     5
27:   27    0.0    NA
28:   28   15.1     6
29:   29   12.9     6
30:   30    3.7     6
31:   31   24.3     6

with n = 1

f1(df1, 1)
    date precip event
 1:    1    0.0    NA
 2:    2    0.0    NA
 3:    3   12.4     1
 4:    4   10.2     1
 5:    5    0.0     1
 6:    6   13.6     2
 7:    7   16.6     2
 8:    8   21.5     2
 9:    9    7.6     2
10:   10    0.0    NA
11:   11    0.0    NA
12:   12    0.0    NA
13:   13    0.7     3
14:   14    0.0    NA
15:   15    0.0    NA
16:   16    0.0    NA
17:   17    5.4     4
18:   18    0.0     4
19:   19    1.0     5
20:   20   35.4     5
21:   21   11.5     5
22:   22   16.7     5
23:   23   13.5     5
24:   24   13.1     5
25:   25   11.8     5
26:   26    1.7     5
27:   27    0.0     5
28:   28   15.1     6
29:   29   12.9     6
30:   30    3.7     6
31:   31   24.3     6
akrun
  • 874,273
  • 37
  • 540
  • 662
  • This is pretty amazing!! I have no idea what is going on in this function... I think though, that in your example with `n`=1, from observation 3 to 9 should be the first group because we allow one rainfall event to have gaps of one day – Robin Kohrs Mar 05 '21 at 07:37
1

So, it will probably never be of interested to someone, but I think I kind of have a solution as well:)

f2 = function(d,
              n = 1,
              daily_thresh = .2) {

  # start int the first row
  i = 1

  # start with rainfall event 1
  event_counter = 0
  
  # set the value initially to 0
  d[["event"]] = 0

  # while still in the dataframe
  while (i <= nrow(d)) {

    # get the current precip value
    precip = d[i,]$precip

    # if its below the threshold --> DRY period starts
    if (precip < daily_thresh) {

      # count unknown number of following dry days of this dry episode
      dry_days = 0

      ### DRY LOOP
      # start from the day with rainfall under the threshold
      for (j in i:nrow(d)) {

        # count the consecutive dry days
        if (d[j,]$precip < daily_thresh) {
          dry_days = dry_days + 1


        } else{

          # hit a rainy day --> Get out the dry loop, just decide to which event it belongs
          # if the preceeding dry days are smaller than n --> same as last event

          if (dry_days <= n) {

            # set all the days without rainfall but within n to rainfall
            # if its the first event put it to 1
            if(event_counter == 0) event_counter = 1
            d[(j-1):(j-dry_days),][["event"]] = event_counter
            # set the rainy day to the same event
            d[j,][["event"]] = event_counter
            break # get back to wet peiod

          } else{

            # if the gap was too big --> its a new event
            # set all the days without rainfall and within n to no rainfall
            d[(j-1):(j-dry_days),][["event"]] = NA
            # set the rainy day to a new rainfall event
            event_counter = event_counter + 1
            d[j,][["event"]] = event_counter
            break # get back to wet period
          }
        }
      }

      # set i to where we stopped in the dry loop
      i = j + 1

    } else{

      # if we initially hit a rainy day, just count on
      d[i,][["event"]] = event_counter
      i = i + 1

    }
  }
  return(d)
}

Robin Kohrs
  • 655
  • 7
  • 17