0

I have tried using rollapply but I can't get the desired result.

These are the columns(sample) of the dataset on which I want to do the calculations.

structure(list(LeagueROUND = structure(c(1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("1", "2", 
"3", "4"), class = "factor"), League = structure(c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Portugal2", class = "factor"), 
    Season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L), .Label = "2021/2022", class = "factor"), 
    DRAWmarginODDS = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c("No", "Yes"), class = "factor"), 
    DRAWnumODDS = c(0L, NA, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 
    0L, 0L, NA, 0L)), .Names = c("LeagueROUND", "League", "Season", 
"DRAWmarginODDS", "DRAWnumODDS"), class = "data.frame", row.names = c(NA, 
-15L))

enter image description here

Desired result

enter image description here

Group by( LeagueROUND,League,Season,DRAWmarginODDS)

Average(mean) of (DRAWnumODDS) of 3 previous LeagueROUNDs

That is:

League Round 1 (Yes) adds 1(DRAWnumODDS) in 3(grouped) rows.

League Round 2 (Yes) adds 2(DRAWnumODDS) in 4(grouped) rows

League Round 3 (Yes) adds 0(DRAWnumODDS) in 3(grouped) rows

Desired:

In League Round 4(Yes) (average of 3 previous League Round) = 3(DRAWnumODDS) in 10(grouped) rows = mean 0,3

League Round (No) = NA

3 first LeagueROUND -> NA

William88
  • 49
  • 6

1 Answers1

1
library(tidyverse)

data <- structure(list(
  LeagueROUND = structure(c(
    1L, 1L, 1L, 1L, 2L,
    2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L
  ), .Label = c(
    "1", "2",
    "3", "4"
  ), class = "factor"), League = structure(c(
    1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
  ), .Label = "Portugal2", class = "factor"),
  Season = structure(c(
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L
  ), .Label = "2021/2022", class = "factor"),
  DRAWmarginODDS = structure(c(
    2L, 1L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L
  ), .Label = c("No", "Yes"), class = "factor"),
  DRAWnumODDS = c(
    0L, NA, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
    0L, 0L, NA, 0L
  )
), .Names = c(
  "LeagueROUND", "League", "Season",
  "DRAWmarginODDS", "DRAWnumODDS"
), class = "data.frame", row.names = c(
  NA,
  -15L
))


data %>%
  mutate(LeagueROUND = as.integer(LeagueROUND)) %>%
  group_by(DRAWmarginODDS, LeagueROUND) %>%
  summarise(DRAWnumODDS = sum(DRAWnumODDS, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(DRAWmarginODDS == "Yes") %>%
  arrange(LeagueROUND) %>%
  mutate(
    n_observations = LeagueROUND %>% map_int(~ {
      data %>%
        mutate(LeagueROUND = as.integer(LeagueROUND)) %>%
        filter(LeagueROUND < .x & DRAWmarginODDS == "Yes") %>%
        nrow()
    }),
    mean_last_3_DRAWnumODDS = (lag(DRAWnumODDS, 1) + lag(DRAWnumODDS, 2) + lag(DRAWnumODDS, 3)) / n_observations
  ) %>%
  mutate(across(everything(), as.character)) %>%
  right_join(data %>% mutate(across(everything(), as.character))) %>%
  type_convert()
#> `summarise()` has grouped output by 'DRAWmarginODDS'. You can override using the `.groups` argument.Joining, by = c("DRAWmarginODDS", "LeagueROUND", "DRAWnumODDS")
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   DRAWmarginODDS = col_character(),
#>   LeagueROUND = col_double(),
#>   DRAWnumODDS = col_double(),
#>   n_observations = col_double(),
#>   mean_last_3_DRAWnumODDS = col_double(),
#>   League = col_character(),
#>   Season = col_character()
#> )
#> # A tibble: 15 × 7
#>    DRAWmarginODDS LeagueROUND DRAWnumODDS n_observations mean_last_3_DRA… League
#>    <chr>                <dbl>       <dbl>          <dbl>            <dbl> <chr> 
#>  1 Yes                      1           1              0             NA   Portu…
#>  2 Yes                      3           0              7             NA   Portu…
#>  3 Yes                      3           0              7             NA   Portu…
#>  4 Yes                      3           0              7             NA   Portu…
#>  5 Yes                      4           0             10              0.3 Portu…
#>  6 Yes                      4           0             10              0.3 Portu…
#>  7 Yes                      4           0             10              0.3 Portu…
#>  8 Yes                      1           0             NA             NA   Portu…
#>  9 No                       1          NA             NA             NA   Portu…
#> 10 Yes                      1           0             NA             NA   Portu…
#> 11 Yes                      2           0             NA             NA   Portu…
#> 12 Yes                      2           0             NA             NA   Portu…
#> 13 Yes                      2           1             NA             NA   Portu…
#> 14 Yes                      2           1             NA             NA   Portu…
#> 15 No                       4          NA             NA             NA   Portu…
#> # … with 1 more variable: Season <chr>

Created on 2022-03-15 by the reprex package (v2.0.0)

danlooo
  • 10,067
  • 2
  • 8
  • 22
  • Thanks. But the desired result should be 0.3. 3/ number of rows(10 in the example) where Yes in the previous three LeagueROUND. In the same way. I need to create a new column with the same number of rows as the original data (as in the image above). – William88 Mar 15 '22 at 12:48
  • Why 0.3 and why 10 rows? `data` has 15 rows, 13 of them contain DRAWmarginODDS=Yes. This results can be added to the original df witha final command `%>% right_join(data)`. – danlooo Mar 15 '22 at 12:59
  • 13 rows but 3 are from LeagueROUND4. So that's 10 rows for the first 3 LeagueROUND. I guess it will be done with sum/.N. But I tried many times and I couldn't get the result. – William88 Mar 15 '22 at 13:33
  • @William88 I revised my asnwer – danlooo Mar 15 '22 at 13:52