2

I have a dataset like below and want to extract non-empty cells from each column while keeping the Date information.

df <- structure(list(Date = as.Date(c("6/25/2020", "6/26/2020", "6/27/2020"),
      format = "%m/%d/%y"), 
      A = c("",2L,1L),B = c(3L,"",""),C = c(3L,2L,"")),
      class = "data.frame", row.names = c("1", "2", "3"))

Here is the result I'm looking for:

Date       Company Number
2020-06-26    A      2
2020-06-27    A      1
2020-06-25    B      3
2020-06-25    C      3
2020-06-26    C      2
T-T
  • 693
  • 1
  • 10
  • 24

3 Answers3

1

You can use pivot_longer with values_drop_na = T:

library(tidyverse)
df %>% 
  na_if("") %>% 
  pivot_longer(-Date, values_drop_na = T, names_to = "Company", values_to  = "Number")

  Date       Company Number
  <date>     <chr>   <chr> 
1 2020-06-25 B       3     
2 2020-06-25 C       3     
3 2020-06-26 A       2     
4 2020-06-26 C       2     
5 2020-06-27 A       1   

You can also use pivot_longer and handle empty cells with filter:

df %>% 
  pivot_longer(-Date, names_to = "Company", values_to  = "Number") %>% 
  filter(Number != "")
Maël
  • 45,206
  • 3
  • 29
  • 67
1

Another possible solution:

library(tidyverse)

df %>% 
  pivot_longer(A:C, names_to = "Company", values_to = "Number",
    values_transform = list(Number = \(x) ifelse(x == "", NA, as.numeric(x))),
    values_drop_na = T)

#> # A tibble: 5 × 3
#>   Date       Company Number
#>   <date>     <chr>    <dbl>
#> 1 2020-06-25 B            3
#> 2 2020-06-25 C            3
#> 3 2020-06-26 A            2
#> 4 2020-06-26 C            2
#> 5 2020-06-27 A            1
PaulS
  • 21,159
  • 2
  • 9
  • 26
1

Using base R with reshape

out <- transform(na.omit(reshape(type.convert(df, as.is = TRUE),
   idvar = 'Date', varying = list(2:4), v.names = 'Number', 
  direction = "long", timevar = "Company")), Company = names(df)[-1][Company])
row.names(out) <- NULL
akrun
  • 874,273
  • 37
  • 540
  • 662