3

Say we have this vector:

# Reference set of product codes. The codes are string literals and must be
# quoted; bare a, b, d, ... would be looked up as objects and fail.
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")

And a dataframe like this one below:

seller_a seller_b seller_c
a        b        d
d        d        e
g        g        g
h        l        h
t        n        t
z        y        w

I would like to add a row to the dataframe that shows to what extent each seller column matches the products vector.

In other words, my goal is to get the original dataframe to look like this:

seller_a seller_b seller_c
6        3        4
a        b        d
d        d        e
g        g        g
h        l        h
t        n        t
z        y        w
Sotos
  • 51,121
  • 6
  • 32
  • 66
teogj
  • 289
  • 1
  • 11

8 Answers

3

You can also use the add_row function from tibble (re-exported by dplyr) to add an additional row to your data set:

library(dplyr)

# Insert, as the first row, the number of entries in each seller column that
# appear in `products`. The counts are converted to character because the
# seller columns are character.
add_row(
  df,
  seller_a = as.character(sum(df[["seller_a"]] %in% products)),
  seller_b = as.character(sum(df[["seller_b"]] %in% products)),
  seller_c = as.character(sum(df[["seller_c"]] %in% products)),
  .before = 1
)

# A tibble: 7 x 3
  seller_a seller_b seller_c
  <chr>    <chr>    <chr>   
1 6        3        4       
2 a        b        d       
3 d        d        e       
4 g        g        g       
5 h        l        h       
6 t        n        t       
7 z        y        w 
Anoushiravan R
  • 21,622
  • 3
  • 18
  • 41
3

Using the inputs shown reproducibly in the Note at the end

# For every column, put the number of values found in `products` in front of
# the column's own values, then reassemble the columns into a data frame.
as.data.frame(
  lapply(DF, function(column) {
    n_matched <- sum(column %in% products)
    c(n_matched, column)
  })
)
##   seller_a seller_b seller_c
## 1        6        3        4
## 2        a        b        d
## 3        d        d        e
## ...snip...

Numeric vector

However, all elements of a column must be of the same type, so the numbers will be coerced to character. You might prefer to create a separate numeric vector instead.

# Named integer vector: for each column of DF, how many of its values occur in
# `products`. vapply() fixes the result type (one integer per column), whereas
# sapply() changes its return type depending on the input.
vapply(DF, function(x) sum(x %in% products), integer(1))
## seller_a seller_b seller_c 
##        6        3        4 

S3

This is probably overdoing it but it would be possible to create a new S3 class that has the product numbers stored as a numeric attribute rather than a row but show it as a row when printed.

# S3 generic for converting an object to the "data.frame1" class; dispatches on
# the class of `x` and forwards any further arguments to the method.
as.data.frame1 <- function(x, ...) {
  UseMethod("as.data.frame1")
}

# Method for data frames: tag `x` with class "data.frame1" and store, in the
# "product" attribute, how many values of each column occur in `product`.
#
# x        A data frame whose columns are compared against `product`.
# product  Vector of reference values to match the columns against.
# ...      Unused; accepted for compatibility with the generic.
#
# Returns `x` with "data.frame1" prepended to its class and a named integer
# vector (one count per column) in attr(, "product").
as.data.frame1.data.frame <- function(x, product, ...) {
  out <- structure(x, class = c("data.frame1", class(x)))
  # Count against the arguments that were passed in rather than against
  # global objects, so the method works for any data frame / product vector.
  attr(out, "product") <- vapply(
    x,
    function(column) sum(column %in% product),
    integer(1)
  )
  out
}

# Format method for "data.frame1": places the counts stored in the "product"
# attribute on top of the data as an extra first row, converts the result to a
# plain data frame and formats that. Arguments in `...` are not used.
format.data.frame1 <- function(x, ...) {
  combined <- rbind(attr(x, "product"), x)
  format(as.data.frame(combined))
}

# Print method for "data.frame1": prints the formatted representation of `x`,
# forwarding `...` to print(). Returns `x` itself invisibly, as print methods
# conventionally do, so the original object (not the formatted copy) is what a
# caller gets back.
print.data.frame1 <- function(x, ...) {
  print(format(x), ...)
  invisible(x)
}

# Wrap DF in the data.frame1 class, passing the reference product vector.
DF1 <- as.data.frame1(x = DF, product = products)

DF1
##   seller_a seller_b seller_c
## 1        6        3        4
## 2        a        b        d
## 3        d        d        e
## ...snip...

attr(DF1, "product")  # numeric vector
## seller_a seller_b seller_c 
##        6        3        4 

as.data.frame(DF1)
##   seller_a seller_b seller_c
## 1        a        b        d
## 2        d        d        e
## 3        g        g        g
## ...snip...

Note

# Reproducible inputs.
# `products`: comma-separated codes parsed into a character vector, with the
# blanks around each code stripped.
products <- scan(
  text = "a, b, d, f, g, h, i, j, m, o, t, z",
  what = character(),
  sep = ",",
  strip.white = TRUE
)

# `Lines`: the seller table as whitespace-delimited text, one record per line,
# with the column names in the first line.
Lines <- paste(
  c(
    "seller_a seller_b seller_c",
    "a        b        d",
    "d        d        e",
    "g        g        g",
    "h        l        h",
    "t        n        t",
    "z        y        w"
  ),
  collapse = "\n"
)

# `DF`: the table parsed from `Lines`.
DF <- read.table(header = TRUE, text = Lines)
G. Grothendieck
  • 254,981
  • 17
  • 203
  • 341
3

using summarise with across in dplyr

library(dplyr)

# Collapse DF to one row holding, per column, the number of values found in
# `products` (as character, to match the column type), then append the
# original rows underneath it.
DF %>%
  summarise(
    across(
      everything(),
      function(column) as.character(sum(column %in% products))
    )
  ) %>%
  bind_rows(DF)
#>   seller_a seller_b seller_c
#> 1        6        3        4
#> 2        a        b        d
#> 3        d        d        e
#> 4        g        g        g
#> 5        h        l        h
#> 6        t        n        t
#> 7        z        y        w

Created on 2021-06-07 by the reprex package (v2.0.0)

AnilGoyal
  • 25,297
  • 4
  • 27
  • 45
2

data:

# Example data: one character column per seller. tibble() is namespace-
# qualified so this snippet runs even before any package has been attached.
df <- tibble::tibble(
  a = c("a", "d", "g", "h", "t", "z"),
  b = c("b", "d", "g", "l", "n", "y"),
  c = c("d", "e", "g", "h", "t", "w")
)

# Reference product codes the seller columns are compared against.
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")

code:

library(tidyverse)

# Per column, count how many entries of `products` occur in that column, and
# stack those counts on top of the original rows.
rbind(
  map_int(df, function(column) sum(products %in% column)),
  df
)

  a     b     c    
  <chr> <chr> <chr>
1 6     3     4    
2 a     b     d    
3 d     d     e    
4 g     g     g    
5 h     l     h    
6 t     n     t    
7 z     y     w 

Note that the numbers in the first row will be characters. Also, if the columns are factors the code won't work (that's why I use a tibble).

det
  • 5,013
  • 1
  • 8
  • 16
2

Solution with data.table:

library(data.table)

# Reference product codes.
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")

# One character column per seller.
DT <- data.table(
  seller_a = c("a", "d", "g", "h", "t", "z"),
  seller_b = c("b", "d", "g", "l", "n", "y"),
  seller_c = c("d", "e", "g", "h", "t", "w")
)

# One-row table: for each seller column, the number of entries of `products`
# that occur in that column.
DT1 <- DT[, lapply(.SD, function(seller) sum(products %in% seller))]

# -------------------

> DT1
   seller_a seller_b seller_c
1:        6        3        4
> rbind(DT1, DT)
   seller_a seller_b seller_c
1:        6        3        4
2:        a        b        d
3:        d        d        e
4:        g        g        g
5:        h        l        h
6:        t        n        t
7:        z        y        w

Sara
  • 465
  • 5
  • 15
2

Base R option -

# Stack the per-column match counts on top of df. vapply() guarantees one
# integer per column, whereas sapply() changes its return type depending on
# the input.
rbind(vapply(df, function(x) sum(x %in% products), integer(1)), df)

#  a b c
#1 6 3 4
#2 a b d
#3 d d e
#4 g g g
#5 h l h
#6 t n t
#7 z y w
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
2

We can use

library(dplyr)

# Each column becomes its match count followed by its original values, so the
# result has more rows than a summary. reframe() (dplyr >= 1.1.0) is the verb
# for such multi-row results; returning them from summarise() is deprecated.
df %>%
    reframe(across(everything(), ~ c(sum(products %in% .x), .x)))

-output

# A tibble: 7 x 3
  a     b     c    
  <chr> <chr> <chr>
1 6     3     4    
2 a     b     d    
3 d     d     e    
4 g     g     g    
5 h     l     h    
6 t     n     t    
7 z     y     w
akrun
  • 874,273
  • 37
  • 540
  • 662
1

using base R, you could do the following

# ---- Example data ----
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")
seller_a <- c("a", "d", "g", "h", "t", "z")
seller_b <- c("b", "d", "g", "l", "n", "y")
seller_c <- c("d", "e", "g", "h", "t", "w")
d <- as.data.frame(cbind(seller_a, seller_b, seller_c))

# ---- Match counts ----
# Number of entries in each seller column that occur in `products`, in column
# order and without names.
a <- vapply(
  d,
  function(column) sum(column %in% products),
  integer(1),
  USE.NAMES = FALSE
)

# Put the counts on top of the data as the first row.
d <- rbind(a, d)
wiebke
  • 111
  • 2