We could create a sequence (row-number) column in `df1` before taking any subset:
# Reference table: first two columns and first five rows of `diamonds`,
# with the original row position stored in `rn` so it survives subsetting.
df1 <- diamonds %>%
  select(1:2) %>%
  slice(1:5) %>%
  mutate(rn = row_number())

# Same table with the third row dropped; `rn` keeps the original numbering.
df2 <- df1 %>%
  slice(-3)
and then just `pull` the `rn` column of the rows that are in `df1` but missing from `df2`:
# Rows of df1 that have no partner in df2 are exactly the rows that were
# dropped. Match on the stored row id only: an explicit `by` avoids the
# implicit natural join over every column the two frames share and the
# "Joining with `by = ...`" message that comes with it.
anti_join(df1, df2, by = "rn") %>%
  pull(rn)
#[1] 3
`rownames_to_column` doesn't give the correct row number because a `tibble` doesn't allow row names and resets the row numbers on each subset; thus we get only the renumbered rows instead of the original ones. In addition to the `tibble` case, `slice` here does reset them as well, i.e. if we check the functions involved in it, there is a call to `dplyr_new_data_frame`, which may be resetting the row numbers:
> methods("slice")
#[1] slice.data.frame* slice.index
> getAnywhere("slice.data.frame")
function (.data, ..., .preserve = FALSE)
{
loc <- slice_rows(.data, ...)
dplyr_row_slice(.data, loc, preserve = .preserve)
}
> dplyr:: dplyr_row_slice
function (data, i, ...)
{
if (!is.numeric(i) && !is.logical(i)) {
abort("`i` must be an numeric or logical vector.")
}
UseMethod("dplyr_row_slice")
}
> methods("dplyr_row_slice")
[1] dplyr_row_slice.data.frame* dplyr_row_slice.grouped_df* dplyr_row_slice.rowwise_df*
> getAnywhere("dplyr_row_slice.data.frame")
function (data, i, ...)
{
dplyr_reconstruct(vec_slice(data, i), data)
}
> dplyr_reconstruct
function (data, template)
{
data <- dplyr_new_data_frame(data)
return(dplyr_reconstruct_dispatch(data, template))
UseMethod("dplyr_reconstruct", template)
}
> dplyr:::dplyr_new_data_frame
function (x = data.frame(), n = NULL, ..., row.names = NULL,
class = NULL)
{
row.names <- row.names %||% .row_names_info(x, type = 0L)
new_data_frame(x, n = n, ..., row.names = row.names, class = class)
}