I have this data.frame
and I want to identify which cells from sample1$domain
have "www", replace that with ""
and strsplit
the corresponding sample1$suffix
. The data looks like this:
domain suffix
1 wbx2 com
2 redhat com
3 something com
4 gstatic com
5 www googleapis.com
6 smartfilter com
I have managed to tackle this as shown below but it changes the position of the row(s) (I would like it to stay at position 5) and given that it will run for million of cases, I don't think this is the most efficient way to do it.:
library("stringr")
sample1$domain <- ifelse(sample1$domain == "www", "", sample1$domain)
sample1[sample1$domain == "", c("domain", "suffix")] <- sample1[sample1$domain == "", c("suffix", "domain")]
y <- sample1$domain[sample1$suffix == ""]
z <- as.data.frame(unlist(str_split_fixed(y, "[.]", 2)))
colnames(z) <- c("domain", "suffix")
sample1 <- rbind(sample1, z)
sample1 <- subset(sample1, sample1$suffix != "")
rownames(sample1) <- NULL
sample1
# domain suffix
#1 wbx2 com
#2 redhat com
#3 something com
#4 gstatic com
#5 smartfilter com
#6 googleapis com
DATA
sample1 <- structure(list(domain = c("wbx2", "redhat", "something",
"gstatic", "www", "smartfilter"), suffix = c("com", "com", "com",
"com", "googleapis.com", "com")), .Names = c("domain", "suffix"
), row.names = c(NA, 6L), class = "data.frame")