1) One option is `extract` from tidyr, where the regex captures one or more
digits (`(\\d+)`) at the start (`^`) of the string as the first capture group,
followed by a space; then one or more characters that are letters or spaces
(`([A-Za-z ]+)`) as the second capture group, followed by a space; then one or
more digits as the third capture group, followed by a space; and finally the
rest of the characters (`(.*)`) as the 4th column
library(stringr)
library(dplyr)
library(tidyr)
# Pull four fields out of `col1` with one anchored regex: a leading number,
# a name made of letters and spaces, a second number, and the remainder.
# `convert = TRUE` asks tidyr to type-convert the newly created columns.
tidyr::extract(
  df,
  col1,
  into = paste0("col", 1:4),
  regex = "^(\\d+) ([A-Za-z ]+) (\\d+) (.*)",
  convert = TRUE
)
# col1 col2 col3 col4
#1 25 Edgemont 52 Sioux County
#2 57 Burke 88 Papillion-LaVista South
2) Or with `separate` from tidyr, where we specify regex lookarounds so that
the split happens only at a space that is preceded by a digit or followed by
a digit
# Split `col1` at every space that directly follows or directly precedes a
# digit, producing four columns.  `convert = TRUE` type-converts the new
# columns so the two score columns come back as integers, matching the
# `extract()` option instead of leaving them as character strings.
df %>%
  separate(
    col1,
    into = str_c("col", 1:4),
    sep = "(?<=\\d) | (?=\\d)",
    convert = TRUE
  )
# col1 col2 col3 col4
#1 25 Edgemont 52 Sioux County
#2 57 Burke 88 Papillion-LaVista South
3) Or using tstrsplit
from data.table
library(data.table)
# NOTE: setDT() converts `df` to a data.table by reference (no copy is made).
# tstrsplit() splits each string at a space adjacent to a digit and returns
# the pieces as columns; `type.convert = TRUE` makes the score columns
# integer rather than character, in line with the other options.
setDT(df)[, tstrsplit(col1, "(?<=\\d) | (?=\\d)", perl = TRUE,
                      type.convert = TRUE)]
# V1 V2 V3 V4
#1: 25 Edgemont 52 Sioux County
#2: 57 Burke 88 Papillion-LaVista South
4) Or using read.csv
from base R
(No packages are used ...)
# Mark the field boundaries (a space next to a digit) with a tab and let
# read.csv() parse the result.  A tab separator with quoting disabled is used
# instead of a comma so that names containing "," or a double quote
# (e.g. "Sioux County, NE") are not split into extra columns or mangled.
read.csv(
  text = gsub("(?<=\\d) | (?=\\d)", "\t", df$col1, perl = TRUE),
  sep = "\t",
  quote = "",
  header = FALSE
)
# V1 V2 V3 V4
#1 25 Edgemont 52 Sioux County
#2 57 Burke 88 Papillion-LaVista South
5) Or with strsplit
from base R
(No packages are used ...)
# Split every string at a space adjacent to a digit, stack the pieces
# row-wise into a character matrix, turn that into a data frame, and
# type-convert its columns (as.is = TRUE keeps text columns as character).
# local() keeps the intermediate objects out of the calling environment.
local({
  pieces <- strsplit(
    as.character(df$col1),
    "(?<=\\d) | (?=\\d)",
    perl = TRUE
  )
  piece_matrix <- do.call(rbind, pieces)
  type.convert(as.data.frame(piece_matrix), as.is = TRUE)
})
# V1 V2 V3 V4
#1 25 Edgemont 52 Sioux County
#2 57 Burke 88 Papillion-LaVista South
data
# Example input: one text column holding "<score> <team> <score> <team>".
df <- data.frame(
  col1 = c(
    "25 Edgemont 52 Sioux County",
    "57 Burke 88 Papillion-LaVista South"
  )
)