Is this what you are looking for?
library(tidyverse)
data.frame(txt = txt) %>%
  # Remove every non-word character except commas and spaces, so brackets,
  # quotes and other punctuation are dropped while the separators survive.
  mutate(
    txt = gsub(pattern = "(?![, ])\\W", replacement = "", x = txt, perl = TRUE)
  ) %>%
  # One row per comma-delimited piece of the cleaned string.
  separate_rows(txt, sep = ",") %>%
  # First keyword found in each piece (case-insensitive); NA when none matches.
  mutate(
    keywords = str_extract(
      string = txt,
      pattern = "(?i)Parking|Garage|Garden|Freehold|Fireplace|Balcony"
    )
  )
# A tibble: 19 × 2
txt keywords
<chr> <chr>
1 "Stunning seaside location" NA
2 " 24hour emergency call system and secure video entry" NA
3 " Mature landscaped gardens with large terraces and seating areas" garden
4 " Walk out balconies to selected apartments" NA
5 " Beautifully decorated homeowners8099 lounge" NA
6 " Parking spaces and car ports are available via an annual permit" Parking
7 " Wheelchair access" NA
8 " Lifts to all floors" NA
9 " Fire detection" NA
10 " Intruder alarm" NA
11 " Village Location" NA
12 " 4 Bedrooms" NA
13 " Gardens" Garden
14 " Balcony" Balcony
15 " On streetresidents parking" parking
16 " Central heating" NA
17 " Double glazing" NA
18 " Fireplace" Fireplace
19 " Ruralsecluded" NA
Data:
# Example input: a single character string holding three bracketed lists of
# quoted property-feature descriptions, separated by commas.
# NOTE(review): "â\200\231" looks like a mis-encoded apostrophe in the
# original text -- confirm against the real data source.
txt <- '"[\"Stunning seaside location\", \"24-hour emergency call system and secure video entry\", \"Mature landscaped gardens with large terraces and seating areas\", \"Walk out balconies to selected apartments\", \"Beautifully decorated homeownersâ\200\231 lounge\", \"Parking spaces and car ports are available via an annual permit\", \"Wheelchair access\", \"Lifts to all floors\", \"Fire detection\", \"Intruder alarm\"]", "[\"Village Location, 4 Bedrooms, Garden(s)\"]", "[\"Balcony\", \"On street/residents parking\", \"Central heating\", \"Double glazing\", \"Fireplace\", \"Rural/secluded\"]"'
If a substring may contain more than one keyword, use `str_extract_all` instead
and unnest the resulting list column:
data.frame(txt = txt) %>%
  # Remove every non-word character except commas and spaces.
  mutate(
    txt = gsub(pattern = "(?![, ])\\W", replacement = "", x = txt, perl = TRUE)
  ) %>%
  # One row per comma-delimited piece.
  separate_rows(txt, sep = ",") %>%
  # Collect every keyword match per piece into a list column
  # (case-insensitive).
  mutate(
    keywords = str_extract_all(
      string = txt,
      pattern = "(?i)Parking|Garage|Garden|Freehold|Fireplace|Balcony"
    )
  ) %>%
  # Expand the list column to one row per match; keep_empty = TRUE retains
  # pieces without any match as NA rows.
  unnest(cols = where(is.list), keep_empty = TRUE)
EDIT:
If the OP wants a separate 0/1 indicator column for each keyword, then this works (the `NA` column flags rows in which no keyword was found):
data.frame(txt = txt) %>%
  # Remove every non-word character except commas, spaces and slashes.
  mutate(
    txt = gsub(pattern = "(?![, /])\\W", replacement = "", x = txt, perl = TRUE)
  ) %>%
  # One row per piece, splitting on comma followed by a space.
  separate_rows(txt, sep = ", ") %>%
  # Collect every keyword match per piece into a list column
  # (case-insensitive).
  mutate(
    keywords = str_extract_all(
      string = txt,
      pattern = "(?i)Parking|Garage|Garden|Freehold|Fireplace|Balcony"
    )
  ) %>%
  # Unnest the list column; pieces without a match are kept as NA rows.
  unnest(cols = where(is.list), keep_empty = TRUE) %>%
  # Upper-case the first letter so differently cased matches share one label.
  mutate(
    keywords = sub(
      pattern = "^(.)", replacement = "\\U\\1", x = keywords, perl = TRUE
    )
  ) %>%
  # Cast each keyword into its own column: 1 where it occurs, 0 elsewhere.
  pivot_wider(
    names_from = keywords,
    values_from = keywords,
    values_fn = function(x) 1,
    values_fill = 0
  )
# A tibble: 19 × 6
txt `NA` Garden Parking Balcony Fireplace
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Stunning seaside location 1 0 0 0 0
2 24hour emergency call system and secure video entry 1 0 0 0 0
3 Mature landscaped gardens with large terraces and seating areas 0 1 0 0 0
4 Walk out balconies to selected apartments 1 0 0 0 0
5 Beautifully decorated homeowners8099 lounge 1 0 0 0 0
6 Parking spaces and car ports are available via an annual permit 0 0 1 0 0
7 Wheelchair access 1 0 0 0 0
8 Lifts to all floors 1 0 0 0 0
9 Fire detection 1 0 0 0 0
10 Intruder alarm 1 0 0 0 0
11 Village Location 1 0 0 0 0
12 4 Bedrooms 1 0 0 0 0
13 Gardens 0 1 0 0 0
14 Balcony 0 0 0 1 0
15 On street/residents parking 0 0 1 0 0
16 Central heating 1 0 0 0 0
17 Double glazing 1 0 0 0 0
18 Fireplace 0 0 0 0 1
19 Rural/secluded 1 0 0 0 0