1

The data I am using are based on self-completed, manually entered text responses to a questionnaire.

The problem is, especially with regards to fish species, people abbreviate, call them by different names, spell things wrong, etc.

How do I take all of the related names in a series of three columns and make them into one unified name so that I can perform analysis on them?

structure(list(species_1_target = c("Blacktail", "Craker", "Black tail", 
"Musselcracker", "Musselcracker", "Garick", "White musselcracker", 
"Blacktails", "Kob", "Any", "White cracker", "Musslecrack", "Galjoen", 
"Cracker", "Blacktail", "White Musselcracker", "Blacktail", "Anything", 
"poensie", "cracker", "cracker", "cracker", "glajoen", "blacktail", 
"steenie", "Musselcracker white", "Steenbras"), species_2_target = c("Steenbras", 
"Steambrass", NA, "Elf", "Galjoen", "Elf", "Black musselcracker", 
"Stumpnose", "Bluefish", NA, "Blacktail", "Steenie", "Kob", "Poensie", 
NA, NA, "Steenies", NA, "cracker", "galjoen", NA, NA, NA, "cracker", 
"blacktail", "Black musselcracker", "Galjoen"), species_3_target = c("Octopus", 
"Black tail", NA, "Steenbrass", NA, "Kob", "Kob", "Sandshark", 
"Steen ras", NA, NA, "Kob", NA, "Kob", NA, NA, "Sandsharks", 
NA, "gully shark", "gully shark", NA, NA, NA, NA, NA, "Kob", 
NA)), class = "data.frame", row.names = 3:29)

For example, "black tail", "blacktail", "Black Tail", "Blacktail", etc. could all be renamed "blacktail", and "Steenies", "steenbras", "steenie", "steambras" could all be renamed "Steenbras".

Is there a way to do this using partial string matching to save time/effort?

I know how to manually rename each one, however, there are over 150 rows in the full data and having to find and rename each unique variation can be tedious to say the least. Additionally, data entries are ongoing so the list of inaccurately entered species names will likely continue to grow.

Update:

full df minus some rows

    structure(list(location = c("Our kai huis seaview", "Maitlands", 
"Maitlands", "Beachview", "Maitlands", "Seaview", "Seavi", "Maitlands", 
"Seaview", "Maitlands", "Maitlands", "Maitlands", "..maitlands", 
"Maitlands", "Maitlands", "Maitlands", "Maitlands", "Maitlands", 
"Maitlands", "Maitlands", "Maitlands", "maitlands", "Maitlands", 
"Maitlands", "Seaview", "Seaview", "Port Alfred", "Port Alfred", 
"Port Alfred", "Port Alfred", "Port Alfred", "Bluewater Bay", 
"Bluewater Bay", "Bluewater Bay", "Bluewater Bay", "Cape Recife", 
"Cape Recife", "Cape recife", "Cape recife", "Cape Recife", "Cape recife"
), days = c("209", "132", "45", "189", "192", "27", "98", "97", 
"51", "99", "18", "106", "12", "60", "30", "100", "52", "10", 
"260", "175", "110", "175", "10", "24", "50", "100", "15", "60", 
"20", "100", "60", "30", "101", "12", "24", "69", "50", "200", 
"50", "150", "60"), satis_catch = structure(c(3L, 5L, 5L, NA, 
4L, 4L, 5L, 4L, 2L, 5L, 5L, 4L, 4L, 5L, 3L, 4L, 4L, 3L, 5L, 4L, 
3L, 3L, 4L, 3L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
4L, 5L, 4L, 4L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_harvest = structure(c(3L, 5L, 4L, 
4L, 2L, 5L, 5L, 4L, 1L, 5L, 4L, 2L, 2L, 5L, 3L, 4L, 3L, 2L, 2L, 
1L, 2L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 5L, 4L, 1L, 4L, 5L, 3L, 
4L, 2L, 4L, 3L, 5L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_size = structure(c(3L, 5L, 4L, 5L, 
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 2L, 3L, 4L, 4L, 
5L, 5L, 1L, 4L, 4L, 5L, 4L, 3L, 4L, 5L, 5L, 5L, 5L, 3L, 4L, 4L, 
4L, 4L, 4L, 4L, 5L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_scenery = structure(c(4L, 4L, 3L, 
5L, 5L, 5L, 5L, 5L, 5L, 4L, 3L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 
5L, 5L, 5L, 1L, 4L, 4L, 5L, 3L, 4L, 4L, 2L, 4L, 4L, 2L, 3L, 1L, 
3L, 3L, 3L, 3L, 3L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_facility = structure(c(4L, 4L, 2L, 
4L, 5L, 5L, 1L, 1L, 4L, 3L, 2L, 2L, 1L, 3L, 3L, 5L, 4L, 4L, 5L, 
4L, 1L, 2L, 2L, 2L, 1L, 1L, 4L, 2L, 2L, 3L, 2L, 4L, 2L, 4L, 3L, 
2L, 3L, 1L, 3L, 3L, 3L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_crowding = structure(c(1L, 4L, 4L, 
3L, 2L, 3L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 5L, 4L, 3L, 3L, 3L, 
2L, 5L, 4L, 2L, 4L, 4L, 2L, 3L, 5L, 5L, 4L, 4L, 5L, 3L, 2L, 4L, 
4L, 4L, 4L, 4L, 2L, 5L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_mastery = structure(c(4L, 4L, 4L, 
4L, 5L, 4L, 4L, 4L, 1L, 5L, 2L, 5L, 5L, 4L, 1L, 4L, 5L, 1L, 5L, 
4L, 5L, 5L, 1L, 3L, 3L, 4L, 2L, 2L, 4L, 2L, 3L, 5L, 5L, 2L, 5L, 
2L, 4L, 5L, 4L, 3L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_relax = structure(c(5L, 5L, 5L, 4L, 
4L, 5L, 5L, 4L, 5L, 4L, 4L, 5L, NA, 4L, 5L, 5L, 5L, 4L, 5L, 5L, 
5L, 5L, 1L, 4L, 3L, 3L, 3L, 3L, 4L, 4L, 2L, 3L, 2L, 4L, 3L, 4L, 
3L, 4L, 4L, 4L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_social = structure(c(5L, 4L, 3L, 
3L, 5L, 5L, 1L, 3L, 4L, 4L, 2L, 4L, 3L, 2L, 5L, 5L, 5L, 3L, 3L, 
5L, 2L, 5L, 3L, 2L, 2L, 1L, 2L, 2L, 3L, 1L, 2L, 4L, 4L, 4L, 3L, 
3L, 4L, 3L, 3L, 3L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_access = structure(c(3L, 4L, 4L, 
4L, 4L, 4L, 1L, 4L, 5L, 3L, 2L, 5L, 2L, 3L, 3L, 5L, 5L, 3L, 4L, 
3L, 3L, 5L, 4L, 2L, 2L, 1L, 5L, 2L, 2L, 1L, 4L, 2L, 2L, 4L, 2L, 
3L, 2L, 4L, 3L, 2L, 3L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), satis_waterquality = structure(c(4L, 5L, 
2L, 5L, 4L, 5L, 2L, 5L, 4L, 1L, 1L, 4L, 2L, 2L, 3L, 5L, 5L, 5L, 
4L, 4L, 5L, 5L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 3L, 
1L, 3L, 3L, 3L, 2L, 2L, 4L), levels = c("Very Unimportant", "Slightly Unimportant", 
"Neither unimportant nor important", "Slightly Important", "Very Important"
), class = "factor"), target_edibles = c("Edibles", "Edibles", 
"Edibles", "Edibles", "Edibles", "Edibles", "Edibles", "Edibles", 
"Edibles", NA, "Edibles", "Edibles", "Edibles", "Edibles", "Edibles", 
"Edibles", "Edibles", "Edibles", "Edibles", "Edibles", "Edibles", 
"Edibles", "Edibles", "Edibles", "Edibles", "Edibles", "Edibles", 
"Edibles", "Edibles", "Edibles", "Edibles", NA, "Edibles", "Edibles", 
"Edibles", "Edibles", "Edibles", "Edibles", "Edibles", "Edibles", 
"Edibles"), target_inedibles = c(NA, NA, NA, NA, NA, NA, NA, 
"Inedibles", NA, "Inedibles", NA, NA, NA, "Inedibles", NA, NA, 
NA, NA, "Inedibles", "Inedibles", NA, NA, NA, NA, NA, NA, NA, 
"Inedibles", NA, "Inedibles", NA, "Inedibles", "Inedibles", "Inedibles", 
NA, NA, "Inedibles", "Inedibles", NA, NA, NA), species_1_target = c("Blacktail", 
"Craker", "Black tail", "Musselcracker", "Musselcracker", "Garick", 
"White musselcracker", "Blacktails", "Kob", "Any", "White cracker", 
"Musslecrack", "Galjoen", "Cracker", "Blacktail", "White Musselcracker", 
"Blacktail", "Anything", "poensie", "cracker", "cracker", "cracker", 
"glajoen", "blacktail", "steenie", "Musselcracker white", "Steenbras", 
"Poensie", "blacktail", "Steenbras", "Kob", "Bronzies", "Kob", 
"any", "garrick", "White cracker", "poenskop", "Poensie", "Blacktail", 
"shad", "cob"), species_2_target = c("Steenbras", "Steambrass", 
NA, "Elf", "Galjoen", "Elf", "Black musselcracker", "Stumpnose", 
"Bluefish", NA, "Blacktail", "Steenie", "Kob", "Poensie", NA, 
NA, "Steenies", NA, "cracker", "galjoen", NA, NA, NA, "cracker", 
"blacktail", "Black musselcracker", "Galjoen", "galjoen", "steenbras", 
"Garrick", "Garrick", "Raggies", "Raggies", NA, "elf", "kob", 
"dogshark", "Galjoen", "cracker", NA, NA), species_3_target = c("Octopus", 
"Black tail", NA, "Steenbrass", NA, "Kob", "Kob", "Sandshark", 
"Steen ras", NA, NA, "Kob", NA, "Kob", NA, NA, "Sandsharks", 
NA, "gully shark", "gully shark", NA, NA, NA, NA, NA, "Kob", 
NA, "blacktail", "black musselcracker", "kob", NA, "Pajamas", 
"Sandsharks", NA, NA, "shad", "Cracker", "Kob", "kob", NA, NA
), most_satisf_ebible = c("Relax", "Catch", "Catch", "Relax", 
"Scenery", "Relax", "Catch", "Catch", "Catch", NA, "Catch", "Relax", 
"Catch", "Water Quality", "Social", "Catch", "Scenery", "Scenery", 
"Catch", "Catch", "Size", "Relax", "Catch", "Harvest", "Harvest", 
"Catch", "Harvest", "Harvest", "Catch", "Harvest", "Catch", NA, 
"Harvest", "Catch", "Catch", "Harvest", "Catch", "Catch", "Harvest", 
"Harvest", "Catch"), least_satisf_edible = c("Size", NA, "Water Quality", 
"Catch", "Accessibility", NA, NA, "Accessibility", "Social", 
NA, "Water Quality", "Catch", "Harvest", "Harvest", "Mastery", 
"Water Quality", "Size", "Mastery", "Harvest", "Harvest", "Scenery", 
"Facilities", "Crowds", "Water Quality", "Facilities", NA, "Crowds", 
"Crowds", "Harvest", "Facilities", "Crowds", NA, "Facilities", 
"Accessibility", "Facilities", "Facilities", "Accessibility", 
"Water Quality", "Water Quality", "Accessibility", "Facilities"
), most_satisf_inedible = c(NA, NA, NA, NA, NA, NA, NA, "Mastery", 
NA, "Scenery", NA, NA, NA, "Water Quality", NA, NA, NA, NA, "Catch", 
"Catch", NA, NA, NA, NA, NA, NA, NA, "Size", NA, "Size", NA, 
"Size", "Size", "Catch", NA, NA, "Size", "Catch", NA, NA, NA), 
    least_satisf_inedible = c(NA, NA, NA, NA, NA, NA, NA, "Size", 
    NA, "Water Quality", NA, NA, NA, "Harvest", NA, NA, NA, NA, 
    "Harvest", "Harvest", NA, NA, NA, NA, NA, NA, NA, "Water Quality", 
    NA, "Mastery", NA, "Harvest", "Harvest", "Mastery", NA, NA, 
    "Harvest", "Water Quality", NA, NA, NA), release_undersize = structure(c(5L, 
    5L, 4L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 3L, 4L, 4L, 5L, 4L, 5L, 
    4L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 4L), levels = c("Never", 
    "Rarely", "About half the time", "Frequently", "Always"), class = "factor"), 
    release_edible = structure(c(1L, 2L, 4L, 2L, 3L, 4L, 3L, 
    3L, NA, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 4L, 2L, 5L, 5L, 3L, 4L, 
    2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L), levels = c("Never", "Rarely", "About half the time", 
    "Frequently", "Always"), class = "factor"), release_legal = structure(c(1L, 
    1L, 4L, 2L, 3L, 4L, 3L, 5L, 3L, 1L, 2L, 4L, 3L, 4L, 5L, 2L, 
    4L, 3L, 5L, 4L, 3L, 4L, 5L, 2L, 2L, 3L, 2L, 2L, 4L, 4L, 3L, 
    4L, 2L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L), levels = c("Never", 
    "Rarely", "About half the time", "Frequently", "Always"), class = "factor"), 
    release_prohib = structure(c(1L, 1L, 5L, 5L, 5L, 5L, 3L, 
    4L, 4L, 5L, 5L, 5L, 5L, 4L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    1L, 4L, 5L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    4L, 4L, 5L, 3L), levels = c("Never", "Rarely", "About half the time", 
    "Frequently", "Always"), class = "factor"), release_revived = structure(c(3L, 
    5L, 3L, 3L, 5L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 5L, 
    4L, 3L, 3L, 5L, 5L, 5L, 5L, 3L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 
    5L, 4L, 5L, 5L, 4L, 4L, 5L, 4L, 4L, 3L), levels = c("Never", 
    "Rarely", "About half the time", "Frequently", "Always"), class = "factor"), 
    release_other_undersized = structure(c(4L, 5L, 2L, 2L, 3L, 
    3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 5L, 4L, 4L, 3L, 3L, 
    4L, 2L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 3L, 
    4L, 3L, 4L, 3L, 4L, 3L), levels = c("Never", "Rarely", "About half the time", 
    "Frequently", "Always"), class = "factor"), release_other_edible = structure(c(4L, 
    2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 
    3L, 4L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 
    2L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("Never", 
    "Rarely", "About half the time", "Frequently", "Always"), class = "factor"), 
    release_other_legal = structure(c(3L, 3L, 2L, 2L, 3L, 2L, 
    2L, 1L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 4L, 3L, 2L, 4L, 
    3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 4L, 2L, 3L, 
    3L, 2L, 2L, 2L, 2L), levels = c("Never", "Rarely", "About half the time", 
    "Frequently", "Always"), class = "factor"), release_other_prohib = structure(c(3L, 
    1L, 2L, 4L, 3L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 
    3L, 4L, 4L, 2L, 2L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 
    4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 2L), levels = c("Never", 
    "Rarely", "About half the time", "Frequently", "Always"), class = "factor"), 
    release_other_revived = structure(c(3L, 5L, 2L, 3L, 3L, 3L, 
    2L, 3L, 2L, 4L, 2L, 1L, 2L, 2L, 2L, 4L, 2L, 2L, 3L, 2L, 5L, 
    3L, 4L, 2L, 2L, 2L, 2L, 2L, 4L, 3L, 4L, 3L, 2L, 4L, 4L, 3L, 
    3L, 4L, 4L, 4L, 2L), levels = c("Never", "Rarely", "About half the time", 
    "Frequently", "Always"), class = "factor"), percent_survive_eds = c("56", 
    "100", "55", "100", "45", "5", "61", "9", "100", "75", "22", 
    "50", "44", "51", "75", "90", "80", "70", "70", "75", "100", 
    "80", "75", "60", "20", NA, "50", "30", "80", "60", "80", 
    "40", "75", "90", "60", "85", "60", "90", "50", "100", "80"
    ), percent_survive_ineds = c("50", "100", "88", "90", "70", 
    "6", "41", "21", "100", "75", "50", "80", "100", "51", "75", 
    "90", "87", "70", "85", "85", "75", "80", "75", "81", "75", 
    NA, "80", "75", "60", "75", "90", "95", "80", "90", "50", 
    "90", "80", "95", "75", "100", "80"), effective_MPA = structure(c(4L, 
    5L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
    5L, NA, 4L, 5L, 5L, 5L, 4L, 4L, 5L, NA, 5L, 4L, 4L, 4L, 5L, 
    5L, 5L, 4L, 4L, 5L, 4L, 4L, 5L, 5L, 4L), levels = c("Strongly disagree", 
    "Somewhat disagree", "Neither agree nor disagree", "Somewhat agree", 
    "Strongly agree"), class = "factor"), effective_CR = structure(c(3L, 
    4L, 4L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 4L, 4L, 4L, 5L, 5L, 5L, 
    4L, NA, 5L, 5L, 2L, 5L, 4L, 5L, 2L, NA, 4L, 2L, 5L, 3L, 4L, 
    5L, 5L, 4L, 4L, 5L, 5L, 4L, 2L, 5L, 4L), levels = c("Strongly disagree", 
    "Somewhat disagree", "Neither agree nor disagree", "Somewhat agree", 
    "Strongly agree"), class = "factor"), support_CAREzone = structure(c(1L, 
    5L, 3L, 1L, 4L, 4L, 5L, 3L, 5L, 1L, 2L, 5L, 5L, 1L, 2L, 4L, 
    4L, 3L, 5L, 5L, 1L, 5L, 4L, 1L, 2L, NA, 1L, 2L, 4L, 1L, 2L, 
    5L, 4L, 3L, 2L, 2L, 4L, 1L, 2L, 1L, 2L), levels = c("Strongly oppose", 
    "Oppose", "Neither oppose nor support", "Support", "Strongly Support"
    ), class = "factor"), age = c("32", "41", "63", "56", "63", 
    "33", "31", "32", "47", "58", "29", "47", "64", "41", "23", 
    "71", "23", "33", "37", "37", "35", "75", NA, "40", "61", 
    NA, "62", "46", "38", "65", "31", "42", "46", "29", "25", 
    "20", "31", "67", "18", "66", "40"), income = structure(c(2L, 
    NA, 3L, NA, 3L, 3L, 3L, 3L, NA, 2L, 3L, 3L, 3L, NA, 3L, 3L, 
    2L, NA, NA, 3L, NA, NA, 3L, 3L, 3L, NA, 3L, 3L, 3L, 3L, NA, 
    NA, 3L, NA, 3L, 3L, 3L, 3L, 2L, 3L, NA), levels = c("R0", 
    "R1-R19,200", "R19,201-R307,200", "R301,201 and above"), class = "factor"), 
    employment = structure(c(3L, 3L, 4L, 3L, 4L, 3L, 3L, 3L, 
    3L, 1L, 3L, 3L, 4L, 3L, 3L, 4L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 
    3L, 4L, NA, 4L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 4L, 
    2L, 4L, 3L), levels = c("Unemployed", "Student", "Employed", 
    "Retired"), class = "factor"), education = structure(c(2L, 
    5L, 2L, 2L, 5L, 2L, 5L, 3L, 2L, 2L, 2L, 3L, 2L, 4L, 3L, 3L, 
    2L, 5L, 2L, 2L, 4L, 3L, NA, 3L, 2L, NA, 3L, 3L, 4L, 2L, 4L, 
    NA, 2L, 4L, 3L, 2L, 3L, 2L, 2L, 2L, 5L), levels = c("Pre-matric", 
    "Matric", "Professional Certificate", "Undergraduate degree", 
    "Postgraduate degree"), class = "factor")), row.names = 3:43, class = "data.frame")

I have tried the method suggested in the comments below regarding the tidyverse solution but it doesn't change the names... at all. Not sure why.

 df %>%  
  mutate(across(.cols= 16:18, ~ str_to_lower(.x)),
         across(.cols =16:18, 
                ~ case_when(str_detect(.x, "tail") ~ "blacktail",
                            str_detect(.x, "teen") ~ "steenbras",
                            str_detect(.x, "bras") ~ "steenbras",
                            str_detect(.x, "lack m") ~ "poenskop",
                            str_detect(.x, "poen") ~ "poenskop",
                            str_detect(.x, "cracker") ~ "white musselcracker",
                            str_detect(.x, "hite") ~ "white musselcracker",
                            str_detect(.x, "crak") ~ "white musselcracker",
                            str_detect(.x, "musslecrack") ~ "white musselcracker",
                            str_detect(.x, "andsh") ~ "sandshark",
                            str_detect(.x, "ully") ~ "spotted gully",
                            str_detect(.x, "rick") ~ "garrick",
                            str_detect(.x, "luef") ~ "bronze bream",
                            str_detect(.x, "onze") ~ "bronze bream",
                            str_detect(.x, "joen") ~ "galjoen",

                            TRUE ~ .x) ),
         across(.cols=16:18, ~ str_to_title(.x)))
oguz ismail
  • 1
  • 16
  • 47
  • 69
Chris Bova
  • 134
  • 1
  • 2
  • 9
  • For clarification, the names should change to a unified name, but remain in their respective columns. – Chris Bova Jun 16 '22 at 09:09
  • Here you have to regex on every column to unify the names , e.g remove spaces between words , i highly recommend [Regular Expressions](https://raw.githubusercontent.com/rstudio/cheatsheets/main/regex.pdf) – Mohamed Desouky Jun 16 '22 at 09:18
  • I will check it out. – Chris Bova Jun 16 '22 at 09:22
  • I can see that regex may be useful in selecting or identifying the variants, but I'm not sure about its application for renaming? – Chris Bova Jun 16 '22 at 09:28
  • You can for e.g. remove all spaces with 'df$species_1_target <- gsub("\\s" , "" , df$species_1_target) ' and you can do it for each column then `df$species_1_target <- tolower(df$species_1_target)` to lower , and so on which is text mining field – Mohamed Desouky Jun 16 '22 at 09:34

1 Answer

2

Probably not optimal, but it works.

library(tidyverse)
df <- structure(list(species_1_target = c("Blacktail", "Craker", "Black tail", 
                                          "Musselcracker", "Musselcracker", "Garick", "White musselcracker", 
                                          "Blacktails", "Kob", "Any", "White cracker", "Musslecrack", "Galjoen", 
                                          "Cracker", "Blacktail", "White Musselcracker", "Blacktail", "Anything", 
                                          "poensie", "cracker", "cracker", "cracker", "glajoen", "blacktail", 
                                          "steenie", "Musselcracker white", "Steenbras"), species_2_target = c("Steenbras", 
                                                                                                               "Steambrass", NA, "Elf", "Galjoen", "Elf", "Black musselcracker", 
                                                                                                               "Stumpnose", "Bluefish", NA, "Blacktail", "Steenie", "Kob", "Poensie", 
                                                                                                               NA, NA, "Steenies", NA, "cracker", "galjoen", NA, NA, NA, "cracker", 
                                                                                                               "blacktail", "Black musselcracker", "Galjoen"), species_3_target = c("Octopus", 
                                                                                                                                                                                    "Black tail", NA, "Steenbrass", NA, "Kob", "Kob", "Sandshark", 
                                                                                                                                                                                    "Steen ras", NA, NA, "Kob", NA, "Kob", NA, NA, "Sandsharks", 
                                                                                                                                                                                    NA, "gully shark", "gully shark", NA, NA, NA, NA, NA, "Kob", 
                                                                                                                                                                                    NA)), class = "data.frame", row.names = 3:29) 

df %>%  
  as_tibble() %>%  
  mutate(across(everything(), ~ str_to_lower(.x)),
         across(everything(), 
                ~ case_when(str_detect(.x, "black") ~ "blacktail",
                            str_detect(.x, "steen") ~ "steenbras",
                            TRUE ~ .x) ),
         across(everything(), ~ str_to_title(.x)))
#> # A tibble: 27 x 3
#>    species_1_target    species_2_target species_3_target
#>    <chr>               <chr>            <chr>           
#>  1 Blacktail           Steenbras        Octopus         
#>  2 Craker              Steambrass       Blacktail       
#>  3 Blacktail           <NA>             <NA>            
#>  4 Musselcracker       Elf              Steenbras       
#>  5 Musselcracker       Galjoen          <NA>            
#>  6 Garick              Elf              Kob             
#>  7 White Musselcracker Blacktail        Kob             
#>  8 Blacktail           Stumpnose        Sandshark       
#>  9 Kob                 Bluefish         Steenbras       
#> 10 Any                 <NA>             <NA>            
#> # ... with 17 more rows

Created on 2022-06-16 by the reprex package (v2.0.1)

Chamkrai
  • 5,912
  • 1
  • 4
  • 14
  • This is pretty much what I had in mind. Is there a way that you can use multiple str_detect for a single name, or must I just add another item: ie: df %>% mutate(across(everything(), ~ str_to_lower(.x)), across(everything(), ~ case_when(str_detect(.x, "tail") ~ "blacktail", str_detect(.x, "steen") ~ "steenbras", str_detect(.x, "poensie") ~ "steenbras", TRUE ~ .x) ), across(everything(), ~ str_to_title(.x))) – Chris Bova Jun 16 '22 at 09:33
  • 1
    Within the pattern parameter, you can do `"black|tail"` with `|` in between to signify black or tail will convert to `Blacktail`. An example is `steen|poensie` – Chamkrai Jun 16 '22 at 09:40
  • I did make an attempt at using your suggestion, however, it doesn't appear to work on my data. – Chris Bova Jun 16 '22 at 12:59
  • @ChrisBova I ran your code and it changed the values though? What errors did you get? – Chamkrai Jun 16 '22 at 13:07
  • I don't get any errors, it just doesn't change anything on my data.frame – Chris Bova Jun 16 '22 at 13:12
  • I just resolved the issue I was having by taking the tidyverse code and assigning it to df. ie: df<- df %>% as_tibble() %>% ... – Chris Bova Jun 16 '22 at 13:14