import re
input_text = "((PERS)Yo), ((PERS)Yo) ((PERS)yo) hgasghasghsa ((PERS)Yo) ((PERS)Yo) ((PERS)Yo) ((PERS)yo) jhsjhsdhjsdsdh ((PERS)Yo) jhdjfjhdffdj ((PERS)ella) ((PERS)Ella) ((PERS)ellos) asassaasasasassaassaas ((PERS)yo) ssdsdsd"
pattern = re.compile(r'\(\(PERS\)\s*yo\s*\)(?:\(\(PERS\)\s*yo\s*\))+', flags = re.IGNORECASE)
modified_text = re.sub(pattern, '((PERS)yo)', input_text)
print(modified_text)
Why is this code not used to eliminate the repeated occurrences one after the other of the sequence of characters ((PERS)\s*yo\s*)
?
This should be the correct output:
"((PERS)Yo), ((PERS)yo) hgasghasghsa ((PERS)yo) jhsjhsdhjsdsdh ((PERS)yo) jhdjfjhdffdj ((PERS)ella) ((PERS)Ella) ((PERS)ellos) asassaasasasassaassaas ((PERS)yo) ssdsdsd"