import re
def one_day_or_another_day_relative_to_a_date_func(input_text):
#print(repr(input_text)) #print what you have captured, and you should replace
return "aaaaaaaa"
def identify(input_text):
some_text = r"(?:(?!\.\s*?\n)[^;])*"
date_capture_pattern = r"([12]\d{3}-[01]\d-[0-3]\d)(\D*?)"
previous_days = r"(\d+)\s*(?:dias|dia)\s*(?:antes|previos|previo|antes|atrás|atras)\s*"
after_days = r"(\d+)\s*(?:dias|dia)\s*(?:después|despues|luego)\s*"
n_patterns = [
previous_days + r"(?:del|de\s*el|de|al|a)\s*" + some_text + date_capture_pattern + some_text + r"\s*(?:,\s*o|o)\s*" + previous_days,
after_days + r"(?:del|de\s*el|de|al|a)\s*" + some_text + date_capture_pattern + some_text + r"\s*(?:,\s*o|o)\s*" + previous_days,
previous_days + r"(?:del|de\s*el|de|al|a)\s*" + some_text + date_capture_pattern + some_text + r"\s*(?:,\s*o|o)\s*" + after_days,
after_days + r"(?:del|de\s*el|de|al|a)\s*" + some_text + date_capture_pattern + some_text + r"\s*(?:,\s*o|o)\s*" + after_days]
#Itero la lista de patrones de búsqueda para que el programa intente con uno por uno
for n_pattern in n_patterns:
#Este es mi intento de realizar el reemplazo, aunque tiene problemas con modificadores non-greedy
input_text = re.sub(n_pattern, one_day_or_another_day_relative_to_a_date_func , input_text, re.IGNORECASE)
input_texts = ["8 dias antes o 9 dias antes del 2022-12-22",
"2 dias despues o 1 dia antes del 2022-12-22, dia en donde ocurrio",
"a tan solo 2 dias despues de 2022-12-22 o a caso eran 6 dias despues, mmm no recuerdo bien",
]
#Testing...
for input_text in input_texts:
#print(input_text)
print(one_day_or_another_day_relative_to_a_date_func(input_text))
Incorrect output that I am getting, because if I incorrectly capture the substrings, the replacements will also be incorrect
"aaaaaaaa"
"aaaaaaaa"
"aaaaaaaa"
Having well-defined limits, I don't understand why this capture pattern try to capture beyond them?
And the output that I need is that:
"aaaaaaaa"
"aaaaaaaa, dia en donde ocurrio"
"a tan solo aaaaaaaa, mmm no recuerdo bien"