0
import re, datetime
from calendar import monthrange

def date_standarization_03(match):
    input_text_substring = match.group()
    input_text_substring = input_text_substring.strip()

    #converts if it is indicated that it is "this month" to the number of the current month
    input_text_substring = re.sub(r"(?:de|)[\s|]*este mes", str(datetime.datetime.today().strftime('%m')), input_text_substring)
    input_text_substring = re.sub(r"(?:del|de el|el)[\s|]*(?:mes que viene|siguiente mes|mes siguiente)", str( int(datetime.datetime.today().strftime('%m') ) + 1), input_text_substring)
    input_text_substring = re.sub(r"(?:del|de el|el)[\s|]*(?:mes pasado|pasado mes|anterior año|mes anterior)", str( int(datetime.datetime.today().strftime('%m') ) - 1), input_text_substring)

    es_month_dict = {"enero": "01", "febrero": "02", "marzo": "03", "abril": "04", "mayo": "05", "junio": "06",
                     "julio": "07", "agosto": "08", "septiembre": "09", "octubre": "10", "noviembre": "11", "diciembre": "12"}

    #to transform ordinal numbers into cardinal numbers
    #  example: '1º' --> '1'  or  '10º' --> '10'
    input_text_substring = re.sub("(\d{1,2})[\s|]*º", r"\1", input_text_substring)

    #replace this with the number of days in that month
    print("replace here!")
    #input_text = re.sub(r"(?:en|durante)[\s|]*(?:el|los)[\s|]*(?:transcurso|trancurso|periodo|dias)", , input_text_substring)

    return input_text_substring
#Examples:
#if the year is not indicated, the current year should be assumed, for example in the cases of example 1 and in example 5
input_text = "los juegos se jugaran durante el transcurso del mes 11" #example 1
input_text = "empiezan durante el transcurso del mes 05 del año 2020" #example 2
input_text = "empiezan durante el periodo del mes de septiembre de este año" #example 3
input_text = "empezaran durante el transcurso del mes del 11º mes de 2024" #example 4
input_text = "empezaran durante el transcurso del mes de diciembre seguramente" #example 5


identificate_day_or_month = r"(\d{1,2})" #the months can have a maximum of 2 digits if they are written in numbers
months = r"enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre|este mes|mes que viene|siguiente mes|mes siguiente|mes pasado|pasado mes|anterior año|mes anterior"

#for months identification
standard_number_of_digits_re_3 = r"(?:del mes|de el mes|del|de el|de )[\s|]*(?:de|)[\s|]*(?:" + identificate_day_or_month + "|" + months + ")"
standard_number_of_digits_re_4 = r"(?:del mes|de el mes|del|de el)[\s|]*" + identificate_day_or_month + r"[\s|]*º[\s|]*(?:mes|)"
standard_number_of_digits_re_5 = r"de[\s|]*este[\s|]*" + identificate_day_or_month + r"[\s|]*º[\s|]*mes"

#for months identification
standard_number_of_digits_re_6 = r"(?:del del año|de el año|del del ano|de el ano|del|de)[\s|]*(?:el|)[\s|]*(?:este año|año que viene|siguiente año|año siguiente|año pasado|pasado año|anterior año|año anterior|este ano|ano que viene|siguiente ano|ano siguiente|ano pasado|pasado ano|anterior ano|ano anterior|año \d*|ano \d*|el \d*|\d*)"

#for days month periods identification
standard_number_of_digits_re_10 = r"(?:en|durante)[\s|]*(?:el|los)[\s|]*(?:transcurso|trancurso|periodo|dias)"

identification_regex = standard_number_of_digits_re_10 + r"[\s|]*(?:" + standard_number_of_digits_re_3 + r"|" + standard_number_of_digits_re_4 + r"|" + standard_number_of_digits_re_5 + r")[\s|]*" + standard_number_of_digits_re_6

#Extracts the substrings that meet the pattern established by the regex and sends it to the function in charge of making the modifications
input_text = re.sub(identification_regex, date_standarization_03, input_text)

print(repr(input_text)) #output, for example: 'empiezaran [01 -- 30] 11 de 2024'

And the correct outputs for each of these input example strings:

"los juegos se jugaran durante el transcurso del mes 11 2022" #for example 1
"empiezan durante [01 -- 31] 05 2020" #for example 2
"empiezan [01 -- 30] 09 2022" #for example 3
"empezaran [01 -- 30] 11 2024"  #for example 4
"empezaran [01 -- 31] 12 2022 seguramente"  #for example 5

date_standarization_03() is the function where the replacements must be made, the rest of the regex that allow entering it are only so that the program can be tested. And in this part of the code print("replace here!") is where the replacement should be done.


simplification of the question

input_string = "durante el transcurso del mes 11"

#transform to "[01 -- 30] 11"
Matt095
  • 857
  • 3
  • 9
  • 1
    What's the problem? Pleased introduce the task before the code, as explained in "[ask]". Read the other [help] articles, such as on providing a [mcve]. – outis Oct 07 '22 at 18:33
  • @outis There I edit it, anyway it is only the indicated function that must be modified, the rest is only to adapt the string – Matt095 Oct 07 '22 at 19:13
  • @outis the problem is that I need obtain the number of days of the month in the input_string and replace that – Matt095 Oct 07 '22 at 19:19
  • 1
    You could use the `itermonthdays(...)` method of `calendar.Calendar`, but honestly given your use case I think you'd be better off just printing a text calendar with `calendar.TextCalendar`. https://docs.python.org/3.8/library/calendar.html – Wildcard Oct 07 '22 at 22:20

0 Answers0