I've been reading the forum, investigating on internet. But can't figure out how to apply a pandas functions to resume this whole code:
def get_time_and_date(schedule, starting_date, position):
# calculate time and date for each start and ending time if the ending time < starting time, add one day to the ending.
my_time = datetime.strptime(schedule.split('-')[position], '%H:%M')
my_date = datetime.strptime(starting_date, '%Y-%m-%d')
# get the starting hour for the range if we are calculating last interval
if position == 1:
starting_hour = datetime.strptime(schedule.split('-')[0], '%H:%M')
starting_hour = datetime(my_date.year, my_date.month, my_date.day, starting_hour.hour, 0)
# convert unify my_time and my_date normalizing the minutes
if hora.minute >= 30:
my_hour_and_date = datetime(my_date.year, my_date.month, my_date.day, my_time.hour, 30)
else:
my_hour_and_date = datetime(my_date.year, my_date.month, my_date.day, hora.hour, 0)
# if final time of the day < than starting time, means there is a day jump, so we add a day
if position == 1 and my_hour_and_date < starting_hour: my_hour_and_date += timedelta(days=1)
return my_hour_and_date
def get_time_interval_ranges(schedule, my_date):
# get all match schedules if there are any
schedules = schedule.split('/')
intervals_list = []
# loop through al the schedules and add the range with the split separator "Separa aquí"
for my_schedule in schedules:
current_range = pd.date_range(start=get_time_and_date(my_schedule, my_date, 0), end=get_time_and_date(my_schedule, my_date, 1), freq="30min").strftime('%Y-%m-%d, %H:%M').to_list()
intervals_list += current_range
intervals_list.append('separate_range_here')
return intervals_list
def generate_time_intervals(df, column_to_process, new_column):
#generate range of times column
df[new_column] = df.apply(lambda row: get_time_interval_ranges(row[column_to_process], row['my_date'], True), axis=1)
return df
I believe there is a better way to do this, but I can't find out how. What I'm giving to the first function(generate_time_intervals
) is a dataFrame with some columns but only Date (yyyy-mm-dd) and schedule are important.
When the schedule is 09:00-15:00 it's easy, just split by the "-" and give it to the builtint function data_range. The problem comes to handle horrendous times like the one on the title or the likes of 09:17-16:24.
Is there any way to handle this without so much looping and the sorts in my code?
Edit:
With this input:
Worker | Date | Schedule |
---|---|---|
Worker1 | 2022-05-01 | 09:00-10:00/11:00-14:00/15:00-18:00 |
Worker2 | 2022-05-01 | 09:37-15:38 |
I would like this output:
Date | Interval | Working Minutes |
---|---|---|
2022-05-01 | 09:00 | 30 |
2022-05-01 | 09:30 | 53 |
2022-05-01 | 10:00 | 30 |
2022-05-01 | 10:30 | 30 |
2022-05-01 | 11:00 | 60 |
2022-05-01 | 11:30 | 60 |
2022-05-01 | 12:00 | 60 |
2022-05-01 | 12:30 | 60 |
2022-05-01 | 13:00 | 60 |
2022-05-01 | 13:30 | 60 |
2022-05-01 | 14:00 | 30 |
2022-05-01 | 14:30 | 30 |
2022-05-01 | 15:00 | 60 |
2022-05-01 | 15:30 | 38 |
2022-05-01 | 16:00 | 30 |
2022-05-01 | 16:30 | 30 |
2022-05-01 | 17:00 | 30 |
2022-05-01 | 17:30 | 30 |
2022-05-01 | 18:00 | 0 |