@Arthur given the fact you have 100 records then it would be best to create a DataFrame for each record. While this requires more code. It is doable.
You can leverage several functions such as df.pipe() to pass functions. Here's how I solved it.
This example has 20 dates but it can scale to any number.
import pandas as pd
import numpy as np
dates_df = pd.DataFrame({
'start': [
'2023-03-15', '2023-04-11', '2023-04-26', '2023-05-05', '2023-06-19',
'2023-07-04', '2023-07-24', '2023-08-01', '2023-08-21', '2023-09-07'
],
'end': [
'2023-09-22', '2023-10-05', '2023-10-12', '2023-10-19', '2023-11-20',
'2023-11-23', '2023-11-30', '2023-12-08', '2023-12-11', '2023-12-13'
]
})
def monthly_pto_days_per_range(start_date: str, end_date: str) -> pd.DataFrame:
dates = pd.Series(
{"start": pd.to_datetime(start_date), "end": pd.to_datetime(end_date)}
)
return (
(
pd.Series(
index=pd.date_range(start=dates["start"], end=dates["end"]),
dtype="float64",
)
.resample("M")
.size()
.to_period("M")
.to_frame()
.T
)
.assign(start_date=start_date, end_date=end_date)
)
def col_to_front(df: pd.DataFrame, column: str) -> pd.DataFrame:
r"""
Bring column to front in a DataFrame.
Parameters
----------
df : pd.DataFrame
col : str
Column to move
front : bool, optional
by default True will bring column to position 0
else to last column
"""
return df.loc[
:, lambda x: sorted(x.columns, key=lambda col: 0 if col == column else 1)
]
def generate_pto_days_from_date_ranges(df: pd.DataFrame) -> pd.DataFrame:
return (
pd.concat(
[
np.vectorize(monthly_pto_days_per_range)(df["start"], df["end"])[i]
for i in df.index
]
)
.fillna(0)
.pipe(col_to_front, "end_date")
.pipe(col_to_front, "start_date")
)
dates_df.pipe(generate_pto_days_from_date_ranges)
