First, let's recreate your data (including the slice) using Polars. We'll take this in steps.
Calculating Dates
First, we'll use the date_range
function in Polars to create our dates.
import polars as pl
from datetime import datetime
df = pl.DataFrame({
'date': pl.date_range(datetime(2018, 11, 28), datetime(2019, 11, 28), '1h', closed='left'),
})
df
shape: (8760, 1)
┌─────────────────────┐
│ date │
│ --- │
│ datetime[ns] │
╞═════════════════════╡
│ 2018-11-28 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2018-11-28 01:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2018-11-28 02:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2018-11-28 03:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ... │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 20:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 21:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 22:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 23:00:00 │
└─────────────────────┘
Slicing
Next, we'll use the arange
expression, along with a when/then/otherwise
expression to slice our data and assign the pokemon_type
.
df = df.with_columns([
pl.when(pl.arange(0, pl.count()) < 400)
.then('fire')
.otherwise('water')
.alias('pokemon_type')
])
df
shape: (8760, 2)
┌─────────────────────┬──────────────┐
│ date ┆ pokemon_type │
│ --- ┆ --- │
│ datetime[ns] ┆ str │
╞═════════════════════╪══════════════╡
│ 2018-11-28 00:00:00 ┆ fire │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2018-11-28 01:00:00 ┆ fire │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2018-11-28 02:00:00 ┆ fire │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2018-11-28 03:00:00 ┆ fire │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ ... ┆ ... │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 20:00:00 ┆ water │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 21:00:00 ┆ water │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 22:00:00 ┆ water │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2019-11-27 23:00:00 ┆ water │
└─────────────────────┴──────────────┘
Calculating quarter-end dates
To obtain our end-quarter dates, we can use the when/then/otherwise
and the datetime
expression to create them from the date
field.
Note: I'll set include_bondaries=True
, just to compare our calculated end-quarter date to the _upper_boundary
date.
(
df
.groupby_dynamic("date", every="3mo", by="pokemon_type", include_boundaries=True)
.agg(pl.count())
.with_column(
pl.when(pl.col("date").dt.month() < 4)
.then(pl.datetime(pl.col('date').dt.year(), 4, 1))
.when(pl.col("date").dt.month() < 7)
.then(pl.datetime(pl.col("date").dt.year(), 7, 1))
.when(pl.col("date").dt.month() < 10)
.then(pl.datetime(pl.col("date").dt.year(), 10, 1))
.otherwise(pl.datetime(pl.col("date").dt.year() + 1, 1, 1))
.alias("quarter")
)
)
shape: (6, 6)
┌──────────────┬─────────────────────┬─────────────────────┬─────────────────────┬───────┬─────────────────────┐
│ pokemon_type ┆ _lower_boundary ┆ _upper_boundary ┆ date ┆ count ┆ quarter │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ u32 ┆ datetime[ms] │
╞══════════════╪═════════════════════╪═════════════════════╪═════════════════════╪═══════╪═════════════════════╡
│ fire ┆ 2018-10-01 00:00:00 ┆ 2019-01-01 00:00:00 ┆ 2018-10-01 00:00:00 ┆ 400 ┆ 2019-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2018-10-01 00:00:00 ┆ 2019-01-01 00:00:00 ┆ 2018-10-01 00:00:00 ┆ 417 ┆ 2019-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-01-01 00:00:00 ┆ 2019-04-01 00:00:00 ┆ 2019-01-01 00:00:00 ┆ 2160 ┆ 2019-04-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-04-01 00:00:00 ┆ 2019-07-01 00:00:00 ┆ 2019-04-01 00:00:00 ┆ 2184 ┆ 2019-07-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-07-01 00:00:00 ┆ 2019-10-01 00:00:00 ┆ 2019-07-01 00:00:00 ┆ 2208 ┆ 2019-10-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-10-01 00:00:00 ┆ 2020-01-01 00:00:00 ┆ 2019-10-01 00:00:00 ┆ 1391 ┆ 2020-01-01 00:00:00 │
└──────────────┴─────────────────────┴─────────────────────┴─────────────────────┴───────┴─────────────────────┘
Likewise, if you wanted to use closed="left"
, you could also express your quarter-ending dates like this:
(
df
.groupby_dynamic("date", every="3mo", by="pokemon_type", closed='left', include_boundaries=True)
.agg(pl.count())
.with_column(
pl.when(pl.col("date").dt.month() < 4)
.then(pl.datetime(pl.col('date').dt.year(), 3, 31))
.when(pl.col("date").dt.month() < 7)
.then(pl.datetime(pl.col("date").dt.year(), 6, 30))
.when(pl.col("date").dt.month() < 10)
.then(pl.datetime(pl.col("date").dt.year(), 9, 30))
.otherwise(pl.datetime(pl.col("date").dt.year(), 12, 31))
.alias("quarter")
)
)
shape: (6, 6)
┌──────────────┬─────────────────────┬─────────────────────┬─────────────────────┬───────┬─────────────────────┐
│ pokemon_type ┆ _lower_boundary ┆ _upper_boundary ┆ date ┆ count ┆ quarter │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ datetime[ns] ┆ datetime[ns] ┆ datetime[ns] ┆ u32 ┆ datetime[ms] │
╞══════════════╪═════════════════════╪═════════════════════╪═════════════════════╪═══════╪═════════════════════╡
│ fire ┆ 2018-10-01 00:00:00 ┆ 2019-01-01 00:00:00 ┆ 2018-10-01 00:00:00 ┆ 400 ┆ 2018-12-31 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2018-10-01 00:00:00 ┆ 2019-01-01 00:00:00 ┆ 2018-10-01 00:00:00 ┆ 416 ┆ 2018-12-31 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-01-01 00:00:00 ┆ 2019-04-01 00:00:00 ┆ 2019-01-01 00:00:00 ┆ 2160 ┆ 2019-03-31 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-04-01 00:00:00 ┆ 2019-07-01 00:00:00 ┆ 2019-04-01 00:00:00 ┆ 2184 ┆ 2019-06-30 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-07-01 00:00:00 ┆ 2019-10-01 00:00:00 ┆ 2019-07-01 00:00:00 ┆ 2208 ┆ 2019-09-30 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ water ┆ 2019-10-01 00:00:00 ┆ 2020-01-01 00:00:00 ┆ 2019-10-01 00:00:00 ┆ 1392 ┆ 2019-12-31 00:00:00 │
└──────────────┴─────────────────────┴─────────────────────┴─────────────────────┴───────┴─────────────────────┘