You can use getattr
to turn the name of the expression method you want, given as a string, into the actual callable method. That way you can do this:
# Small demo frame with two integer columns.
df = pl.DataFrame({'a': [1, 2, 3, 4], 'b': [3, 4, 5, 6]})

# One expression per (column, function) pair: look the method up by name,
# call it, and tag the resulting column with the function name.
exprs = [
    getattr(pl.col(column), method_name)().suffix(f"_{method_name}")
    for column in ['a', 'b']
    for method_name in ["max", "min", "mean"]
]
df.select(exprs)
shape: (1, 6)
┌───────┬───────┬────────┬───────┬───────┬────────┐
│ a_max ┆ a_min ┆ a_mean ┆ b_max ┆ b_min ┆ b_mean │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 ┆ i64 ┆ i64 ┆ f64 │
╞═══════╪═══════╪════════╪═══════╪═══════╪════════╡
│ 4 ┆ 1 ┆ 2.5 ┆ 6 ┆ 3 ┆ 4.5 │
└───────┴───────┴────────┴───────┴───────┴────────┘
You can take out the for col in ['a','b']
and change the pl.col(col)
to pl.all()
if you just want all columns.
You can even replicate this syntax {'a' : ['sum', 'min'], 'b' : ['min', 'max']}
by using a generator expression with two nested for clauses
# Map each column to the list of functions to apply to it.
wanted = {'a' : ['sum', 'min'], 'b' : ['min', 'max']}

# Outer loop walks the columns, inner loop walks that column's functions.
exprs = [
    getattr(pl.col(column), method_name)().suffix(f"_{method_name}")
    for column, method_names in wanted.items()
    for method_name in method_names
]
df.select(exprs)
Lastly, you can wrap that all up into a function and monkey patch it to pl.DataFrame.agg
so you have the direct functionality that you're looking for.
def agg(df, func: str | list | dict) -> pl.DataFrame:
    """Replicate the pandas ``agg`` function on a polars DataFrame.

    Args:
        df: The frame to aggregate. Once the function is patched onto
            ``pl.DataFrame`` this is the instance the method is called on.
        func: One of
            * a single expression-method name such as ``"max"``,
            * a list of such names, each applied to every column, or
            * a dict mapping a column name to one name or a list of names.

    Returns:
        The result of ``df.select`` on the generated expressions. Each output
        column is named ``<column>_<function>``.

    Raises:
        TypeError: If ``func`` is not a str, list, or dict.
    """
    if isinstance(func, str):
        # Normalise the single-name form so it shares the list code path.
        func = [func]
    if isinstance(func, list):
        return df.select(
            getattr(pl.all(), fun)().suffix(f"_{fun}") for fun in func
        )
    if isinstance(func, dict):
        return df.select(
            getattr(pl.col(col), fun)().suffix(f"_{fun}")
            for col, funs in func.items()
            # A bare string would otherwise be iterated character by
            # character, so wrap it the same way the top-level str is wrapped.
            for fun in ([funs] if isinstance(funs, str) else funs)
        )
    # Fail loudly instead of silently returning None for unsupported input.
    raise TypeError(
        f"func must be a str, list, or dict, not {type(func).__name__}"
    )


# Monkey patch so the function is available as a DataFrame method.
pl.DataFrame.agg = agg
Now you can just do
# Every function in the list is applied to every column.
df.agg(['min','max','mean'])
shape: (1, 6)
┌───────┬───────┬───────┬───────┬────────┬────────┐
│ a_min ┆ b_min ┆ a_max ┆ b_max ┆ a_mean ┆ b_mean │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ f64 ┆ f64 │
╞═══════╪═══════╪═══════╪═══════╪════════╪════════╡
│ 1 ┆ 3 ┆ 4 ┆ 6 ┆ 2.5 ┆ 4.5 │
└───────┴───────┴───────┴───────┴────────┴────────┘
or
# Per-column form: each key is a column, each value the functions for it.
per_column_funcs = {'a' : ['sum', 'min'], 'b' : ['min', 'max']}
df.agg(per_column_funcs)
shape: (1, 4)
┌───────┬───────┬───────┬───────┐
│ a_sum ┆ a_min ┆ b_min ┆ b_max │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 ┆ i64 │
╞═══════╪═══════╪═══════╪═══════╡
│ 10 ┆ 1 ┆ 3 ┆ 6 │
└───────┴───────┴───────┴───────┘