0

This handbook contains an example of how to use the function polars::prelude::fold_exprs to accumulate row values horizontally across multiple columns.

// Build a two-column frame; `?` propagates any construction error.
let df = df!(
    "a" => &[1, 2, 3],
    "b" => &[10, 20, 30],
)?;

// Fold over every column selected by `col("*")`, starting from the literal 0
// and adding each column to the accumulator, which gives the row-wise sum.
let out = df
    .lazy()
    .select([fold_exprs(lit(0), |acc, x| Ok(Some(acc + x)), [col("*")]).alias("sum")])
    .collect()?;
println!("{}", out);

The output is:

shape: (3, 1)
┌─────┐
│ sum │
│ --- │
│ i64 │
╞═════╡
│ 11  │
│ 22  │
│ 33  │
└─────┘

Question: How can I use the fold_exprs function to accumulate the values of the single column col("b") vertically, so that I get the following output?

shape: (3, 1)
┌─────┐
│ sum │
│ --- │
│ i64 │
╞═════╡
│ 10  │
│ 30  │
│ 60  │
└─────┘

I'm looking for something like:

// Attempt: the same fold, but restricted to the single column "b".
let out = df
    .lazy()
    .select([fold_exprs(lit(0), |acc, x| Ok(Some(acc + x)), [col("b")]).alias("sum")])
    .collect()?;

where I replaced col("*") with col("b"). How do I have to modify the closure |acc, x| Ok(Some(acc + x)) to get my desired output?

LenC
  • 69
  • 1
  • 10

1 Answer

1

For a cumulative sum, you can just use the cumsum(reverse: bool) function (needs feature cum_agg).

// Build the frame and switch to the lazy API.
let df = df!(
    "a" => &[1, 2, 3],
    "b" => &[10, 20, 30],
)
.unwrap()
.lazy()
// Add a running total of column "b"; the `false` argument is `reverse`,
// so the sum runs from the first row to the last.
// The alias is the column name that appears in the printed output.
.with_column(col("b").cumsum(false).alias("b_cumsum"));

println!("{:?}", df.collect()?);
shape: (3, 3)
┌─────┬─────┬──────────┐
│ a   ┆ b   ┆ b_cumsum │
│ --- ┆ --- ┆ ---      │
│ i32 ┆ i32 ┆ i32      │
╞═════╪═════╪══════════╡
│ 1   ┆ 10  ┆ 10       │
│ 2   ┆ 20  ┆ 30       │
│ 3   ┆ 30  ┆ 60       │
└─────┴─────┴──────────┘

For arbitrary cumulative functions you can drop down into map.

// Build the frame and switch to the lazy API.
let df = df!(
    "a" => &[1, 2, 3],
    "b" => &[10, 20, 30],
)
.unwrap()
.lazy()
.with_column(
    col("b")
        // `map` hands the column to a custom closure, so any cumulative
        // function can be written by hand.
        .map(
            |b| {
                // Access the column as i32 values, propagating the error if that fails.
                let b = b.i32()?;
                Ok(Some(
                    b.into_iter()
                        // Scan state: (running comma-separated string, "is this the first row" flag).
                        .scan(("".to_owned(), true), |(s, is_first), x| {
                            // Put a separator before every element except the first.
                            if !*is_first {
                                s.push(',');
                            }
                            *is_first = false;

                            // A missing value (`None`) contributes an empty string.
                            s.push_str(x.map(|x| x.to_string()).as_deref().unwrap_or(""));
                            // Emit a snapshot of the running string for this row.
                            Some(s.clone())
                        })
                        .collect::<Utf8Chunked>()
                        .into_series(),
                ))
            },
            // Declare the output dtype of the mapped column as Utf8.
            GetOutput::from_type(DataType::Utf8),
        )
        .alias("b_cumstr"),
);

println!("{:?}", df.collect()?);
shape: (3, 3)
┌─────┬─────┬──────────┐
│ a   ┆ b   ┆ b_cumstr │
│ --- ┆ --- ┆ ---      │
│ i32 ┆ i32 ┆ str      │
╞═════╪═════╪══════════╡
│ 1   ┆ 10  ┆ 10       │
│ 2   ┆ 20  ┆ 10,20    │
│ 3   ┆ 30  ┆ 10,20,30 │
└─────┴─────┴──────────┘
BallpointBen
  • 9,406
  • 1
  • 32
  • 62