0

I am trying to find out whether there is a way to use the value of a groupby column as part of the alias name given to an aggregate.

I could not find anything about this here on SO, on Reddit, or elsewhere.

Can I pass an `Expr` to the `alias` function?

Thanks for any help :)

use polars::prelude::*;

fn main() {
    let df = df! [
        "STOCK"         => ["TSLA", "TSLA", "META", "META", "AA",
                            "TSLA", "TSLA", "META", "META", "AA"],
        "EXP_DATE"      => ["2022-10-07","2022-10-07","2022-10-07","2022-10-07", "2022-10-07",
                            "2022-10-14","2022-10-14","2022-10-14","2022-10-14", "2022-10-14"],
        "PUT_CALL"      => ["P","P","C","C","P",
                            "C","C","P","P","C"],
        "STRIKES"       => [10, 20, 5, 10, 90,
                            10, 20, 5, 10, 80],

    ]
    .unwrap();

    // Could use some help to make this filter "LAZY" too ...
    let call_mask = df.column("PUT_CALL").unwrap().equal("C").unwrap();
    let calls_df = df.filter(&call_mask).unwrap(); //.collect();

    let new_df = calls_df
        .lazy()
        .groupby([col("STOCK"), col("EXP_DATE"), col("PUT_CALL")])

        // Here I want to use the EXP_DATEcolumn as part of the name e.g. "EXP_DATE Calls"
        .agg([col("STRIKES").list().alias("EXP_DATE Calls")])

        .collect()
        .unwrap();

    println!("new_df\t{:?}", new_df);

    /* This is the OUTPUT I like to have

    new_df  shape: (3, 4)
    ┌───────┬────────────┬──────────┬──────────────────┐──────────────────┐
    │ STOCK ┆ EXP_DATE   ┆ PUT_CALL ┆ 2022-10-07 Calls │ 2022-10-14 Calls │
    │ ---   ┆ ---        ┆ ---      ┆ ---              │ ---              │
    │ str   ┆ str        ┆ str      ┆ list[i32]        │ list[i32]        │
    ╞═══════╪════════════╪══════════╪══════════════════╡══════════════════╡
    │ META  ┆ 2022-10-07 ┆ C        ┆ [5, 10]          │                  │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ AA    ┆ 2022-10-14 ┆ C        ┆                  │ [80]             │
    ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ TSLA  ┆ 2022-10-14 ┆ C        ┆                  │ [10, 20]         │
    └───────┴────────────┴──────────┴──────────────────┘──────────────────┘

     */
}
Robert
  • 131
  • 1
  • 7
  • You can obtain those same columns by filtering. Separate columns per filter seem like an anti-pattern – BallpointBen Oct 05 '22 at 01:55
  • @BallPointBen thanks for sharing your thoughts ... I started to re-think my algorithmic approach and am able to get rid of those "collection" columns altogether. – Robert Oct 06 '22 at 04:38

0 Answers0