This code was tested with Rust v1.67 and polars v0.27.2.
Add these features in Cargo.toml:
[dependencies]
polars = { version = "0.27.2", features = [ "lazy", "lazy_regex", "list_eval" ] }
color-eyre = "*"
The main function:
use color_eyre::Result;
use polars::prelude::*;
use std::error::Error;
/// Builds a one-column DataFrame of float lists, then for every list position
/// computes the absolute difference between each row's element and the last
/// row's element, gathers those differences back into a list column and sums it.
fn main() -> Result<(), Box<dyn Error>> {
    // Four rows of three floats each; every row becomes one list cell.
    let first = vec![0.0, -1.0 - 1.0 / 9.0, 1.77];
    let second = vec![-2.0 - 2.0 / 9.0, 3.0 + 3.0 / 9.0, 2.93];
    let third = vec![3.0 + 3.0 / 9.0, -4.0 - 1.0 / 9.0, 3.56];
    let fourth = vec![8.0 + 8.0 / 9.0, -10.0, 7.26];

    let cells = [
        Series::new("a", &first),
        Series::new("b", &second),
        Series::new("c", &third),
        Series::new("d", &fourth),
    ];
    let frame: DataFrame = DataFrame::new(vec![Series::new("vec", &cells)])?;
    println!("df:\n{frame}\n");

    // Names of every intermediate column, kept so they can be dropped later.
    let mut helper_columns: Vec<String> = Vec::new();
    let mut plan = frame.lazy();

    // The list width is taken from the first row.
    for idx in 0..first.len() {
        let element_name = format!("vec_{idx}");
        let diff_name = format!("sub_{idx}");

        // Pull the idx-th list element out into its own intermediate column.
        let pick_element = col("vec")
            .arr()
            .get(lit(idx as i64))
            .alias(&element_name);
        // Alternative kept from the original for reference:
        // col("vec").arr().eval(lit(2.0) * col(""), true)
        //     .alias("test multiplication by 2"),

        // |last row's value - this row's value| for that element column.
        let abs_diff_from_last = (col(&element_name).last() - col(&element_name))
            .apply(absolute_value, GetOutput::from_type(DataType::Float64))
            .alias(&diff_name);

        // Two separate steps: the second expression reads the column the first creates.
        plan = plan
            .with_columns([pick_element])
            .with_columns([abs_diff_from_last]);

        helper_columns.push(element_name);
        helper_columns.push(diff_name);
    }

    // Collect every `sub_*` column (regex selection) into one list column,
    // then add the per-row sum of that list.
    let plan = plan
        .select([all(), concat_lst([col("^sub_.*$")]).alias("Concat lists")])
        .with_columns([col("Concat lists").arr().sum().alias("Sum")]);

    // uncomment to discard intermediate columns
    // let plan = plan.drop_columns(helper_columns);

    let result = plan.collect()?;
    println!("dataframe:\n{result}\n");

    Ok(())
}
The absolute_value function is given below:
/// Returns a new series holding the element-wise absolute value of `str_val`.
///
/// Null entries stay null. The result is always wrapped in `Some`.
///
/// # Errors
///
/// Returns the `PolarsError` produced by `Series::f64` when `str_val` cannot be
/// viewed as a `Float64` series. The error is propagated rather than panicking,
/// so a dtype mismatch is reported through the query's `Result`.
fn absolute_value(str_val: Series) -> Result<Option<Series>, PolarsError> {
    let series: Series = str_val
        .f64()?
        .into_iter()
        .map(|opt_value: Option<f64>| opt_value.map(f64::abs))
        .collect::<Float64Chunked>()
        .into_series();
    Ok(Some(series))
}
The initial DataFrame:
df:
shape: (4, 1)
┌─────────────────────────────┐
│ vec │
│ --- │
│ list[f64] │
╞═════════════════════════════╡
│ [0.0, -1.111111, 1.77] │
│ [-2.222222, 3.333333, 2.93] │
│ [3.333333, -4.111111, 3.56] │
│ [8.888889, -10.0, 7.26] │
└─────────────────────────────┘
The final result is:
dataframe:
shape: (4, 9)
┌─────────────────────────────┬───────────┬───────────┬───────────┬─────┬───────┬───────┬──────────────────────────────┬───────────┐
│ vec ┆ vec_0 ┆ sub_0 ┆ vec_1 ┆ ... ┆ vec_2 ┆ sub_2 ┆ Concat lists ┆ Sum │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
│ list[f64] ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ list[f64] ┆ f64 │
╞═════════════════════════════╪═══════════╪═══════════╪═══════════╪═════╪═══════╪═══════╪══════════════════════════════╪═══════════╡
│ [0.0, -1.111111, 1.77] ┆ 0.0 ┆ 8.888889 ┆ -1.111111 ┆ ... ┆ 1.77 ┆ 5.49 ┆ [8.888889, 8.888889, 5.49] ┆ 23.267778 │
│ [-2.222222, 3.333333, 2.93] ┆ -2.222222 ┆ 11.111111 ┆ 3.333333 ┆ ... ┆ 2.93 ┆ 4.33 ┆ [11.111111, 13.333333, 4.33] ┆ 28.774444 │
│ [3.333333, -4.111111, 3.56] ┆ 3.333333 ┆ 5.555556 ┆ -4.111111 ┆ ... ┆ 3.56 ┆ 3.7 ┆ [5.555556, 5.888889, 3.7] ┆ 15.144444 │
│ [8.888889, -10.0, 7.26] ┆ 8.888889 ┆ 0.0 ┆ -10.0 ┆ ... ┆ 7.26 ┆ 0.0 ┆ [0.0, 0.0, 0.0] ┆ 0.0 │
└─────────────────────────────┴───────────┴───────────┴───────────┴─────┴───────┴───────┴──────────────────────────────┴───────────┘