0

I need help "translating" a Python example to Rust. The Python example was given here.

Here is the code snippet I am trying to make work:

use polars::prelude::*;

fn main() {
    // The two string columns that make up the example data.
    let columns = vec![
        Series::new("Fruit", &["Apple", "Apple", "Pear"]),
        Series::new("Color", &["Red", "Yellow", "Green"]),
    ];

    // Build the eager frame first, then hand it over to the lazy API.
    let df_lazy = DataFrame::new(columns).unwrap().lazy();

    /*

    Goal: reproduce the following PYTHON expression in RUST:

    df_lazy.with_columns([
                    # string fmt over multiple expressions
                    pl.format("{} has {} color", "Fruit", "Color").alias("fruit_list"),
                    # columnar lambda over multiple expressions
                    pl.map(["Fruit", "Color"], lambda s: s[0] + " has " + s[1] + " color" ).alias("fruit_list2"),
                    ])
     */
}

I can't even get a simple select to work?! Now I am lost.

Robert
  • 131
  • 1
  • 7

4 Answers4

2

The LazyFrame has a slightly different interface for .select than the regular DataFrame. It expects an iterable of column expressions, each built with the col() function. You can change your select call to the following:

let selected = df_lazy.select(&[col("Fruit"), col("Color")]);

println!("{:?}", selected.collect());

To get the results:

Ok(shape: (3, 2)
┌───────┬────────┐
│ Fruit ┆ Color  │
│ ---   ┆ ---    │
│ str   ┆ str    │
╞═══════╪════════╡
│ Apple ┆ Red    │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ Apple ┆ Yellow │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
│ Pear  ┆ Green  │
└───────┴────────┘)

You can see more examples of working with the LazyFrame here: https://docs.rs/polars-lazy/latest/polars_lazy/

emagers
  • 841
  • 7
  • 13
  • Thanks @emagers for the help. That just leaves me with my bigger issue ... I will remove the error: `// This does not work. // let selected = df_lazy.select(["Fruit", "Color"]);` from the original question as this is fixed with your suggestion. – Robert Sep 05 '22 at 19:08
0

I got a little further ... but ran into another snag:

The Black Box Function example should do the trick, but I can't get it to work:

Err(SchemaMisMatch("Series of dtype: List(Float64) != Struct"))

at this line let ca = s.struct_()?;

Here is the sample code:

use polars::prelude::*;

/// Placeholder for an arbitrary two-argument computation.
///
/// Currently returns `a` unchanged; `b` is accepted but not used.
fn my_black_box_function(a: f32, b: f32) -> f32 {
    // Stand-in body: the real computation would go here.
    let (first, _second) = (a, b);
    first
}

/// Attempts to apply `my_black_box_function` row-wise over `col_a` and `col_b`
/// of an inline-built frame and collect the result.
///
/// NOTE(review): the `lf` parameter is never used in the body; the frame is
/// built locally with `df!` instead.
///
/// As written this returns
/// `Err(SchemaMisMatch("Series of dtype: List(Float64) != Struct"))`.
fn apply_multiples(lf: &LazyFrame) -> Result<DataFrame> {
    df![
        "col_a" => [1.0, 2.0, 3.0],
        "col_b" => [3.0, 5.1, 0.3]
    ]?
    .lazy()
    // `concat_lst` hands the closure a single *list* column (one
    // `[col_a, col_b]` pair per row), not a struct column.
    .select([concat_lst(["col_a", "col_b"]).map(
        |s| {

            // This is the line that fails: `s` has dtype List(Float64), so the
            // struct accessor reports the schema mismatch.
            let ca = s.struct_()?;

            // NOTE(review): `b` is bound to "col_a" and `a` to "col_b" — the
            // names look swapped; confirm which order is intended.
            let b = ca.field_by_name("col_a")?;
            let a = ca.field_by_name("col_b")?;
            // NOTE(review): the error message reports Float64 data, so these
            // `f32()` downcasts would presumably fail as well — verify.
            let a = a.f32()?;
            let b = b.f32()?;

            // Walk both columns in lockstep; a null on either side yields a null.
            let out: Float32Chunked = a
                .into_iter()
                .zip(b.into_iter())
                .map(|(opt_a, opt_b)| match (opt_a, opt_b) {
                    (Some(a), Some(b)) => Some(my_black_box_function(a, b)),
                    _ => None,
                })
                .collect();

            Ok(out.into_series())
        },
        // Declared output dtype of the mapped expression.
        GetOutput::from_type(DataType::Float32),
    )])
    .collect()
}

The two series are concatenated as 's':

shape: (3,)
Series: 'col_a' [list]
[
        [1.0, 3.0]
        [2.0, 5.1]
        [3.0, 0.3]
]

but I can not make a struct_ out of it?!

Robert
  • 131
  • 1
  • 7
0

Thanks to the comments (@ polars issues) from @cannero and @ritchie46, I was able to make it work.

This is a working version (Float64):

use polars::prelude::*;

/// Placeholder for an arbitrary two-argument computation on `f64` values.
///
/// Currently returns `a` unchanged; `b` is accepted but not used.
fn my_black_box_function(a: f64, b: f64) -> f64 {
    // Stand-in body: the real computation would go here.
    let (first, _second) = (a, b);
    first
}

/// Applies `my_black_box_function` row-wise to the fields `a` and `b` of the
/// struct column `struct_col` and collects the lazy query into a `DataFrame`.
///
/// A row where either field is null produces a null in the output.
///
/// # Errors
///
/// Propagates any error raised while evaluating the query, e.g. when
/// `struct_col` is not a struct column, a field is missing, or a field is not
/// of dtype `Float64`.
fn apply_multiples(lf: LazyFrame) -> Result<DataFrame> {
    lf.select([col("struct_col").map(
        |s| {
            let ca = s.struct_()?;

            // Bind each field to the variable of the same name so that
            // `my_black_box_function(a, b)` really receives field "a" first
            // and field "b" second (the bindings were previously crossed).
            let a = ca.field_by_name("a")?;
            let b = ca.field_by_name("b")?;
            let a = a.f64()?;
            let b = b.f64()?;

            // Walk both fields in lockstep; `Option::zip` is `Some` only when
            // both values are present, otherwise the row stays null.
            let out: Float64Chunked = a
                .into_iter()
                .zip(b.into_iter())
                .map(|(opt_a, opt_b)| {
                    opt_a
                        .zip(opt_b)
                        .map(|(a, b)| my_black_box_function(a, b))
                })
                .collect();

            Ok(out.into_series())
        },
        // Declared output dtype of the mapped expression.
        GetOutput::from_type(DataType::Float64),
    )])
    .collect()
}

fn main() {
    // Begin with an ordinary eager DataFrame holding two float columns.
    let plain_df = df![
        "a" => [1.0, 2.0, 3.0],
        "b" => [3.0, 5.1, 0.3]
    ]
    .unwrap();

    // Pack every column into a single StructChunked, then place that struct
    // in a fresh frame as `struct_col` and switch to the lazy API.
    let packed = plain_df.into_struct("StructChunked");
    let lf = df!["struct_col" => packed].unwrap().lazy();

    // Report the outcome: success banner, or the debug form of the error.
    match apply_multiples(lf) {
        Err(e) => println!("{:?}", e),
        Ok(_) => println!("We did it"),
    }
}

Here is a version for my initial question (String):

use polars::prelude::*;

/// Builds the sentence `"<fruit> has <color> color"` from the two inputs.
fn my_fruit_box(fruit: String, color: String) -> String {
    // Join the fixed words and the two inputs into one owned string.
    [fruit.as_str(), " has ", color.as_str(), " color"].concat()
}

/// Maps the struct column `struct_col` to a string column by feeding its
/// `Fruit` and `Color` fields, row by row, through `my_fruit_box`, then
/// collects the lazy query.
///
/// A row where either field is null produces a null in the output.
fn apply_multiples(lf: LazyFrame) -> Result<DataFrame> {
    let sentence_expr = col("struct_col").map(
        |s| {
            let fields = s.struct_()?;

            let fruit_series = fields.field_by_name("Fruit")?;
            let color_series = fields.field_by_name("Color")?;
            // Downcast to string arrays (color first, then fruit).
            let colors = color_series.utf8()?;
            let fruits = fruit_series.utf8()?;

            // Pair the values up per row; only rows with both values present
            // are turned into a sentence.
            let sentences: Utf8Chunked = fruits
                .into_iter()
                .zip(colors.into_iter())
                .map(|(maybe_fruit, maybe_color)| {
                    maybe_fruit
                        .zip(maybe_color)
                        .map(|(f, c)| my_fruit_box(f.to_string(), c.to_string()))
                })
                .collect();

            Ok(sentences.into_series())
        },
        // Declared output dtype of the mapped expression.
        GetOutput::from_type(DataType::Utf8),
    );

    lf.select([sentence_expr]).collect()
}

fn main() {
    // Begin with an ordinary eager DataFrame made of two string columns.
    let columns = vec![
        Series::new("Fruit", &["Apple", "Apple", "Pear"]),
        Series::new("Color", &["Red", "Yellow", "Green"]),
    ];
    let plain_df = DataFrame::new(columns).unwrap();

    // Pack every column into a single StructChunked, then place that struct
    // in a fresh frame as `struct_col` and switch to the lazy API.
    let packed = plain_df.into_struct("StructChunked");
    let lf = df!["struct_col" => packed].unwrap().lazy();

    // Report the outcome: success banner, or the debug form of the error.
    match apply_multiples(lf) {
        Err(e) => println!("{:?}", e),
        Ok(_) => println!("We did it"),
    }
}

Robert
  • 131
  • 1
  • 7
0

To use struct_() or as_struct (see https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html?highlight=apply#combining-multiple-column-values):

Add the "dtype-struct" feature to the polars dependency in Cargo.toml:

# "dtype-struct" is the feature required for struct_() / as_struct.
polars = { version = "*", features = [
    "lazy",
    "lazy_regex",
    "dtype-datetime",
    "strings",
    "csv-file",
    "parquet",
    "list_eval",
    "list_take",
    "concat_str",
    "dtype-struct", # <--
] }

The other features were added for my specific use case.

leun4m
  • 560
  • 5
  • 24
Claudio Fsr
  • 106
  • 6