4

I am reading a CSV file with dates in month/day/year format (e.g. "11/15/2022"), but the month and day are not zero-padded. The following is my test code:

use polars::prelude::*;
use polars_lazy::prelude::*;

/// Builds a small frame whose "x" column holds month/day/year strings
/// without zero padding, then tries to parse that column into a date
/// column named "new time" using the `%m/%d/%Y` format.
fn main() {
    let frame = df![
        "x" => ["1/4/2011", "2/4/2011", "3/4/2011", "4/4/2011"],
        "y" => [1, 2, 3, 4],
    ]
    .unwrap();
    let lazy: LazyFrame = frame.lazy();

    // Every option other than the format and target dtype keeps its default.
    let parse_opts = StrpTimeOptions {
        fmt: Some("%m/%d/%Y".into()),
        date_dtype: DataType::Date,
        ..Default::default()
    };

    // Build the parsing expression first, then attach it as a new column.
    let new_time = col("x").str().strptime(parse_opts).alias("new time");
    let parsed = lazy.clone().with_column(new_time).collect().unwrap();

    println!("{:?}", parsed);
}

The output is

shape: (4, 3)
┌──────────┬─────┬──────────┐
│ x        ┆ y   ┆ new time │
│ ---      ┆ --- ┆ ---      │
│ str      ┆ i32 ┆ date     │
╞══════════╪═════╪══════════╡
│ 1/4/2011 ┆ 1   ┆ null     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 2/4/2011 ┆ 2   ┆ null     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 3/4/2011 ┆ 3   ┆ null     │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 4/4/2011 ┆ 4   ┆ null     │

In the options I tried "%-m/%-d/%Y" instead of "%m/%d/%Y", as mentioned in the documentation, but it panicked at runtime:

thread '<unnamed>' panicked at 'attempt to subtract with overflow', /home/xxx/.cargo/registry/src/github.com-1ecc6299db9ec823/polars-time-0.21.1/src/chunkedarray/utf8/mod.rs:234:33

What is the correct way to read this format? I am using "Ubuntu 20.04.4 LTS".

Kushdesh
  • 1,118
  • 10
  • 16
  • 2
    why are you using python documentation? – Netwave May 22 '22 at 15:25
  • I used python doc only for date format. I couldn't get rust doc for time format. Redirected to it from https://pola-rs.github.io/polars-book/user-guide/howcani/data/timestamps.html – Kushdesh May 22 '22 at 15:39
  • Should I use https://docs.rs/chrono/latest/chrono/format/strftime/index.html instead? It also says to add '-' to suppress any padding. – Kushdesh May 22 '22 at 15:57

2 Answers

4

The `..Default::default()` in your options is filling in the wrong flags for this case. You need to set `exact` to `true` explicitly:

...
    // Parsing options for `strptime`; only `exact` differs from the defaults
    // besides the format and the target dtype.
    let options = StrpTimeOptions {
        // `%-m` / `%-d` are the month and day specifiers without zero padding.
        fmt: Some("%-m/%-d/%Y".into()),
        date_dtype: DataType::Date,
        // Must be set explicitly: relying on `Default` for this flag is what
        // made the parse fail.
        exact: true,
        ..Default::default()
    };
...

Full code, tested with a zero-padded value ("01/04/2011") included:

use polars::prelude::*;
use polars_lazy::dsl::StrpTimeOptions;
use polars_lazy::prelude::{col, IntoLazy, LazyFrame};

/// Parses the "x" column (month/day/year strings, with and without zero
/// padding) into a date column named "new time" and prints the result.
fn main() {
    // The first value is zero-padded; the remaining ones are not.
    let frame = df![
        "x" => ["01/04/2011", "2/4/2011", "3/4/2011", "4/4/2011"],
        "y" => [1, 2, 3, 4],
    ]
    .unwrap();
    let lazy: LazyFrame = frame.lazy();

    // `exact` is set explicitly instead of being left to `Default`.
    let parse_opts = StrpTimeOptions {
        fmt: Some("%-m/%-d/%Y".into()),
        date_dtype: DataType::Date,
        exact: true,
        ..Default::default()
    };

    // Build the parsing expression first, then attach it as a new column.
    let new_time = col("x").str().strptime(parse_opts).alias("new time");
    let res = lazy.clone().with_column(new_time).collect().unwrap();

    println!("{:?}", res);
}

Outputs:

shape: (4, 3)
┌────────────┬─────┬────────────┐
│ x          ┆ y   ┆ new time   │
│ ---        ┆ --- ┆ ---        │
│ str        ┆ i32 ┆ date       │
╞════════════╪═════╪════════════╡
│ 01/04/2011 ┆ 1   ┆ 2011-01-04 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2/4/2011   ┆ 2   ┆ 2011-02-04 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 3/4/2011   ┆ 3   ┆ 2011-03-04 │
├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 4/4/2011   ┆ 4   ┆ 2011-04-04 │
└────────────┴─────┴────────────┘
Netwave
  • 40,134
  • 6
  • 50
  • 93
0

For Rust Polars version "0.30".

With Cargo.toml:

[dependencies]
polars = { version = "0.30", features = [
    "lazy", # Lazy API
    "dtype-date",
    # others features
] }

Now use StrptimeOptions:

    // Options controlling how the date strings are parsed.
    let time_options = StrptimeOptions {
        format: Some("%-m/%-d/%Y".into()), // Month and day without zero padding, then the year.
        strict: false, // If set, polars returns an error if any date fails to parse.
        exact: true,   // Require the format to match the whole string; if false, polars may match inside a longer string, e.g. “2021-01-01” within “foo-2021-01-01-bar”.
        cache: true,   // Use a cache of unique, converted dates to apply the datetime conversion.
    };

And after the replacements:

use polars::prelude::*;
use std::error::Error;

/// Parses the "x" column (month/day/year strings, with and without zero
/// padding) into a date column named "new time" and prints the result.
///
/// # Errors
///
/// Returns an error if building the frame or collecting the lazy query fails.
fn main() -> Result<(), Box<dyn Error>> {
    let frame: DataFrame = df![
        "x" => ["01/04/2011", "2/4/2011", "3/4/2011", "4/4/2011"],
        "y" => [1, 2, 3, 4],
    ]?;

    let parse_opts = StrptimeOptions {
        format: Some("%-m/%-d/%Y".into()),
        // When set, polars returns an error if any date fails to parse.
        strict: false,
        // Require the format to match the whole string; if false, polars may
        // match inside a longer string, e.g. “2021-01-01” within
        // “foo-2021-01-01-bar”.
        exact: true,
        // Use a cache of unique, converted dates to apply the conversion.
        cache: true,
    };

    // `to_date` takes the place of the older `.str().strptime(options)` call.
    let new_time = col("x").str().to_date(parse_opts).alias("new time");
    let lz: LazyFrame = frame.lazy().with_column(new_time);

    let collected = lz.collect()?;
    println!("result:\n{:?}", collected);

    Ok(())
}
Claudio Fsr
  • 106
  • 6