Using the latest polars (as of 2022), you have to make sure that the "json" feature is enabled for polars under [dependencies] in Cargo.toml. For example:
[dependencies]
polars = { version="0.24.2", features = ["lazy", "json"] }
tokio = { version = "1.21.1", features = ["full"] }
Now for reading newline-delimited JSON (NDJSON, also known as JSON Lines: one JSON object per line) into a DataFrame:
use polars::prelude::*;
/// Reads the newline-delimited JSON file `Toys_and_Games_5.ndjson` into a
/// polars `DataFrame` using an explicit schema, then prints the frame.
///
/// # Errors
///
/// Returns the `PolarsError` produced while building the lazy reader
/// (`finish`) or while materialising the frame (`collect`), instead of
/// panicking or printing the error and still exiting successfully.
fn main() -> PolarsResult<()> {
    // The schema is supplied explicitly instead of being inferred from the file.
    let schema = Schema::from(vec![
        // Previously spelled "reviwerID". `reviewerID` is presumably the key
        // used in the data (it pairs with `reviewerName`); a name that does
        // not match any key would explain an all-null column. Confirm against
        // the file.
        Field::new("reviewerID", DataType::Utf8),
        Field::new("asin", DataType::Utf8),
        Field::new("reviewerName", DataType::Utf8),
        Field::new("helpful", DataType::List(Box::new(DataType::Int32))),
        Field::new("reviewText", DataType::Utf8),
        Field::new("overall", DataType::Float64),
        Field::new("summary", DataType::Utf8),
        Field::new("unixReviewTime", DataType::Int64),
        Field::new("reviewTime", DataType::Utf8),
        // Declared as a plain string rather than a Struct.
        Field::new("style", DataType::Utf8),
    ]);

    // `main` already returns `PolarsResult`, so both fallible steps are
    // propagated with `?` rather than a `match` + `panic!` for `finish()` and
    // a silently printed `Err` for `collect()`.
    let df = LazyJsonLineReader::new("Toys_and_Games_5.ndjson".into())
        .with_schema(schema)
        .finish()?
        .collect()?;

    // `df` is now the unwrapped DataFrame, so this prints the table itself
    // (no surrounding `Ok(...)`).
    println!("{:?}", df);
    Ok(())
}
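The output of my run is shown below. It is wrapped in `Ok(...)` because the `Result` returned by `collect()` was printed directly, and the `reviwerID` column is all null, most likely because the schema for that run spelled the name differently from the `reviewerID` key in the data: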
Ok(shape: (3695, 10)
┌───────────┬────────────┬──────────────────────────┬─────────┬─────┬──────────────────────────────────┬────────────────┬─────────────┬───────┐
│ reviwerID ┆ asin ┆ reviewerName ┆ helpful ┆ ... ┆ summary ┆ unixReviewTime ┆ reviewTime ┆ style │
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ str ┆ i32 ┆ ┆ str ┆ i64 ┆ str ┆ str │
╞═══════════╪════════════╪══════════════════════════╪═════════╪═════╪══════════════════════════════════╪════════════════╪═════════════╪═══════╡
│ null ┆ 0486427706 ┆ Ginger ┆ null ┆ ... ┆ Nice book ┆ 1381017600 ┆ 10 6, 2013 ┆ null │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ 0486427706 ┆ Dragonflies & Autumn ┆ null ┆ ... ┆ Great pictures ┆ 1376006400 ┆ 08 9, 2013 ┆ null │
│ ┆ ┆ Leaves ┆ ┆ ┆ ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ 0486427706 ┆ barbara ann ┆ null ┆ ... ┆ The pictures are great, I've ┆ 1459814400 ┆ 04 5, 2016 ┆ null │
│ ┆ ┆ ┆ ┆ ┆ don... ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ 0486427706 ┆ Samantha ┆ null ┆ ... ┆ So beautiful! ┆ 1455321600 ┆ 02 13, 2016 ┆ null │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ B00IEOH8KO ┆ A. Red ┆ null ┆ ... ┆ Made party decorating easy and ┆ 1453939200 ┆ 01 28, 2016 ┆ null │
│ ┆ ┆ ┆ ┆ ┆ a... ┆ ┆ ┆ │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ B00IEOH8KO ┆ nilda morales ┆ null ┆ ... ┆ Four Stars ┆ 1452988800 ┆ 01 17, 2016 ┆ null │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ B00IEOH8KO ┆ W. Ross ┆ null ┆ ... ┆ Five Stars ┆ 1449014400 ┆ 12 2, 2015 ┆ null │
├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ null ┆ B00IEOH8KO ┆ Liz89 ┆ null ┆ ... ┆ Great Deal ┆ 1431129600 ┆ 05 9, 2015 ┆ null │
└───────────┴────────────┴──────────────────────────┴─────────┴─────┴──────────────────────────────────┴────────────────┴─────────────┴───────┘)
# IMPORTANT NOTE
Notice that I had to provide the schema explicitly because the "style"
column in your dataset is irregular, which means that polars has difficulty inferring its type. If you fill out the complete schema it should work. I didn't bother creating a Struct for "style" (I declared it as a plain string, `Utf8`, instead), but you can go ahead and do that if you like :).