-1

I would like to read a CSV file with Polars in Rust, and I would like to ignore all rows that cause an error. My understanding was that using `with_ignore_errors(true)` would skip the rows that cause errors. However, I still get an error.

use polars::frame::DataFrame;
use polars::prelude::PolarsResult;
use polars_io::prelude::*;

/// Reads the CSV file at `file_path` into a [`DataFrame`].
///
/// The reader is configured with `with_ignore_errors(true)` and
/// `has_header(true)` before the file is parsed.
///
/// # Errors
///
/// Propagates any error returned by `CsvReader::from_path` or by `finish`.
fn open_file(file_path: &str) -> PolarsResult<DataFrame> {
    // Opening the file is the first fallible step; bail out early with `?`.
    let reader = CsvReader::from_path(file_path)?;
    let configured = reader.with_ignore_errors(true).has_header(true);
    configured.finish()
}


/// Loads the CSV at `file_path` and passes the resulting frame, together with
/// `lhs` and `rhs`, to `calculate_all`, returning that function's result.
///
/// # Panics
///
/// Panics with the message `"Panic"` if `open_file` returns an `Err`.
pub fn calculate(file_path: &str, lhs: &Vec<usize>, rhs: usize) -> f64 {
    // The underlying error value is intentionally discarded, as in the match-based form.
    let df = open_file(file_path).unwrap_or_else(|_| panic!("Panic"));
    calculate_all(&df, &lhs, rhs)
}

Here is the error:

thread '<unnamed>' panicked at 'called `Result::unwrap()` on an `Err` value: External("", Utf8Error)', /home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow2-0.17.3/src/array/growable/utf8.rs:55:76
stack backtrace:
   0:     0x55de9f9eaf01 - std::backtrace_rs::backtrace::libunwind::trace::h6aeaf83abc038fe6
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
   1:     0x55de9f9eaf01 - std::backtrace_rs::backtrace::trace_unsynchronized::h4f9875212db0ad97
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
   2:     0x55de9f9eaf01 - std::sys_common::backtrace::_print_fmt::h3f820027e9c39d3b
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs:65:5
   3:     0x55de9f9eaf01 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hded4932df41373b3
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs:44:22
   4:     0x55de9fa0f3bf - core::fmt::rt::Argument::fmt::hc8ead7746b2406d6
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/fmt/rt.rs:138:9
   5:     0x55de9fa0f3bf - core::fmt::write::hb1cb56105a082ad9
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/fmt/mod.rs:1094:21
   6:     0x55de9f9e8ac1 - std::io::Write::write_fmt::h797fda7085c97e57
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/io/mod.rs:1713:15
   7:     0x55de9f9ead15 - std::sys_common::backtrace::_print::h492d3c92d7400346
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs:47:5
   8:     0x55de9f9ead15 - std::sys_common::backtrace::print::hf74aa2eef05af215
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs:34:9
   9:     0x55de9f9ec367 - std::panicking::default_hook::{{closure}}::h8cad394227ea3de8
  10:     0x55de9f9ec154 - std::panicking::default_hook::h249cc184fec99a8a
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs:288:9
  11:     0x55de9f9ec81c - std::panicking::rust_panic_with_hook::h82ebcd5d5ed2fad4
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs:705:13
  12:     0x55de9f9ec717 - std::panicking::begin_panic_handler::{{closure}}::h810bed8ecbe66f1a
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs:597:13
  13:     0x55de9f9eb336 - std::sys_common::backtrace::__rust_end_short_backtrace::h1410008071796261
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs:151:18
  14:     0x55de9f9ec462 - rust_begin_unwind
                               at /rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs:593:5

My understanding is that using `with_ignore_errors` should skip the rows that have errors. I also expected any error from the `CsvReader` to be returned to `calculate` -- that is, I expected "Panic" to be printed, since `open_file(...)` would return the error from `CsvReader::from_path`.

  • `with_ignore_errors` will ignore rows with CSV errors, but if the file has Unicode errors (as yours appears to), then those *can't* be ignored, because they are issues with the whole file, not with a single row – BallpointBen Jul 27 '23 at 00:11
  • Oh I see! That makes sense. What is the suggested way to handle unicode errors? – user22284368 Jul 31 '23 at 15:25

0 Answers