I am trying to use the Polars Rust library to create DataFrames from JSON fetched from stats.nba.com (example JSON). The best example I could find for creating a DataFrame from JSON was in the docs, but I'm not sure how to load a serde_json::Value into a Cursor and pass it to the JsonReader. Below is my code, which loads everything into Vecs and then creates the Series and the DataFrame. Is there a better way?
/// Fetches the endpoint JSON and prints one `DataFrame` per entry of its
/// `resultSets` array.
///
/// For every result set the rows in `rowSet` are pivoted into per-column value
/// lists keyed by the names in `headers`. Each column then becomes a `Series`
/// whose type is picked from the column's first value: integer, float, or
/// (for anything else) string.
///
/// Columns are emitted in the order given by `headers`, so the resulting frame
/// has a stable layout rather than following `HashMap` iteration order.
///
/// Caveats of the per-column type inference:
/// - a column whose first value is JSON `null` is skipped entirely;
/// - values that do not match the inferred type (including `null`) are
///   replaced by `0`, `0.0` or `""` respectively.
///
/// # Errors
///
/// Propagates any error returned by `DataFrame::new`.
///
/// # Panics
///
/// Panics if `send_request` fails, or if the JSON does not have the expected
/// `resultSets` / `rowSet` / `headers` / `name` shape.
fn load_dataframe(&self) -> Result<()> {
    let endpoint_json = self.send_request().unwrap();
    let result_sets = endpoint_json["resultSets"].as_array().unwrap();

    for data_set in result_sets {
        let data_set_values = data_set["rowSet"].as_array().unwrap();
        let data_set_headers = data_set["headers"].as_array().unwrap();

        // Pivot the row-oriented JSON into column name -> list of cell values.
        let mut headers_to_values: HashMap<&str, Vec<&Value>> = HashMap::new();
        for (pos, row) in data_set_values.iter().enumerate() {
            if pos == 0 {
                init_columns(&mut headers_to_values, row, data_set_headers);
            } else {
                insert_row_values(&mut headers_to_values, row, data_set_headers);
            }
        }

        // Walk the headers (not the map) so the column order is deterministic.
        let mut df_series: Vec<Series> = Vec::with_capacity(headers_to_values.len());
        for header in data_set_headers {
            let col_name = match header.as_str() {
                Some(name) => name,
                None => continue,
            };
            // `remove` hands over ownership and guarantees that a repeated
            // header name yields a single column, as the map-based loop did.
            let json_values = match headers_to_values.remove(col_name) {
                Some(values) => values,
                None => continue,
            };
            // The first value decides the column type; empty columns and
            // columns starting with `null` are left out.
            let first_val = match json_values.first() {
                Some(&value) if !value.is_null() => value,
                _ => continue,
            };

            if first_val.is_i64() {
                let typed_data: Vec<i64> = json_values
                    .iter()
                    .map(|&v| v.as_i64().unwrap_or(0))
                    .collect();
                df_series.push(Series::new(col_name, typed_data));
            } else if first_val.is_f64() {
                let typed_data: Vec<f64> = json_values
                    .iter()
                    .map(|&v| v.as_f64().unwrap_or(0.0))
                    .collect();
                df_series.push(Series::new(col_name, typed_data));
            } else {
                let typed_data: Vec<&str> = json_values
                    .iter()
                    .map(|&v| v.as_str().unwrap_or(""))
                    .collect();
                df_series.push(Series::new(col_name, typed_data));
            }
        }

        let data_set_name = data_set["name"].as_str().unwrap();
        let df = DataFrame::new(df_series)?;
        println!("{} \n{:?}", data_set_name, df);
    }

    Ok(())
}
/// Seeds `headers_to_values` with one single-element column per cell of
/// `first_row`.
///
/// The cell at position `i` is stored under the name found at `headers[i]`;
/// an entry that already exists under that name is replaced.
///
/// # Panics
///
/// Panics if `first_row` is not a JSON array, if it holds more cells than
/// `headers` has entries, or if a referenced header is not a JSON string.
fn init_columns<'a>(headers_to_values: &mut HashMap<&'a str, Vec<&'a Value>>, first_row: &'a Value, headers: &'a Vec<Value>) -> () {
    first_row
        .as_array()
        .unwrap()
        .iter()
        .enumerate()
        .for_each(|(idx, cell)| {
            let name = headers[idx].as_str().unwrap();
            headers_to_values.insert(name, vec![cell]);
        });
}
/// Appends every cell of `row` to the column named by the header at the same
/// position.
///
/// A column that does not exist in `headers_to_values` yet is created on
/// demand, so a row that carries a column not seen before no longer panics;
/// it simply starts a new (shorter) column.
///
/// # Panics
///
/// Panics if `row` is not a JSON array, if it holds more cells than `headers`
/// has entries, or if a referenced header is not a JSON string.
fn insert_row_values<'a>(headers_to_values: &mut HashMap<&'a str, Vec<&'a Value>>, row: &'a Value, headers: &'a Vec<Value>) -> () {
    let row_array = row.as_array().unwrap();
    for (pos, col_val) in row_array.iter().enumerate() {
        let col_name = headers[pos].as_str().unwrap();
        // Entry API: a single lookup that also covers the "column missing" case.
        headers_to_values.entry(col_name).or_default().push(col_val);
    }
}