0

I have an array of generic `serde_json::Value`s that may contain duplicates.

`serde_json::Value` does not implement `std::cmp::Ord`, and I cannot implement the trait myself because only traits defined in the current crate can be implemented for arbitrary types.

What is the best/fastest way to eliminate duplicates (and create a custom order) on that array?

Chayim Friedman
  • 47,971
  • 5
  • 48
  • 77
  • 2
    Perhaps you could put the `Value` inside of a tuple struct like `struct Wrapper(Value);` and implement `std::cmp::Ord` on that. – matthew-e-brown Jun 21 '22 at 13:59
  • Thanks. That's a good idea, but what is the correct way to cast the ```serde_json::Value```? I've already tried something similar with a custom ```HashValue``` type. But when I try to do the cast ```list.into_iter().map(|&e| e as HashValue).unique().collect();``` then the compiler complains ```non-primitive cast: 'serde_json::Value' as 'index::results::HashValue<'_>'``` – Marko Seidenglanz Jun 21 '22 at 14:10
  • 1
    If `HashValue` is a tuple type, then just wrap it: `.map(|e| HashValue(e))` or even just `.map(HashValue)` – Aplet123 Jun 21 '22 at 14:32
  • Does this answer your question? [How do I implement a trait I don't own for a type I don't own?](https://stackoverflow.com/questions/25413201/how-do-i-implement-a-trait-i-dont-own-for-a-type-i-dont-own) – Chayim Friedman Jun 22 '22 at 01:38
  • How do you want to remove the duplicates? – Chayim Friedman Jun 22 '22 at 01:40

1 Answers1

1

You can use the newtype idiom to implement traits for existing types.

use serde_json::Value;
use std::collections::HashSet;
use std::hash::{Hash, Hasher};

/// Borrowing newtype around a [`Value`].
///
/// `Value` comes from another crate, so foreign traits cannot be implemented
/// for it directly; this local wrapper carries the `Hash`/`PartialEq`/`Eq`
/// impls needed to store JSON values in a `HashSet`. It only borrows the
/// value, so wrapping never clones the underlying JSON.
struct HashableValue<'a>(&'a Value);

/// Demo entry point: parses a sample JSON document, removes duplicate array
/// elements from it, and prints the result to stderr.
fn main() {
    // Sample input with duplicates both in an array of objects and in an
    // array of scalars.
    const INPUT: &str = r#"{"a":[{"a":1},{"a":1},{"b":1}],"t":[1,2,3,1,"asdf","df","asdf"]}"#;

    let parsed: Value = serde_json::from_str(INPUT).unwrap();
    let deduplicated = remove_duplicates(&parsed);
    eprintln!("{}", deduplicated);
}

/// Returns a deep copy of `value` with duplicate array elements removed.
///
/// * Arrays keep the first occurrence of each element, in their original
///   order; every kept element is itself processed recursively.
/// * Objects are rebuilt key by key with every member processed recursively.
/// * All other values (null, booleans, numbers, strings) are cloned as-is.
///
/// Duplicates are detected by comparing the *original* elements, before they
/// are processed recursively. Two elements that only become equal once their
/// own nested duplicates are removed (e.g. `[1,1]` and `[1]`) are both kept.
fn remove_duplicates(value: &Value) -> Value {
    match value {
        Value::Array(arr) => {
            let mut seen = HashSet::with_capacity(arr.len());
            let deduplicated: Vec<Value> = arr
                .iter()
                // `insert` returns `false` when the element is already present,
                // so a single hash lookup both tests and records membership.
                .filter(|&item| seen.insert(HashableValue(item)))
                .map(remove_duplicates)
                .collect();
            Value::Array(deduplicated)
        }
        Value::Object(obj) => Value::Object(
            obj.iter()
                .map(|(key, member)| (key.clone(), remove_duplicates(member)))
                .collect(),
        ),
        _ => value.clone(),
    }
}

/// Structural hash of the wrapped JSON value, kept consistent with `==` on
/// [`Value`] so that the wrapper can be used as a `HashSet`/`HashMap` key.
impl<'a> Hash for HashableValue<'a> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Mix in the variant first so that values of different JSON types
        // (e.g. `null`, `false`, `0`, `""`) do not trivially collide.
        std::mem::discriminant(self.0).hash(state);

        match self.0 {
            // The discriminant already identifies `null`; nothing more to add.
            Value::Null => {}
            Value::Bool(b) => b.hash(state),
            Value::Number(n) => n.hash(state),
            Value::String(s) => s.hash(state),
            Value::Array(arr) => {
                // Length prefix keeps differently nested arrays such as
                // `[[1],[2]]` and `[[1,2]]` from feeding identical data.
                arr.len().hash(state);
                for item in arr {
                    HashableValue(item).hash(state);
                }
            }
            Value::Object(obj) => {
                obj.len().hash(state);
                // Hash entries in sorted key order rather than iteration order:
                // with serde_json's `preserve_order` feature, iteration follows
                // insertion order while map equality ignores it, so equal
                // objects must not hash differently. Keys are unique, hence an
                // unstable sort is deterministic.
                let mut entries: Vec<_> = obj.iter().collect();
                entries.sort_unstable_by(|a, b| a.0.cmp(b.0));
                for (key, member) in entries {
                    key.hash(state);
                    HashableValue(member).hash(state);
                }
            }
        }
    }
}

/// Two wrappers are equal exactly when the JSON values they borrow are equal.
impl PartialEq for HashableValue<'_> {
    fn eq(&self, rhs: &Self) -> bool {
        // The wrapper holds no state of its own; defer to `Value`'s equality.
        self.0 == rhs.0
    }
}

// Marker impl required for use as a `HashSet` element; it asserts that `==`
// on the wrapped value is a full equivalence relation.
impl Eq for HashableValue<'_> {}
AlexN
  • 1,613
  • 8
  • 21