-1

I have some Rust code which performs union and intersection operations on multiple sets (each a `BTreeSet`).

This is currently how I have written the code to chain those operations together. btree_map_X is a BTreeMap object.

// Copy the keys of each map into its own owned set.
let set_a = btree_map_1.keys().cloned().collect::<BTreeSet<i32>>();
let set_b = btree_map_2.keys().cloned().collect::<BTreeSet<i32>>();
let set_c = btree_map_3.keys().cloned().collect::<BTreeSet<i32>>();
let set_d = btree_map_4.keys().cloned().collect::<BTreeSet<i32>>();

// Union the four sets pairwise, left to right. Each `.collect()` builds a
// complete intermediate `BTreeSet` before the next `.union(...)` is applied.
let union_all =
    set_a 
        .union(&set_b)
        .cloned().collect::<BTreeSet<i32>>()
        .union(&set_c)
        .cloned().collect::<BTreeSet<i32>>()
        .union(&set_d)
        .cloned().collect::<BTreeSet<i32>>();

This doesn't seem particularly efficient, since after each union operation I am creating a whole new BTreeSet object using collect.

Is there a more efficient way to do this, perhaps leveraging the power of lazy evaluation?

FreelanceConsultant
  • 13,167
  • 27
  • 115
  • 225

2 Answers

1

You can chain all the iterators on those sets, and collect only at the end:

// Walk all four sets back to back by reference and copy the elements only
// once, while building the single resulting set (which drops duplicates).
let union_all: BTreeSet<_> = set_a
    .iter()
    .chain(&set_b)
    .chain(&set_c)
    .chain(&set_d)
    .copied()
    .collect();
jthulhu
  • 7,223
  • 2
  • 16
  • 33
0

You can do it fully lazily if you can rely on the source iterators being sorted (which the iterators returned by `.keys()` on a `BTreeMap` are). This gets some help from the itertools crate:

use itertools::{EitherOrBoth, Itertools};

/// Lazily yields the elements that occur in both `iter1` and `iter2`.
///
/// The result is only a correct intersection when each input yields its
/// elements in ascending `Ord` order with no duplicates. For an element
/// present in both inputs, the value taken from `iter1` is the one returned.
fn iter_intersect<T, I1, I2>(iter1: I1, iter2: I2) -> impl Iterator<Item = T>
where
    T: Ord,
    I1: Iterator<Item = T>,
    I2: Iterator<Item = T>,
{
    // Pair up equal elements from the two sorted streams.
    let merged = Itertools::merge_join_by(iter1, iter2, |lhs, rhs| lhs.cmp(rhs));

    // Keep only the elements that were matched on both sides.
    merged.filter_map(|entry| match entry {
        EitherOrBoth::Both(shared, _) => Some(shared),
        EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => None,
    })
}

/// Lazily yields every element that occurs in at least one of `iter1` and
/// `iter2`, emitting an element shared by both inputs only once.
///
/// The result is only a correct union when each input yields its elements in
/// ascending `Ord` order with no duplicates. For an element present in both
/// inputs, the value taken from `iter1` is the one returned.
fn iter_union<T, I1, I2>(iter1: I1, iter2: I2) -> impl Iterator<Item = T>
where
    T: Ord,
    I1: Iterator<Item = T>,
    I2: Iterator<Item = T>,
{
    // Every merge result carries at least one value, so nothing is ever
    // filtered out: a plain `map` is sufficient (no `filter_map` needed).
    Itertools::merge_join_by(iter1, iter2, Ord::cmp).map(|item| match item {
        EitherOrBoth::Left(value)
        | EitherOrBoth::Right(value)
        | EitherOrBoth::Both(value, _) => value,
    })
}

Then you can use them to combine all iterators and finally collect them into a set:

// Lazy iterators over each map's keys; no intermediate set is built here.
let set_a = btree_map_1.keys().cloned();
let set_b = btree_map_2.keys().cloned();
let set_c = btree_map_3.keys().cloned();
let set_d = btree_map_4.keys().cloned();

// Combine pairwise, then merge the two partial results; the only collection
// built is the final `BTreeSet`.
let union_all = iter_union(
    iter_union(set_a, set_b),
    iter_union(set_c, set_d),
).collect::<BTreeSet<i32>>();

// or, as an alternative to the union above (the iterators are passed by
// value, so as written only one of the two expressions can use them):

let intersect_all = iter_intersect(
    iter_intersect(set_a, set_b),
    iter_intersect(set_c, set_d),
).collect::<BTreeSet<i32>>();

Full demonstration on the playground

kmdreko
  • 42,554
  • 6
  • 57
  • 106