I tried to reproduce your claims, and this is what I got:
use std::{collections::HashMap, hint::black_box, time::Instant};
/// Payload for the clone benchmark: a few inline fields plus several
/// heap-backed collections, some of them nested.
///
/// `Clone` is derived, so cloning the struct clones every field in turn.
#[derive(Clone)]
pub struct MyStruct {
    /// Five inline bytes (the element type is `u8`, despite the `i8` in the name).
    pub five_i8: [u8; 5],
    /// Six inline `f32` values.
    pub six_f32: [f32; 6],
    /// A single inline flag.
    pub one_bool: bool,
    /// Flat byte vector.
    pub vec1: Vec<u8>,
    /// Byte vectors nested three levels deep.
    pub vec2: Vec<Vec<Vec<u8>>>,
    /// Byte vectors nested two levels deep.
    pub vec3: Vec<Vec<u8>>,
    /// Map from an `i8` key to a byte vector.
    pub hashmap: HashMap<i8, Vec<u8>>,
    /// Flat vector of `(i8, i8)` pairs.
    pub vec4: Vec<(i8, i8)>,
}
/// Builds a `MyStruct` with nested vectors, clones it repeatedly, and prints
/// the average time per clone in whole microseconds.
fn main() {
    // Number of timed clones; also the divisor for the per-clone average.
    const ITERATIONS: u32 = 10_000;

    // `black_box` hides the value from the optimizer so the clones below
    // cannot be constant-folded or elided.
    let s = black_box(MyStruct {
        five_i8: [42u8; 5],
        six_f32: [69.420; 6],
        one_bool: true,
        vec1: vec![42u8; 4],
        // 4 outer entries, each holding 20 vectors of 7 bytes.
        vec2: vec![vec![vec![1u8; 7]; 20]; 4],
        // 4 vectors of 20 bytes.
        vec3: vec![vec![42u8; 20]; 4],
        // Keys 0..4, each mapped to a 10-byte vector.
        hashmap: (0..4).map(|key| (key, vec![2u8; 10])).collect(),
        vec4: vec![(1i8, 2i8); 30],
    });

    let start = Instant::now();
    for _ in 0..ITERATIONS {
        // Passing the clone through `black_box` keeps it from being optimized out.
        black_box(s.clone());
    }
    let elapsed = start.elapsed();

    // Integer division: any sub-microsecond remainder is truncated.
    println!("Time: {} us", elapsed.as_micros() / u128::from(ITERATIONS));
}
$ cargo run --release
Time: 17 us
Now why is that so slow? The answer is: Heap allocations. Everything else is very fast.
The only things in your struct that perform heap allocations are the `HashMap` and the `Vec`s.
Each non-empty `Vec` or `HashMap` needs one heap allocation for its own buffer, in addition to whatever its elements allocate themselves.
So let's see:
- `vec1`: 1 allocation
- `vec2`: 1 + 4 * (1 + 20) = 85 allocations
- `vec3`: 1 + 4 = 5 allocations
- `hashmap`: 1 + 4 = 5 allocations
- `vec4`: 1 allocation
That's a total of 1 + 85 + 5 + 5 + 1 = 97 allocations per clone.
The easiest way to speed this up is to replace the nested `Vec<Vec<Vec<u8>>>` and `Vec<Vec<u8>>` with single, flattened `Vec<u8>`s.
This would reduce `vec2` and `vec3` to a single allocation each, and bring the total count down to 1 + 1 + 1 + 5 + 1 = 9 allocations.
Like this:
use std::{collections::HashMap, hint::black_box, time::Instant};
/// Payload for the clone benchmark, with the formerly nested vectors
/// flattened into plain byte vectors.
///
/// `Clone` is derived, so cloning the struct clones every field in turn.
#[derive(Clone)]
pub struct MyStruct {
    /// Five inline bytes (the element type is `u8`, despite the `i8` in the name).
    pub five_i8: [u8; 5],
    /// Six inline `f32` values.
    pub six_f32: [f32; 6],
    /// A single inline flag.
    pub one_bool: bool,
    /// Flat byte vector.
    pub vec1: Vec<u8>,
    /// Flat byte vector.
    pub vec2: Vec<u8>,
    /// Flat byte vector.
    pub vec3: Vec<u8>,
    /// Map from an `i8` key to a byte vector.
    pub hashmap: HashMap<i8, Vec<u8>>,
    /// Flat vector of `(i8, i8)` pairs.
    pub vec4: Vec<(i8, i8)>,
}
/// Builds a `MyStruct` whose `vec2` and `vec3` are flat byte vectors, clones
/// it repeatedly, and prints the average time per clone in whole microseconds.
fn main() {
    // Number of timed clones; also the divisor for the per-clone average.
    const ITERATIONS: u32 = 10_000;

    // `black_box` hides the value from the optimizer so the clones below
    // cannot be constant-folded or elided.
    let s = black_box(MyStruct {
        five_i8: [42u8; 5],
        six_f32: [69.420; 6],
        one_bool: true,
        vec1: vec![42u8; 4],
        // Flattened form of 4 x 20 vectors of 7 bytes, filled with the same
        // `1u8` value the nested benchmark uses.
        vec2: vec![1u8; 4 * 20 * 7],
        // Flattened form of 4 vectors of 20 bytes.
        vec3: vec![42u8; 4 * 20],
        // Keys 0..4, each mapped to a 10-byte vector.
        hashmap: (0..4).map(|key| (key, vec![2u8; 10])).collect(),
        vec4: vec![(1i8, 2i8); 30],
    });

    let start = Instant::now();
    for _ in 0..ITERATIONS {
        // Passing the clone through `black_box` keeps it from being optimized out.
        black_box(s.clone());
    }
    let elapsed = start.elapsed();

    // Integer division: any sub-microsecond remainder is truncated.
    println!("Time: {} us", elapsed.as_micros() / u128::from(ITERATIONS));
}
$ cargo run --release
Time: 1 us