I'm trying to learn about Rust's concurrency and parallel computing and threw together a small script that iterates over a vector of vectors like it was an image's pixels. Since at first I was trying to see how much faster it gets iter
vs par_iter
I threw in a basic timer -- which is probably not amazingly accurate. However, I was getting crazy high numbers. So, I thought I would put together a similar piece of code on Go that allows for easy concurrency and the performance is ~585% faster!
Rust was tested with --release
I also tried using native thread pool but the results were the same. Looked at how many threads I was using and for a bit I was messing around with that as well, to no avail.
What am I doing wrong? (don't mind the definitely not performant way of creating a random value filled vector of vectors)
Rust code (~140ms)
use rand::Rng;
use std::time::Instant;
use rayon::prelude::*;
fn normalise(value: u16, min: u16, max: u16) -> f32 {
(value - min) as f32 / (max - min) as f32
}
fn main() {
let pixel_size = 9_000_000;
let fake_image: Vec<Vec<u16>> = (0..pixel_size).map(|_| {
(0..4).map(|_| {
rand::thread_rng().gen_range(0..=u16::MAX)
}).collect()
}).collect();
// Time starts now.
let now = Instant::now();
let chunk_size = 300_000;
let _normalised_image: Vec<Vec<Vec<f32>>> = fake_image.par_chunks(chunk_size).map(|chunk| {
let normalised_chunk: Vec<Vec<f32>> = chunk.iter().map(|i| {
let r = normalise(i[0], 0, u16::MAX);
let g = normalise(i[1], 0, u16::MAX);
let b = normalise(i[2], 0, u16::MAX);
let a = normalise(i[3], 0, u16::MAX);
vec![r, g, b, a]
}).collect();
normalised_chunk
}).collect();
// Timer ends.
let elapsed = now.elapsed();
println!("Time elapsed: {:.2?}", elapsed);
}
Go code (~24ms)
package main
import (
"fmt"
"math/rand"
"sync"
"time"
)
func normalise(value uint16, min uint16, max uint16) float32 {
return float32(value-min) / float32(max-min)
}
func main() {
const pixelSize = 9000000
var fakeImage [][]uint16
// Create a new random number generator
src := rand.NewSource(time.Now().UnixNano())
rng := rand.New(src)
for i := 0; i < pixelSize; i++ {
var pixel []uint16
for j := 0; j < 4; j++ {
pixel = append(pixel, uint16(rng.Intn(1<<16)))
}
fakeImage = append(fakeImage, pixel)
}
normalised_image := make([][4]float32, pixelSize)
var wg sync.WaitGroup
// Time starts now
now := time.Now()
chunkSize := 300_000
numChunks := pixelSize / chunkSize
if pixelSize%chunkSize != 0 {
numChunks++
}
for i := 0; i < numChunks; i++ {
wg.Add(1)
go func(i int) {
// Loop through the pixels in the chunk
for j := i * chunkSize; j < (i+1)*chunkSize && j < pixelSize; j++ {
// Normalise the pixel values
_r := normalise(fakeImage[j][0], 0, ^uint16(0))
_g := normalise(fakeImage[j][1], 0, ^uint16(0))
_b := normalise(fakeImage[j][2], 0, ^uint16(0))
_a := normalise(fakeImage[j][3], 0, ^uint16(0))
// Set the pixel values
normalised_image[j][0] = _r
normalised_image[j][1] = _g
normalised_image[j][2] = _b
normalised_image[j][3] = _a
}
wg.Done()
}(i)
}
wg.Wait()
elapsed := time.Since(now)
fmt.Println("Time taken:", elapsed)
}