I am working on a Voxel raytracer and I want to implement per-voxel lighting.
I tried to do it like this:
- Raytrace the scene into 2 textures, one for the position of the hit rounded to a specific resolution and a normal
- voxelize all the positions into a buffer
- for each pixel in the position texture get the corresponding voxel from the buffer and write it to the output texture
the wgsl code looks like this:
@group(0)
@binding(0)
var position_input: texture_storage_2d<rgba32float, read>;
@group(0)
@binding(1)
var normal_input: texture_storage_2d<rgba8sint, read>;
@group(0)
@binding(2)
var output: texture_storage_2d<rgba8unorm, write>;
@group(1)
@binding(0)
var<storage, read_write> voxels: array<Voxel>;
struct Voxel {
position: vec3<f32>,
color: vec3<f32>,
};
@group(1)
@binding(1)
var<storage, read_write> voxel_count: atomic<u32>;
@compute
@workgroup_size(16, 16)
fn voxelize(
@builtin(global_invocation_id) global_id: vec3<u32>,
) {
let output_size = vec2<u32>(textureDimensions(output));
let position = vec2<u32>(global_id.xy);
if (position.x >= output_size.x || position.y >= output_size.y) {
return;
}
let voxel_position_full = textureLoad(position_input, position);
// do not voxelize background
if voxel_position_full.w == 0.0 {return;}
let voxel_position = voxel_position_full.xyz;
// check if voxel_position is contained in voxels
// if not then insert it, if yes then average
let atomic_voxel_count = atomicLoad(&voxel_count);
for (var i = 0u; i < atomic_voxel_count; i += 1u) {
var voxel = voxels[i];
if all(voxel.position == voxel_position) {
// let color = voxel.color * 32.0 % 32.0 / 32.0;
let color = voxel_position;
voxel.color = (voxel.color + color) * 0.5;
return;
}
}
// no voxel was found insert
atomicAdd(&voxel_count, 1u);
let color = voxel_position;
let c = atomicLoad(&voxel_count);
voxels[c] = Voxel(voxel_position, color);
}
@compute
@workgroup_size(16, 16)
fn summarize(
@builtin(global_invocation_id) global_id: vec3<u32>,
) {
let output_size = vec2<u32>(textureDimensions(output));
let position = vec2<u32>(global_id.xy);
if (position.x >= output_size.x || position.y >= output_size.y) {
return;
}
let color = vec4<f32>(0.0, 0.0, 0.0, 1.0);
textureStore(output, position, color);
let voxel_position_full = textureLoad(position_input, position);
// skip background
if voxel_position_full.w == 0.0 {return;}
let voxel_position = voxel_position_full.xyz;
let voxel_count = atomicLoad(&voxel_count);
for (var i = 0u; i < voxel_count; i += 1u) {
let voxel = voxels[i];
if all(voxel.position == voxel_position) {
let color = vec4<f32>(voxel.color, 1.0);
// let color = vec4<f32>(0.0, 1.0, 0.0, 1.0);
textureStore(output, position, color);
return;
}
}
}
I also turned the voxel_count
storage atomic, this seems to fix some problems.
But I also need to make the voxels
array atomic, but I do not know how this should work, as the atomic
wrapper only accepts scalar types.
the rust render function looks like this:
pub fn render(&self, device: &wgpu::Device, queue: &wgpu::Queue) {
queue.write_buffer(&self.voxel_count_buffer, 0, &[0, 0, 0, 0]); // count is 0_u32
// voxelize stage
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Render Encoder"),
});
let (dispatch_width, dispatch_height) =
compute_work_group_count((RESOLUTION[0], RESOLUTION[1]), (16, 16));
let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("compute pass descriptor"),
});
compute_pass.set_pipeline(&self.voxelize_pipeline);
compute_pass.set_bind_group(0, &self.texture_bind_group, &[]);
compute_pass.set_bind_group(1, &self.voxel_bind_group, &[]);
compute_pass.dispatch_workgroups(dispatch_width, dispatch_height, 1);
drop(compute_pass);
queue.submit(std::iter::once(encoder.finish()));
// summarize stage
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Render Encoder"),
});
let (dispatch_width, dispatch_height) =
compute_work_group_count((RESOLUTION[0], RESOLUTION[1]), (16, 16));
let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("compute pass descriptor"),
});
compute_pass.set_pipeline(&self.summarize_pipeline);
compute_pass.set_bind_group(0, &self.texture_bind_group, &[]);
compute_pass.set_bind_group(1, &self.voxel_bind_group, &[]);
compute_pass.dispatch_workgroups(dispatch_width, dispatch_height, 1);
drop(compute_pass);
queue.submit(std::iter::once(encoder.finish()));
}
I reset the voxel_count
to 0 and compute the two functions in seperate compute passes.
Is there a way of properly synchronizing the memory to remove the data races or is this method fundamentally flawed for parallel computing purposes?
There are also the storageBarrier()
and workgroupBarrier()
functions in WGSL but I could not find any documentation and do not know how they work.