I am creating paged aligned memory with memory_align, then I create a MTLBuffer from that with no copy. The GPU then blits data into that MTLBuffer. When that completes, I wrap that same memory in Data with Data.init(bytesNoCopy:count:deallocator:) to pass on in my project. I don't know what to use as the deallocator. I translating this code from an Apple tutorial written in OBJ-C. The Apple code is here. I spent two days trying to understand this researching myself.
The Apple OBJ-C code deallocator looks like this. This is beyond my OBJ-C knowledge.
// Block to dealloc memory created with vm_allocate
void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
^(void *bytes, NSUInteger length)
{
vm_deallocate((vm_map_t)mach_task_self(),
(vm_address_t)bytes,
length);
};
The code in question is towards the end of my listing.
// Blit all positions and velocities and provide them to the client either to show final results
// or continue the simulation on another device
func provideFullData(
_ dataProvider: AAPLFullDatasetProvider,
forSimulationTime time: CFAbsoluteTime
) {
let positionDataSize = positions[oldBufferIndex]!.length
let velocityDataSize = velocities[oldBufferIndex]!.length
var positionDataAddress: UnsafeMutableRawPointer? = nil
var velocityDataAddress: UnsafeMutableRawPointer? = nil
// Create buffers to transfer data to client
do {
// allocate memory on page aligned addresses use by both GPU and CPU
let alignment = 0x4000
// make length a mulitple of alignment
let positionAllocationSize = (positionDataSize + alignment - 1) & (~(alignment - 1))
posix_memalign(&positionDataAddress, alignment, positionAllocationSize)
let velocityAllocationSize = (velocityDataSize + alignment - 1) & (~(alignment - 1))
posix_memalign(&positionDataAddress, alignment, velocityAllocationSize)
}
// Blit positions and velocities to a buffer for transfer
do {
// create MTL buffers with created mem allighed
let positionBuffer = device.makeBuffer(
bytesNoCopy: &positionDataAddress,
length: positionDataSize,
options: .storageModeShared,
deallocator: nil)
positionBuffer?.label = "Final Positions Buffer"
let velocityBuffer = device.makeBuffer(
bytesNoCopy: &velocityDataAddress,
length: velocityDataSize,
options: .storageModeShared,
deallocator: nil)
velocityBuffer?.label = "Final Velocities Buffer"
let commandBuffer = commandQueue?.makeCommandBuffer()
commandBuffer?.label = "Full Transfer Command Buffer"
let blitEncoder = commandBuffer?.makeBlitCommandEncoder()
blitEncoder?.label = "Full Transfer Blits"
blitEncoder?.pushDebugGroup("Full Position Data Blit")
if let _position = positions[oldBufferIndex], let positionBuffer {
blitEncoder?.copy(
from: _position,
sourceOffset: 0,
to: positionBuffer,
destinationOffset: 0,
size: positionBuffer.length)
}
blitEncoder?.popDebugGroup()
blitEncoder?.pushDebugGroup("Full Velocity Data Blit")
if let _velocity = velocities[oldBufferIndex], let velocityBuffer {
blitEncoder?.copy(
from: _velocity,
sourceOffset: 0,
to: velocityBuffer,
destinationOffset: 0,
size: velocityBuffer.length)
}
blitEncoder?.popDebugGroup()
blitEncoder?.endEncoding()
commandBuffer?.commit()
// Ensure blit of data is complete before providing
// the data to the client
commandBuffer?.waitUntilCompleted()
}
// Wrap the memory allocated with vm_allocate
// with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the
// memory's lifetime. Initialize NSData object
// with a deallocation block to free the
// vm_allocated memory when the object has been
// deallocated
do {
//this code was in obj-c I don'tlnow how to convert this to swift
// Block to dealloc memory created with vm_allocate
// let deallocProvidedAddress: ((_ bytes: UnsafeMutableRawPointer?, _ length: Int) -> Void)? =
// { bytes, length in
// vm_deallocate(
// mach_task_self() as? vm_map_t,
// bytes as? vm_address_t,
// length)
// }
let positionData = Data(
bytesNoCopy: &positionDataAddress,
count: positionDataSize,
deallocator: .none) // this may be a memory leak
let velocityData = Data(
bytesNoCopy: &velocityDataAddress,
count: velocityDataSize,
deallocator: .none) // this may be a memory leak
dataProvider(positionData, velocityData, time)
}
}
Here is the listing for the Apple OBJ-C code
// Set the initial positions and velocities of the simulation based upon the simulation's config
- (void)initializeData
{
const float pscale = _config->clusterScale;
const float vscale = _config->velocityScale * pscale;
const float inner = 2.5f * pscale;
const float outer = 4.0f * pscale;
const float length = outer - inner;
_oldBufferIndex = 0;
_newBufferIndex = 1;
vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;
for(int i = 0; i < _config->numBodies; i++)
{
vector_float3 nrpos = generate_random_normalized_vector(-1.0, 1.0, 1.0);
vector_float3 rpos = generate_random_vector(0.0, 1.0);
vector_float3 position = nrpos * (inner + (length * rpos));
positions[i].xyz = position;
positions[i].w = 1.0;
vector_float3 axis = {0.0, 0.0, 1.0};
float scalar = vector_dot(nrpos, axis);
if((1.0f - scalar) < 1e-6)
{
axis.xy = nrpos.yx;
axis = vector_normalize(axis);
}
vector_float3 velocity = vector_cross(position, axis);
velocities[i].xyz = velocity * vscale;
}
NSRange fullRange;
fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
[_positions[_oldBufferIndex] didModifyRange:fullRange];
fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
[_velocities[_oldBufferIndex] didModifyRange:fullRange];
}
/// Set simulation data for a simulation that was begun elsewhere (i.e. on another device)
- (void)setPositionData:(nonnull NSData *)positionData
velocityData:(nonnull NSData *)velocityData
forSimulationTime:(CFAbsoluteTime)simulationTime
{
_oldBufferIndex = 0;
_newBufferIndex = 1;
vector_float4 *positions = (vector_float4 *) _positions[_oldBufferIndex].contents;
vector_float4 *velocities = (vector_float4 *) _velocities[_oldBufferIndex].contents;
assert(_positions[_oldBufferIndex].length == positionData.length);
assert(_velocities[_oldBufferIndex].length == velocityData.length);
memcpy(positions, positionData.bytes, positionData.length);
memcpy(velocities, velocityData.bytes, velocityData.length);
NSRange fullRange;
fullRange = NSMakeRange(0, _positions[_oldBufferIndex].length);
[_positions[_oldBufferIndex] didModifyRange:fullRange];
fullRange = NSMakeRange(0, _velocities[_oldBufferIndex].length);
[_velocities[_oldBufferIndex] didModifyRange:fullRange];
_simulationTime = simulationTime;
}
/// Blit a subset of the positions data for this frame and provide them to the client
/// to show a summary of the simulation's progress
- (void)fillUpdateBufferWithPositionBuffer:(nonnull id<MTLBuffer>)buffer
usingCommandBuffer:(nonnull id<MTLCommandBuffer>)commandBuffer
{
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
blitEncoder.label = @"Position Update Blit Encoder";
[blitEncoder pushDebugGroup:@"Position Update Blit Commands"];
[blitEncoder copyFromBuffer:buffer
sourceOffset:0
toBuffer:_updateBuffer[_currentBufferIndex]
destinationOffset:0
size:_updateBuffer[_currentBufferIndex].length];
[blitEncoder popDebugGroup];
[blitEncoder endEncoding];
}
/// Blit all positions and velocities and provide them to the client either to show final results
/// or continue the simulation on another device
- (void)provideFullData:(nonnull AAPLFullDatasetProvider)dataProvider
forSimulationTime:(CFAbsoluteTime)time
{
NSUInteger positionDataSize = _positions[_oldBufferIndex].length;
NSUInteger velocityDataSize = _velocities[_oldBufferIndex].length;
void *positionDataAddress = NULL;
void *velocityDataAddress = NULL;
// Create buffers to transfer data to client
{
// Use vm allocate to allocate buffer on page aligned address
kern_return_t err;
err = vm_allocate((vm_map_t)mach_task_self(),
(vm_address_t*)&positionDataAddress,
positionDataSize,
VM_FLAGS_ANYWHERE);
assert(err == KERN_SUCCESS);
err = vm_allocate((vm_map_t)mach_task_self(),
(vm_address_t*)&velocityDataAddress,
velocityDataSize,
VM_FLAGS_ANYWHERE);
assert(err == KERN_SUCCESS);
}
// Blit positions and velocities to a buffer for transfer
{
id<MTLBuffer> positionBuffer = [_device newBufferWithBytesNoCopy:positionDataAddress
length:positionDataSize
options:MTLResourceStorageModeShared
deallocator:nil];
positionBuffer.label = @"Final Positions Buffer";
id<MTLBuffer> velocityBuffer = [_device newBufferWithBytesNoCopy:velocityDataAddress
length:velocityDataSize
options:MTLResourceStorageModeShared
deallocator:nil];
velocityBuffer.label = @"Final Velocities Buffer";
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
commandBuffer.label = @"Full Transfer Command Buffer";
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
blitEncoder.label = @"Full Transfer Blits";
[blitEncoder pushDebugGroup:@"Full Position Data Blit"];
[blitEncoder copyFromBuffer:_positions[_oldBufferIndex]
sourceOffset:0
toBuffer:positionBuffer
destinationOffset:0
size:positionBuffer.length];
[blitEncoder popDebugGroup];
[blitEncoder pushDebugGroup:@"Full Velocity Data Blit"];
[blitEncoder copyFromBuffer:_velocities[_oldBufferIndex]
sourceOffset:0
toBuffer:velocityBuffer
destinationOffset:0
size:velocityBuffer.length];
[blitEncoder popDebugGroup];
[blitEncoder endEncoding];
[commandBuffer commit];
// Ensure blit of data is complete before providing the data to the client
[commandBuffer waitUntilCompleted];
}
// Wrap the memory allocated with vm_allocate with a NSData object which will allow the app to
// rely on ObjC ARC (or even MMR) to manage the memory's lifetime. Initialize NSData object
// with a deallocation block to free the vm_allocated memory when the object has been
// deallocated
{
// Block to dealloc memory created with vm_allocate
void (^deallocProvidedAddress)(void *bytes, NSUInteger length) =
^(void *bytes, NSUInteger length)
{
vm_deallocate((vm_map_t)mach_task_self(),
(vm_address_t)bytes,
length);
};
NSData *positionData = [[NSData alloc] initWithBytesNoCopy:positionDataAddress
length:positionDataSize
deallocator:deallocProvidedAddress];
NSData *velocityData = [[NSData alloc] initWithBytesNoCopy:velocityDataAddress
length:velocityDataSize
deallocator:deallocProvidedAddress];
dataProvider(positionData, velocityData, time);
}
}