I can't seem to find any good info anywhere for what I've run into. I've written a bit of code for Kohonen SOM in OpenCL, on a iMac w/ a ATI Radeon HD 6770M. I'm choosing the GPU device for the context. There is a single line in my code that is causing a CL_DEVICE_NOT_AVAILABLE
error. If I comment it out, code compiles fine... but with it, and the variations I've tried, I consistently get the error.
Here's the code, with the offending line commented:
"// THIS line, only, causes CL_DEVICE_NOT_AVAILABLE !!!".
I'm hoping one of you guys has run into this at some point, as I'm a little baffled. convert_float(diff)
did not work for me.
There are bound to be computational errors, as I haven't gotten beyond the essential complete-compile step, so feel free to ignore or point those out. Either way, I'm really just trying to get beyond the compile.
inline float _calc_sample_distance(__global float* weights, ulong startIdx, uint nodeWidth, __constant float* sample) {
float accum = 0.0f;
float diff = 0.0f;
uint i = 0;
for(i = 0; i<nodeWidth; i++) {
diff = weights[startIdx+i] - sample[i];
accum += pow(diff,2);
}
accum = pow(accum, .5f);
return accum;
}
inline void _calc_coords(uint dimCount, __constant uint* dimSizes, size_t offset, uint* thisCoords) {
// reversed so, processed as xy, then y
ulong trim = offset, multi = 0;
int i = 0, j = 0;
for(i = dimCount-1; i>=0; i--) {
multi = 1;
for(j=i-1; j>=0; j--) {
multi *= dimSizes[j];
}
thisCoords[i] = trim / multi;
trim = trim % multi;
}
}
inline float _calc_map_coord_distance(uint dimCount, __constant uint* bmuCoords, uint* thisCoords) {
float accum = 0.0f;
uint i = 0;
int diff = 0;
for(i = 0; i < dimCount; i++) {
diff = bmuCoords[i] - thisCoords[i];
diff *= diff;
accum += (float)diff; // THIS line, only, causes CL_DEVICE_NOT_AVAILABLE !!!
}
accum = pow(accum,.5f);
return accum;
}
__kernel void calc_kohonen_som_distances(
// map data
__global float* weights, // weights
uint nodeWidth, // the number of weights per node
uint nodeCount, // the total number of weights
__constant float* sample, // sample, of nodeWidth wide
__global float* output // the output distance of each node to the sample
) {
size_t nodeIndex = get_global_id(0);
ulong startIdx = nodeIndex * nodeWidth;
output[nodeIndex] = _calc_sample_distance(weights,startIdx,nodeWidth,sample);
}
__kernel void calc_kohonen_som_update_weights(
// map data
__global float* weights, // weights
uint nodeWidth, // the number of weights per node
uint dimCount, // the number of dimensions
__constant uint* dimSizes, // the size of each dimension
__constant float *sampleData, // the sample to use for updating the bmu and surrounding units
__constant uint* bmuCoords, // the coordinates of the best matching unit, from which we derive offset
float learningRate, // calculated on the CPU as per step
float radius // calculated on the CPU as per step
) {
size_t nodeIndex = get_global_id(0);
ulong startIdx = nodeIndex * nodeWidth;
uint* thisCoords = (uint*)malloc(sizeof(uint)*dimCount);
memset(thisCoords,0,sizeof(uint)*dimCount);
// determine the coordinates of the offset provided
if(dimCount!=1) {
_calc_coords(dimCount,dimSizes,nodeIndex,thisCoords);
} else {
thisCoords[0] = nodeIndex;
}
float distance = _calc_map_coord_distance(dimCount, bmuCoords, thisCoords);
if(distance<radius) {
float influence = exp( (-1*distance)/(2*pow(radius,2.0f)) );
for(uint i=0;i<dimCount;i++) {
weights[startIdx+i] = weights[startIdx+i] + ( influence * learningRate * (sampleData[i] - weights[startIdx+i]) );
}
}
}