2

I'm trying to get a better performance on my ant colony optimisation problem. In order to do so, I'm using openCL to run the update pheromones part in parallel. I have just started learning openCL and this is the kernel code I have developed. Although it runs faster than the sequential version, I still think I can achieve more performance with it, but I'm not finding other things I can do. Is there a way to improve this code even more ?

PS: I have tested this code only on the CPU, since the computer I am working on doesn't have a GPU.

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

int calculateLengthOfTrail(__global int*, const int, __global int*, const int );
int edgeInTrail(const int ,const int , __global int* , const int , const int );
int indexOfCity(__global int*, const int, const int, const int);

__kernel void updatePheromones(
    __global double* pheromones, 
    __global int* ants, 
    __global int* distances, 
    __local double* pheromones_old,
    const int numCities, 
    const int numAnts,
    const double pheromoneDecreaseFactor,
    const double pheromoneIncreaseFactor
)
{
    int i = get_global_id(0);
     int k, j;

    if(i<numCities)
    {
        for(j = i +1; j<numCities; j++)
        {
          for (k = 0; k < numAnts; k++)
          {
            double size = calculateLengthOfTrail(ants,k, distances, numCities);
            double decrease = (1.0 - pheromoneDecreaseFactor) *   pheromones_old[i+numCities*j];
            double increase = 0.0;

            int edge = edgeInTrail(i, j, ants, k, numCities);

            if (edge== 1) 
               increase = (pheromoneIncreaseFactor / size);

            pheromones[i+numCities*j] = decrease + increase;

            if (pheromones[i+numCities*j] < 0.0001)
               pheromones[i +numCities*j] = 0.0001;
            else if (pheromones[i + numCities*j] > 100000.0)
              pheromones[i+numCities*j] = 100000.0;

            pheromones[j+numCities*i] = pheromones[i+numCities*j];

          }
        }
     }
}

int edgeInTrail(const int cityX, const int cityY, __global int* ants, const int row, const int numCities)
{

  int lastIndex = numCities - 1;
  int indexCity = indexOfCity(ants, row, cityX, numCities);

  if (indexCity == 0 && ants[1+numCities*row] == cityY) 
    return 1;
  else if (indexCity == 0 && ants[lastIndex+numCities*row] == cityY) 
        return 1;
  else if (indexCity == 0) 
    return 0;
  else if (indexCity == lastIndex && ants[(lastIndex-1)+numCities*row] == cityY)
        return 1;
  else if (indexCity == lastIndex && ants[row*numCities] == cityY) 
    return 1;
  else if (indexCity == lastIndex) 
    return 0;
  else if (ants[(indexCity-1)+numCities*row] == cityY)
        return 1;
  else if (ants[(indexCity+1)+numCities*row] == cityY) 
        return 1;
  else 
    return 0;
}                                             

int calculateLengthOfTrail(__global int* ants, const int row, __global int* distances, const int numCities)
{
    int sumDistance = 0;
    int i;

    for(i =0; i<numCities-1; i++)
         sumDistance += distances[ants[i+numCities*row]+numCities*ants[(i+1)+numCities*row]];

    return sumDistance;

}

int indexOfCity(__global int* ants, int row, int city, int numCities)
{
    int i;

    for(i =0; i<numCities; i++)
    {
        if(ants[i+numCities*row] == city)
            return i;
    }

    return -1;
}
lucasmoura
  • 275
  • 1
  • 10
  • Well, GPU will run it faster for sure. Only by the fact of using OpenCL will not speed it much. I will give it a try later to get a faster kernel (homework :)) – DarkZeros Apr 29 '14 at 08:51
  • Do you still need help with this? I've some suggestions. – Austin Aug 28 '14 at 23:00

0 Answers0