0

First of all, I hate to ask such a vague question like this, but I'm at my whits' end here. I am attempting to make a genetic algorithm for the traveling salesman classic CS problem.

I have tried to comment my code well such as to make each step obvious as to what I'm trying to do.

I have a struct Node object which contains a city index and x,y coordinates. I am reading in a list of cities from a file and passing the array of struct Node's and the number of them to my function.

If you have any questions,please ask. The gui is being updated every ~1000 iterations, and it definitely changes, but NEVER seems to get any better, even when I run the number of generations for a VERY long time!

Also, if you're confused, I'm returning the distance as a uint64_t for better accuracy and portability than double, but am converting them to double type when doing the fitness function. (This all works, I have verified it)

Can you spot any errors as to why it won't get better?

void GeneticAlgorithm(struct Node *cities, uint64_t *count)
{
    //1 - pick initial random permutations for the population size
    int populationSize =(*count)>10? ((*count)/5) : *count;
    fprintf(stderr, "Population Size: %d\n", populationSize);

    //population consists of populationSize elements (an element being a path)
    struct Node population[populationSize][*count];
    srand (time(NULL));
    //Get Initial Paths
    uint64_t i;
    //iterate over pop. size
    for(i=0; i<populationSize; i++){
        //fill out path for each population
        int j;
        for(j=0; j<*count; j++){
            int element = rand() % (int)(*count);     // 0-count
            while(hasBeenVisited(&cities[element]) == 0){
                element = rand() % (int)(*count);
            }
            memcpy ( &(population[i][j]), &(cities[element]), sizeof(struct Node) );
            setVisited(&cities[element]);

        }
        for(j=0;j<*count; j++){
            (&cities[j])->visited = 0;
        }

    }

    int j;



    //Now, we have our population of randomly selected paths

    struct Node children[populationSize][*count];
    int generation, crossover;

    uint64_t Distances[populationSize];
    uint64_t TotalDistance = 0;
    srand(time(NULL));


    //Iterate over each generation. Basic idea is to do fitness function, generate
    //probability for each, do selection and fill up children array until it has
    //the same populationSize elements as original population, perhaps do a mutation,
    //and replace population array.
    for(generation=0; generation < 5; generation++){



        /*  Get Distance of Each Path and Total Sum of Distances   */
        TotalDistance = 0;
        for(j=0; j<populationSize; j++){
            Distances[j] = 0;
            for(i=0; i<*count; i++){
                if(i==((*count)-1)){
                    Distances[j] += distance(&(population[j][0]), &(population[j][i]));
                }
                else{
                    Distances[j] += distance(&(population[j][i]), &(population[j][i+1]));
                }

            }
            TotalDistance += Distances[j];
        }



        /*  FITNESS FUNCTION    */

        //Evaluate each of the population according to the fitness function (distance through the path)
        double probability[*count];
        double sumProbabilities = 0.0;
        char tempDistanceString[32];
        trimUintDigits(&TotalDistance);
        uintcharf(tempDistanceString, TotalDistance);

        double dTotalDistance = strtod(tempDistanceString, NULL);

        for(i=0; i<populationSize; i++){
            char buf[32];

            //Distances are uint64_t, need to ensure 7 decimal place accuracy (trimuint does this)
            //and uintcharf converts to a decimal representation in a string
            trimUintDigits(&Distances[i]);
            uintcharf(buf, Distances[i]);
            double individualDistance = strtod(buf, NULL);

            probability[i] = sumProbabilities + (individualDistance / totalDistance);
            sumProbabilities += probability[i];
        }
        //set children to all 0 (will replace population)
        memset(&children, 0, sizeof(struct Node)*populationSize*(*count));
        double r;
        int numChildren = 0;

        //Iterate until number of children = current population size
        while(numChildren < populationSize){

            //0 <= r <=1
            r = ((double) rand() / (RAND_MAX));

            if(r>0.7){ //Doesn't always crossover!
                memcpy(&children[numChildren], &population[numChildren], sizeof(struct Node) * (*count));
                numChildren++;
                continue;
            }

            r = ((double) rand() / (RAND_MAX));
            r *= sumProbabilities;
            double nextProb = 0;

            //Pick the two parents for procreation, according to the roulette wheel probability

            int par1=-1, par2=-1;
            while(par1==-1){

                for(i=0; i<populationSize; i++){
                    if(i==populationSize-1){
                        nextProb=probability[0];
                    }
                    else{
                        nextProb = probability[i+1];
                    }

                    if((r > probability[i]) && (r < nextProb)){
                        par1 = i;
                        break;
                    }
                }
                r = ((double) rand() / (RAND_MAX));
                r *= sumProbabilities;
            }

            r = ((double) rand() / (RAND_MAX));
            r*= sumProbabilities;

            while(par2==-1){
                for(i=0; i<populationSize; i++){
                    if(i==populationSize-1){
                        nextProb=probability[0];
                    }
                    else{
                        nextProb = probability[i+1];
                    }

                    if((par1 !=i) && (r > probability[i]) && (r < nextProb)){
                        par2 = i;
                        break;
                    }
                }
                r = ((double) rand() / (RAND_MAX));
                r*= sumProbabilities;
            }



            //Pick my two parents
            struct Node *parentOne = &(population[par1]);
            struct Node *parentTwo = &(population[par2]);

            //Picking a random number of genes from parent two, add them to the child.
            //Then, iterate through parent 1 and add missing nodes to the child
            int GenesFromParentTwo = rand() % (*count-1) + 1;

            if(GenesFromParentTwo < 0 || GenesFromParentTwo > *count){
                GenesFromParentTwo = 1;
            }


            //Copy First 'GenesFromParentTwo' Nodes From Parent 2 to Child
            memcpy(&children[numChildren], &parentTwo[0], sizeof(struct Node)*GenesFromParentTwo);
            int indicesContained[GenesFromParentTwo];
            for(i=0; i<GenesFromParentTwo; i++){
                indicesContained[i] = (int)children[numChildren][i].index;
            }


            //Copy Remaining Missing nodes from Parent 1 to Child
            int numInsertions = 0;
            for(i=*count; i>0; i--){
                if(isContained(indicesContained, &parentOne[i], &GenesFromParentTwo)){
                    continue;
                }
                numInsertions++;
                memcpy(&children[numChildren][GenesFromParentTwo-1+numInsertions], &parentOne[i], sizeof(struct Node));
            }

            numChildren++;
        } //End crossover


        //Replace Population with Children
        for(i=0; i<populationSize; i++){
            memcpy(&population[i], &children[i], sizeof(struct Node) * (*count));
        }


        //Mutate?
        for(i=0; i<populationSize; i++){

            for(j=0; j<*count; j++){

                r = ((double) rand() / (RAND_MAX));
                r *= 100;
                //0 <= r <= 100
                if(r <= 0.001 ){
                    //Mutate!
                    if(j==*count-1){
                        struct Node temp;
                        memcpy(&temp, &population[i][j], sizeof(struct Node));
                        memcpy(&population[i][j], &population[i][0], sizeof(struct Node));
                        memcpy(&population[i][0],  &temp, sizeof(struct Node));
                    }
                    else{
                        struct Node temp;
                        memcpy(&temp, &population[i][j], sizeof(struct Node));
                        memcpy(&population[i][j], &population[i][j+1], sizeof(struct Node));
                        memcpy(&population[i][j+1],  &temp, sizeof(struct Node));
                    }
                }

            }

        }


        //After crossing over each generation, pick the best and send to GUI

        if(generation % 10000 == 0)
        {

            uint64_t shortestGenDistance = UINT64_MAX;
            int elShortest = -0;
            for (j = 0; j < populationSize; j++)
            {
                uint64_t tempDistance = 0;
                for (i = 0; i < *count; i++)
                {
                    if (i == *count - 1)
                    {
                        tempDistance += distance(&(population[j][0]), &(population[j][i]));
                    }
                    else
                    {
                        tempDistance += distance(&(population[j][i]), &(population[j][i + 1]));
                    }
                }
                if (tempDistance < shortestGenDistance)
                {
                    shortestGenDistance = tempDistance;
                    elShortest = j;
                }
            }


            char buffer[8192];
            push_node_array_to_json(buffer, &population[elShortest][0], count, generation + 1);
            push(buffer);
        }



    } //End Generations
} //End algorithm
Yu Hao
  • 119,891
  • 44
  • 235
  • 294
Matthew Darnell
  • 1,315
  • 2
  • 12
  • 24
  • 3
    Your function might be a bit too large to effectively reason about. Have you tried breaking it down into more manageable parts? – EOF Nov 08 '15 at 18:18
  • No, but maybe I should. I started off trying to keep everything small and modular, but I quickly moved into frustration mode! – Matthew Darnell Nov 08 '15 at 18:41
  • Can you pseudo-code your algorithm? It's full of memcpy, buffers, data types and implementation details that make it difficult to evaluate on a conceptual basis. – Andreas Dec 04 '15 at 10:11

1 Answers1

2

Genetic algorithms applied to the (symmetric) TSP work usually quite well using EdgeRecombinationCrossover (ERX), tournament-based selection (instead of fitness proportional selection), and 2-opt mutation (you seem to use swap mutation which is quite disruptive on the TSP). For problem instances between 50 and 1000 cities I would go with a population size between 100 and 1000 and tournament group size between 2 and 10 you should get reasonable results within 200-500 generations. I think your mutation probability is too low, I would use 1-5%.

A genetic algorithm that solves the TSP could look like the following (C# pseudo-code):

const uint seed = 0;
const int popSize = 100;
const int generations = 500;
const double mutationRate = 0.05;

var tsp = new TravelingSalesmanProblem(/*...*/);
var random = new Random(seed);
var population = new int[popSize];
var qualities = new double[popSize];
var nextGen = new int[popSize];
var nextQual = new double[popSize];

for (int i = 0; i < popSize; i++) {
  population[i] = Enumerable.Range(0, tsp.Cities).Shuffle().ToArray();
  qualities[i] = tsp.Evaluate(population[i]);
}
var bestQuality = qualities.Min();
var bestQualityGeneration = 0;

for (int g = 0; g < generations; g++) {  
  var parents = population.SampleTournament(random, 2 * popSize, qualities, groupSize: 3).ToArray();
  for (int i = 0; i < popSize; i++) {
    nextGen[i] = EdgeRecombinationCrossover.Apply(random, parents[i * 2], parents[i * 2 + 1]);
    if (random.NextDouble() < mutationRate) TwoOptManipulator.Apply(random, nextGen[i]);
    nextQual[i] = tsp.Evaluate(nextGen[i]);
    if (nextQual[i] < bestQuality) {
      bestQuality = nextQual[i];
      bestQualityGeneration = g;
    }
  }
  Array.Copy(nextGen, population, popSize);
  Array.Copy(nextQual, qualities, popSize);
}

In general though, the GA is not a first choice algorithm for solving TSPs. The TSP can usually be solved very well by local search using 2-opt moves. If you use a variable neighbor descent using k-opt moves in general you can further improve quality. Lin-kerninghan is a pretty advanced software for solving TSPs, they do make use of a genetic algorithm as a meta-level optimizer on crossing and mutating local optima that were optained using k-opt local search. The GA applied on local optima is more efficient than if you apply it on the whole solution space. You would need to make sure however to add diversity preservation into the GA otherwise the population might quickly collapse to the same solution. Genetic local search techniques usually accept new solutions to a population only if they are substantially different.

Andreas
  • 6,447
  • 2
  • 34
  • 46