4

I was looking to learn about AI and found the traveling salesman problem very interesting. I also wanted to learn about genetic algorithms, so it was a fantastic combo. The task is to find the shortest distance traveling from id 1 to each location from the list once and returning to the starting location id 1

Restriction for the problem :

The location id 1 must be the starting and the ending point

The maximum distance allowed is distance <= 9000

Only max of 250000 fitness calculation is allowed


Code :

import numpy as np
import random
import operator
import pandas as pd

val10 = 0
val9 = 0
class Locations:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def dist(self, location):
        x_dist = abs(float(self.x) - float(location.x))
        y_dist = abs(float(self.y) - float(location.y))
        # √( (x2 − x1)^2 + (2 − 1)^2 )
        dist = np.sqrt((x_dist ** 2) + (y_dist ** 2))
        return dist

    def __repr__(self):
        return "(" + str(self.x) + "," + str(self.y) + ")"


class Fitness:
    def __init__(self, route):
        self.r = route
        self.dist = 0
        self.fit = 0.0

    def route_dist(self):
        if self.dist == 0:
            path_dist = 0
            for i in range(0, len(self.r)):
                from_location = self.r[i]
                to_location = None
                if i + 1 < len(self.r):
                    to_location = self.r[i+1]
                else:
                    to_location = self.r[0]

                path_dist += from_location.dist(to_location)
            self.dist = path_dist
        return self.dist

    def route_fittness(self):
        if self.fit == 0:
            self.fit = 1 / float(self.route_dist())
        global val9
        val9 = val9 + 1    
        return self.fit


def generate_route(location_list):
    route = random.sample(location_list, len(location_list))
    return route


def gen_zero_population(size, location_list):
    population = []

    for i in range(0, size):
        population.append(generate_route(location_list))
    return population


def determine_fit(population):
    result = {}
    for i in range(0, len(population)):
        result[i] = Fitness(population[i]).route_fittness()
    global val10
    val10 = val10 + 1
    return sorted(result.items(), key=operator.itemgetter(1), reverse=True)


def fit_proportionate_selection(top_pop, elite_size):
    result = []
    df = pd.DataFrame(np.array(top_pop), columns=["index", "Fitness"])
    df['cumulative_sum'] = df.Fitness.cumsum()
    df['Sum'] = 100*df.cumulative_sum/df.Fitness.sum()

    for i in range(0, elite_size):
        result.append(top_pop[i][0])
    for i in range(0, len(top_pop) - elite_size):
        select = 100*random.random()
        for i in range(0, len(top_pop)):
            if select <= df.iat[i, 3]:
                result.append(top_pop[i][0])
                break
    return result


def select_mating_pool(populatoin, f_p_s_result):
    mating_pool = []
    for i in range(0, len(f_p_s_result)):
        index = f_p_s_result[i]
        mating_pool.append(populatoin[index])
    return mating_pool


def ordered_crossover(p1, p2):
    child, child_p1, child_p2 = ([] for i in range(3))

    first_gene = int(random.random() * len(p1))
    sec_gene = int(random.random() * len(p2))

    start_gene = min(first_gene, sec_gene)
    end_gene = max(first_gene, sec_gene)

    for i in range(start_gene, end_gene):
        child_p1.append(p1[i])

    child_p2 = [item for item in p2 if item not in child_p1]

    child = child_p1 + child_p2
    return child


def ordered_crossover_pop(mating_pool, elite_size):
    children = []

    leng = (len(mating_pool) - (elite_size))
    pool = random.sample(mating_pool, len(mating_pool))

    for i in range(0, elite_size):
        children.append(mating_pool[i])

    for i in range(0, leng):
        var = len(mating_pool)-i - 1
        child = ordered_crossover(pool[i], pool[var])
        children.append(child)
    return children


def swap_mutation(one_location, mutation_rate):
    for i in range(len(one_location)):
        if (random.random() < mutation_rate):
            swap = int(random.random() * len(one_location))

            location1 = one_location[i]
            location2 = one_location[swap]

            one_location[i] = location2
            one_location[swap] = location1
    return one_location


def pop_mutation(population, mutation_rate):
    result = []

    for i in range(0, len(population)):
        mutaded_res = swap_mutation(population[i], mutation_rate)
        result.append(mutaded_res)
    return result


def next_gen(latest_gen, elite_size, mutation_rate):
    route_rank = determine_fit(latest_gen)
    selection = fit_proportionate_selection(route_rank, elite_size)
    mating_selection = select_mating_pool(latest_gen, selection)
    children = ordered_crossover_pop(mating_selection, elite_size)
    next_generation = pop_mutation(children, mutation_rate)
    return next_generation


def generic_algor(population, pop_size, elite_size, mutation_rate, gen):
    pop = gen_zero_population(pop_size, population)
    print("Initial distance: " + str(1 / determine_fit(pop)[0][1]))

    for i in range(0, gen):
        pop = next_gen(pop, elite_size, mutation_rate)

    print("Final distance: " + str(1 / determine_fit(pop)[0][1]))
    best_route_index = determine_fit(pop)[0][0]
    best_route = pop[best_route_index]
    print(best_route)
    return best_route


def read_file(fn):
    a = []
    with open(fn) as f:
        [next(f) for _ in range(6)]
        for line in f:
            line = line.rstrip()
            if line == 'EOF':
                break

            ID, x, y = line.split()
            a.append(Locations(x=x, y=y))
    return a


location_list = read_file(r'path_of_the_file')

population = location_list
pop_size = 100
elite_size = 40
mutation_rate = 0.001
gen = 500
generic_algor(population, pop_size, elite_size, mutation_rate, gen)

print(val10)
print(val9)

Location file with x and y coordinates :

|Locations
|
|52 Locations
|
|Coordinates
|
1 565.0 575.0
2 25.0 185.0
3 345.0 750.0
4 945.0 685.0
5 845.0 655.0
6 880.0 660.0
7 25.0 230.0
8 525.0 1000.0
9 580.0 1175.0
10 650.0 1130.0
11 1605.0 620.0 
12 1220.0 580.0
13 1465.0 200.0
14 1530.0 5.0
15 845.0 680.0
16 725.0 370.0
17 145.0 665.0
18 415.0 635.0
19 510.0 875.0  
20 560.0 365.0
21 300.0 465.0
22 520.0 585.0
23 480.0 415.0
24 835.0 625.0
25 975.0 580.0
26 1215.0 245.0
27 1320.0 315.0
28 1250.0 400.0
29 660.0 180.0
30 410.0 250.0
31 420.0 555.0
32 575.0 665.0
33 1150.0 1160.0
34 700.0 580.0
35 685.0 595.0
36 685.0 610.0
37 770.0 610.0
38 795.0 645.0
39 720.0 635.0
40 760.0 650.0
41 475.0 960.0
42 95.0 260.0
43 875.0 920.0
44 700.0 500.0
45 555.0 815.0
46 830.0 485.0
47 1170.0 65.0
48 830.0 610.0
49 605.0 625.0
50 595.0 360.0
51 1340.0 725.0
52 1740.0 245.0
EOF

I have tried to tweak the value of the parameters but it has never gone below or be 9000 it is always around the upper 9500 What can I improve to get it to work for my location file?

Graph

zellez
  • 398
  • 2
  • 17
  • 4
    The easy thing to try is to reduce the mutation rate. As the population approaches the optimal solution, the random mutations can degrade the children as fast as the crossover improves the children, causing the population fitness to stagnate. – user3386109 Apr 19 '23 at 20:13
  • Large elites can be problematic because they can converge on a non-optimal solution and thereafter prevent better alternatives from gaining a foothold. This is particularly true if non-elites are thrown away at every iteration or totally excluded from crossover. – ncke Apr 20 '23 at 04:13
  • 1
    What does "Only max of 250000 fitness calculation is allowed" mean? Does it mean you are allowed to run up to gen 1249? If so, that is enough to solve the problem. As a side note, I ran your code 100 times without modification and 89% got below 9000 at gen 500, 99% at gen 1249. – ken Apr 20 '23 at 05:12
  • you can try using `optuna`/`hyperopt` to optimize your meta-parameters. – dankal444 Apr 20 '23 at 09:24

1 Answers1

0

Turns out everything is fine tweaking the distance function and the fit_proportionate_selection ensures a faster run time which makes testing the parameters faster. The other change is to have a fixed seed for the random() this way the results can be compared without much variant.

def fit_proportionate_selection(top_pop, elite_size):
    indices, fitness = zip(*top_pop)
    cumulative_sum = list(it.accumulate(fitness))
    total = cumulative_sum[-1]
    weights = [100*cs/total for cs in cumulative_sum]

    result = list(indices[:elite_size])
    
    for i in range(len(top_pop) - elite_size):
        select = random.randrange(100)
        for i, weight in enumerate(weights):
            if select <= weight:
                result.append(top_pop[i][0])
                break
    return result 

Taken from my review question Code review of the same code

The parameters picked where from the comment of the review question:

pop_size = 150, elite_size=50, mutation_rate=0.0005, gen=400
zellez
  • 398
  • 2
  • 17