I'm new to OR-Tools and CP-SAT, and I'm looking for guidance on how to most efficiently optimize sums of variables toward a common average value. Here's a very simplified example to help explain:
from ortools.sat.python import cp_model
from random import randint
model = cp_model.CpModel()
# There are three groups of 5 boolean variables: a, b, and c
a_variables = [model.NewBoolVar(f"a{n}") for n in range(5)]
sum_a = model.NewIntVar(0, 5, "sum_a")
model.Add(sum_a == cp_model.LinearExpr.Sum(a_variables))
b_variables = [model.NewBoolVar(f"b{n}") for n in range(5)]
sum_b = model.NewIntVar(0, 5, "sum_b")
model.Add(sum_b == cp_model.LinearExpr.Sum(b_variables))
c_variables = [model.NewBoolVar(f"c{n}") for n in range(5)]
sum_c = model.NewIntVar(0, 5, "sum_c")
model.Add(sum_c == cp_model.LinearExpr.Sum(c_variables))
# Assign a random sum for the boolean variables
model.Add(cp_model.LinearExpr.Sum(a_variables + b_variables + c_variables) == randint(0, 15))
# We want to optimize so that each group of variables a, b, and c has the same sum,
# or as close as possible.
desired_sum = 3
objectives = []
# Add objectives here
model.Minimize(cp_model.LinearExpr.Sum(objectives))
solver = cp_model.CpSolver()
solver.parameters.log_search_progress = True
solver.parameters.num_search_workers = 1
solver.Solve(model)
solution = {}
solution["sum_a"] = solver.Value(sum_a)
solution["sum_b"] = solver.Value(sum_b)
solution["sum_c"] = solver.Value(sum_c)
print(solution)
It seems to me that in order to guarantee a good result, the objective should penalize differences non-linearly, so that large differences get disproportionately heavier penalties than small ones. My best effort so far is to take the absolute value of each difference, then create a series of boolean variables that test for increasingly large differences, and penalize each of those variables more heavily than the last.
diff_a = model.NewIntVar(0, 3, "diff_a")
model.AddAbsEquality(diff_a, desired_sum - sum_a)
diff_b = model.NewIntVar(0, 3, "diff_b")
model.AddAbsEquality(diff_b, desired_sum - sum_b)
diff_c = model.NewIntVar(0, 3, "diff_c")
model.AddAbsEquality(diff_c, desired_sum - sum_c)
objectives = []
for name, diff in (("a", diff_a), ("b", diff_b), ("c", diff_c)):
    penalties = [model.NewBoolVar(f"diff_{name}_{n}") for n in range(3)]
    for n, penalty in enumerate(penalties):
        model.Add(diff > n).OnlyEnforceIf(penalty)
        model.Add(diff <= n).OnlyEnforceIf(penalty.Not())
    objectives.append(cp_model.LinearExpr.WeightedSum(penalties, [1, 2, 3]))
This works well: with weights 1, 2, and 3, a difference of d contributes 1 + 2 + ... + d to the objective (so 1, 3, and 6 for differences of 1, 2, and 3), which grows roughly quadratically. It's also much faster than my first attempt, which used AddMultiplicationEquality to square the differences. But I'm wondering whether anyone has come up with a more efficient way to accomplish this optimization. My method certainly adds a lot of variables to the model.
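For reference, here is a rough sketch of what that first squared-difference attempt looked like (assuming the same model and diff_a, diff_b, diff_c variables as above; the squared_* names are just illustrative):
objectives = []
for name, diff in (("a", diff_a), ("b", diff_b), ("c", diff_c)):
    # squared == diff * diff; each difference is at most 3, so the square is at most 9
    squared = model.NewIntVar(0, 9, f"squared_{name}")
    model.AddMultiplicationEquality(squared, [diff, diff])
    objectives.append(squared)
model.Minimize(cp_model.LinearExpr.Sum(objectives))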