This is a (more correct) answer based on the same approach as the previous answer, however, it solves the entire problem as a single weighted bipartite matching.
The same considerations apply as for the previous answer; however, this answer will find an answer if one exists. It has to, however, condition on the number of projects used in the final solution, so it can find multiple "good" solutions for different numbers of projects used (a project is considered used if it has 1 or more students.)
#!/usr/bin/python
"""
filename: student_assign.py
purpose: demonstrate that the problem described in
https://stackoverflow.com/questions/62755778/modified-version-of-student-project-allocation-algorithm
can be solved as an instance of MCF.
"""
import networkx as nx
# For this demonstration we take data directly from the problem description
#supervisor | Topics of Interest | No. Of Groups
#L1 | P1, P3, P4 | 2
#L2 | P5, P2, P9 | 1
#L3 | P1, P3, P4 | 1
#L4 | P1, P3, P4 | 4
#L5 | SP1, P3, P8 | 3
#L6 | P32, P3, P40 | 3
supervisors = {
'L1' : { 'topics' : ['P1', 'P3', 'P4'], 'num_groups' : 2},
'L2' : { 'topics' : ['P5', 'P2', 'P9'], 'num_groups' : 1},
'L3' : { 'topics' : ['P1', 'P3', 'P4'], 'num_groups' : 1},
'L4' : { 'topics' : ['P1', 'P3', 'P4'], 'num_groups' : 4},
'L5' : { 'topics' : ['SP1', 'P3', 'P8'], 'num_groups' : 3},
'L6' : { 'topics' : ['P32', 'P3', 'P40'], 'num_groups' : 3},
}
all_topics = sorted(list({ t for s in supervisors for t in supervisors[s]['topics'] }))
# assuming there is a typo in the problem specification and 'supervisor' = 'student' below
#supervisor | Pref1 | Pref 2 | Pref 3 | Pref 4 |
#S1 | P4 | P1 | SP1 | P5 |
#S2 | P1 | P9 | SP1 | P5 |
#S3 | P3 | P1 | P2 | P5 |
#S4 | P4 | P1 | P40 | P5 |
#S5 | P4 | P32 | SP1 | P5 |
#S6 | P9 | P1 | SP1 | P5 |
students = {
'S1' : ['P4', 'P1', 'SP1', 'P5'] ,
'S2' : ['P1', 'P9', 'SP1', 'P5'] ,
'S3' : ['P3', 'P1', 'P2', 'P5'] ,
'S4' : ['P4', 'P1', 'P40', 'P5'] ,
'S5' : ['P4', 'P32', 'SP1', 'P5'] ,
'S6' : ['P9', 'P1', 'SP1', 'P5'] ,
}
MAX_GROUP_SIZE = 2
def get_student_topic_supervisor_assignments(all_topics,students,supervisors,num_topics_used,max_group_size=MAX_GROUP_SIZE,do_supervisor_load_balancing=False):
G = nx.DiGraph()
G.add_node('sink',demand=len(students) - num_topics_used)
for topic in all_topics:
G.add_node(topic)
G.add_edge(topic, 'sink', weight = 0, capacity = max_group_size-1)
for student in students:
prefs = students[student]
G.add_node(student,demand=-1)
# add increasing weight edges from student to preferences (lowest == best)
for i, topic in enumerate(prefs):
G.add_edge(student, topic, weight = i, capacity = 1)
G.add_node('sink_2',demand=num_topics_used)
for topic in all_topics:
G.add_node(topic + "_2")
G.add_edge(topic, topic + "_2", weight = 0, capacity = 1 )
for supervisor in supervisors:
supervisor_properties = supervisors[supervisor]
for topic in supervisor_properties['topics']:
G.add_edge(topic + "_2", supervisor, weight = 0, capacity = 1)
if do_supervisor_load_balancing:
for i in range(supervisor_properties['num_groups']):
G.add_node(supervisor + "_dummy")
G.add_edge(supervisor, supervisor + "_dummy", weight = i, capacity = 1)
G.add_edge(supervisor + "_dummy", 'sink_2', weight = 0, capacity = 1)
else:
G.add_edge(supervisor, 'sink_2', weight = 0, capacity = supervisor_properties['num_groups'])
# solve the weighted matching
flow_dict = nx.min_cost_flow(G)
for topic in all_topics:
edges = flow_dict[topic]
if edges['sink'] and not edges[topic+"_2"]:
raise RuntimeError('Solution with num_topics_used={n} is not valid.'.format(n=num_topics_used))
# decode solution
topic_assignments = {t : [] for t in all_topics}
for student in students:
edges = flow_dict[student]
for target in edges:
if edges[target]:
topic_assignments[target].append(student)
break
supervisor_assignments = {s : [] for s in supervisors}
for topic in all_topics:
edges = flow_dict[topic+"_2"]
for target in edges:
if edges[target]:
supervisor_assignments[target].append(topic)
return topic_assignments, supervisor_assignments
num_students = len(students)
for n in range(1,num_students+1):
try:
topic_assignments, supervisor_assignments =\
get_student_topic_supervisor_assignments(all_topics,students,supervisors,num_topics_used=n)
print ' An optimal solution was found with `num_topics_used`={n}'.format(n=n)
print ' Topic assignments:\n', topic_assignments
print ' Supervisor assignments:\n', supervisor_assignments
except Exception as e:
pass
This outputs:
An optimal solution was found with `num_topics_used`=4
Topic assignments:
{'P2': [], 'P3': ['S3'], 'P1': ['S2', 'S4'], 'P4': ['S1', 'S5'], 'P5': [], 'SP1': [], 'P8': [], 'P9': ['S6'], 'P32': [], 'P40': []}
Supervisor assignments:
{'L6': ['P3'], 'L4': ['P4'], 'L5': [], 'L2': ['P9'], 'L3': ['P1'], 'L1': []}
An optimal solution was found with `num_topics_used`=5
Topic assignments:
{'P2': [], 'P3': ['S3'], 'P1': ['S2'], 'P4': ['S1', 'S4'], 'P5': [], 'SP1': [], 'P8': [], 'P9': ['S6'], 'P32': ['S5'], 'P40': []}
Supervisor assignments:
{'L6': ['P3', 'P32'], 'L4': ['P1'], 'L5': [], 'L2': ['P9'], 'L3': ['P4'], 'L1': []}
An optimal solution was found with `num_topics_used`=6
Topic assignments:
{'P2': [], 'P3': ['S3'], 'P1': ['S2'], 'P4': ['S4'], 'P5': [], 'SP1': ['S1'], 'P8': [], 'P9': ['S6'], 'P32': ['S5'], 'P40': []}
Supervisor assignments:
{'L6': ['P3', 'P32'], 'L4': ['P1'], 'L5': ['SP1'], 'L2': ['P9'], 'L3': ['P4'], 'L1': []}
Supervisor Load Balancing
An update of this solution added an extra parameter to the function do_supervisor_load_balancing
, which (when set to true) will prefer solutions where the number of topics each supervisor is assigned is similar.
Note, that using load balancing can potentially set the two criteria at odds:
- balancing supervisor loads
- giving students preference on which projects they work
Setting the weights of one higher than the other (by an order of magnitude) will ensure that criteria is much more highly weighted. As it stands, the solution presented here gives about equal weight to both criteria.
In the above example, when load balancing is used the following is output:
An optimal solution was found with `num_topics_used`=4
Topic assignments:
{'P2': [], 'P3': ['S3'], 'P1': ['S2', 'S4'], 'P4': ['S1', 'S5'], 'P5': [], 'SP1': [], 'P8': [], 'P9': ['S6'], 'P32': [], 'P40': []}
Supervisor assignments:
{'L6': ['P3'], 'L4': [], 'L5': [], 'L2': ['P9'], 'L3': ['P4'], 'L1': ['P1']}
An optimal solution was found with `num_topics_used`=5
Topic assignments:
{'P2': [], 'P3': ['S3'], 'P1': ['S2'], 'P4': ['S1', 'S4'], 'P5': [], 'SP1': [], 'P8': [], 'P9': ['S6'], 'P32': ['S5'], 'P40': []}
Supervisor assignments:
{'L6': ['P32'], 'L4': [], 'L5': ['P3'], 'L2': ['P9'], 'L3': ['P4'], 'L1': ['P1']}
An optimal solution was found with `num_topics_used`=6
Topic assignments:
{'P2': [], 'P3': ['S3'], 'P1': ['S2'], 'P4': ['S4'], 'P5': [], 'SP1': ['S1'], 'P8': [], 'P9': ['S6'], 'P32': ['S5'], 'P40': []}
Supervisor assignments:
{'L6': ['P32'], 'L4': ['P3'], 'L5': ['SP1'], 'L2': ['P9'], 'L3': ['P4'], 'L1': ['P1']}