If I understand correctly, the problem is whether you can get back to a starting point. I would start with plain Python for the sake of simplicity and explicitness. The obvious brute-force solution is to check all reachable destinations for every source present in the data; you just need to organize the data a little, and then it's simple chaining. The code below implements this approach.
def get_all_sources(data):
    # Map each source to the set of its direct destinations.
    ans = dict()
    for src, dst in data:
        ans.setdefault(src, set()).add(dst)
    return ans

def get_all_possible_destinations(src, all_src, ans=None):
    # Recursively collect every destination reachable from src.
    ans = set() if ans is None else ans
    for dst in all_src.get(src, set()):
        if dst in ans:
            continue
        ans.add(dst)
        get_all_possible_destinations(dst, all_src, ans)
    return ans

def pipeline_source_by_source(data):
    all_src = get_all_sources(data)
    for src in all_src:
        all_possible_destinations = get_all_possible_destinations(src, all_src)
        if src in all_possible_destinations:
            # src can reach itself, so the data contains a cycle.
            print(f"found problem: {src} -> {src}")
            break
    else:
        print('no problems found')

if __name__ == '__main__':
    data_list = [
        [(1, 2)],
        [(1, 2), (2, 3)],
        [(1, 2), (3, 4), (2, 3)],
        [(1, 2), (3, 4), (2, 3), (3, 1)],
        [(5, 6), (5, 7), (5, 8), (5, 9), (9, 10), (10, 5)],
        [(5, 6), (5, 7), (5, 8), (5, 9), (9, 10), (10, 15)]
    ]
    for idx, data in enumerate(data_list):
        print(idx)
        pipeline_source_by_source(data)
Result:
0
no problems found
1
no problems found
2
no problems found
3
found problem: 1 -> 1
4
found problem: 5 -> 5
5
no problems found
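
One caveat: get_all_possible_destinations recurses once per chained destination, so a very long chain could hit Python's default recursion limit. If that ever matters, the same reachability check can be done iteratively with an explicit stack. Here is a minimal sketch of that idea; the name has_cycle is just my own for illustration, and it takes the same list of (src, dst) tuples as above:

from collections import deque

def has_cycle(data):
    # Same adjacency mapping as get_all_sources.
    adj = {}
    for src, dst in data:
        adj.setdefault(src, set()).add(dst)
    for start in adj:
        # Iterative walk over everything reachable from start.
        seen = set()
        stack = deque(adj[start])
        while stack:
            node = stack.pop()
            if node == start:
                # start is reachable from itself -> cycle.
                return True
            if node in seen:
                continue
            seen.add(node)
            stack.extend(adj.get(node, ()))
    return False

print(has_cycle([(1, 2), (3, 4), (2, 3), (3, 1)]))  # True
print(has_cycle([(1, 2), (2, 3)]))                  # False

It stops as soon as some source can reach itself, which mirrors the break in pipeline_source_by_source.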