Great and challenging question!
I tried an approach and have explained each step in the code comments below:
# Reference sentences in their correct, complete form.
clean_sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "A wizard's job is to vex chumps quickly in fog.",
]

# Shuffled fragments of the sentences above (e.g. rows read from a CSV),
# kept as a plain list. Trailing spaces are part of each fragment so that
# concatenating fragments in order reproduces the original sentence.
jumbled_sentences = [
    "is to vex chumps ",
    "jumps over the ",
    "The quick brown fox ",
    "quickly in fog.",
    "lazy dog.",
    "A wizard's job ",
]
# from fuzzywuzzy import fuzz, process
from rapidfuzz import fuzz, process # use this for faster results when a lot of fuzzywuzzy operations are to be done
# For every clean sentence, pick the jumbled fragments that belong to it,
# order them by where they occur in the sentence, and print the result.
for clean_sentence in clean_sentences:
    # Keep only fragments that score 100 with partial_ratio, i.e. fragments
    # that align exactly with some window of the clean sentence.
    # limit=None matters: process.extract returns only the 5 best matches by
    # default, which would silently drop fragments of any sentence that was
    # split into more than five pieces.
    matches = process.extract(
        clean_sentence,
        jumbled_sentences,
        scorer=fuzz.partial_ratio,
        score_cutoff=100,
        limit=None,
    )

    # Map each fragment's start offset in the clean sentence to the fragment.
    fragment_by_offset = {}
    for match in matches:
        fragment = match[0]  # each result starts with the matched choice
        offset = clean_sentence.find(fragment)
        if offset == -1:
            # NOTE(review): partial_ratio aligns the shorter string against
            # the longer one, so a fragment longer than the sentence could
            # presumably score 100 without being contained in it. Skip such
            # fragments instead of raising ValueError from str.index.
            continue
        fragment_by_offset[offset] = fragment

    # Join the fragments in ascending offset order to rebuild the sentence.
    final_sentence = "".join(
        fragment_by_offset[offset] for offset in sorted(fragment_by_offset)
    )
    print(final_sentence)

# Expected output:
# The quick brown fox jumps over the lazy dog.
# A wizard's job is to vex chumps quickly in fog.