I am having trouble sorting the output of this MapReduce job. The output must be sorted by word first and then by year. I tried the following code, but it does not return sorted output.
from mrjob.job import MRJob
class Job(MRJob):
    """Count how many times each word occurs in each year.

    Every input line is expected to look like ``<date>,<text>``, where the
    first four characters of ``<date>`` are the year and ``<text>`` is a
    whitespace-separated list of words.

    Results are keyed by ``(word, year)`` -- word first -- because the
    required output order is "by word, then by year" and the framework
    orders reducer input by key.
    NOTE(review): the ordering is applied to the serialized key and, on a
    cluster, holds within each reducer's output only; confirm the runner
    configuration (e.g. a single reducer) if one globally sorted file is
    needed.
    """

    def mapper(self, keys, values):
        """Emit ``((word, year), 1)`` for each word on one input line.

        Args:
            keys: Input key supplied by the framework; not used.
            values: One raw input line of the form ``<date>,<text>``.

        Yields:
            ``((word, year), 1)`` once per word occurrence.  Lines that
            contain no comma (including blank lines) produce nothing.
        """
        # Split on the FIRST comma only: a comma inside the text must not
        # break the two-way split the way an unbounded split(',') would.
        date, separator, text = values.strip().partition(',')
        if not separator:
            # Blank or malformed line: nothing to count.
            return

        year = date[:4]
        # No sorting here: the order in which a mapper emits pairs has no
        # effect on the final output, which is ordered by key later on.
        for word in text.split():
            yield (word, year), 1

    def reducer(self, key, values):
        """Add up the occurrences recorded for one ``(word, year)`` key.

        Args:
            key: The ``(word, year)`` key emitted by ``mapper``.
            values: Iterable of the counts emitted for that key.

        Yields:
            ``(key, total)`` where ``total`` is the sum of ``values``.
        """
        yield key, sum(values)
# Start the job only when this file is executed directly as a script;
# importing the module must not trigger a run.
if __name__ == '__main__':
    Job.run()