I have been trying to tweek the mapper_pre_filter
example given here. Now, if instead of specifying the command directly in steps, if I'm writing a method to return that command, like this:
from mrjob.job import MRJob
from mrjob.protocol import JSONValueProtocol
class KittiesJob(MRJob):
OUTPUT_PROTOCOL = JSONValueProtocol
def filter_input(self):
return ''' grep 'kitty' '''
def test_for_kitty(self, _, value):
yield None, 0 # make sure we have some output
if 'kitty' in value:
yield None, 1
def sum_missing_kitties(self, _, values):
yield None, sum(values)
def steps(self):
return [
self.mr(mapper_pre_filter=self.filter_input,
mapper=self.test_for_kitty,
reducer=self.sum_missing_kitties)]
if __name__ == '__main__':
KittiesJob().run()
I'm getting the following exception:
Exception: error getting step information:
Traceback (most recent call last):
File "/Users/sverma/work/mrjob/filter_input.py", line 30, in <module>
KittiesJob().run()
File "/Library/Python/2.7/site-packages/mrjob/job.py", line 494, in run
mr_job.execute()
File "/Library/Python/2.7/site-packages/mrjob/job.py", line 500, in execute
self.show_steps()
File "/Library/Python/2.7/site-packages/mrjob/job.py", line 677, in show_steps
print >> self.stdout, json.dumps(self._steps_desc())
File "/Library/Python/2.7/site-packages/simplejson/__init__.py", line 370, in dumps
return _default_encoder.encode(obj)
File "/Library/Python/2.7/site-packages/simplejson/encoder.py", line 269, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/Library/Python/2.7/site-packages/simplejson/encoder.py", line 348, in iterencode
return _iterencode(o, 0)
File "/Library/Python/2.7/site-packages/simplejson/encoder.py", line 246, in default
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: <bound method KittiesJob.filter_input of <__main__.KittiesJob object at 0x10449ac90>> is not JSON serializable
Can someone please explain what I'm doing wrong ?