I'm having trouble understanding how to use add_file_arg() in mrjob. I'm trying to pass a CSV file containing people's attributes to my mapper, so that the mapper can look up the attributes for each person. This is my code so far:
class MRPeopleScores(MRJob):
    """mrjob job that accepts a side file through a ``--database`` option."""

    def configure_args(self):
        """Run the inherited argument setup, then register ``--database``."""
        # NOTE(review): configure_args()/add_file_arg() must exist in the
        # installed mrjob release for this hook to take effect -- confirm
        # against the mrjob version in use.
        super().configure_args()
        self.add_file_arg('--database')

    def mapper(self, _, line):
        """Print the ``--database`` option value; key and line are unused."""
        database_path = self.options.database
        print(database_path)
When I run
python3 calculate_people_scores.py --jobconf mapreduce.job.reduces=1 data/people_ids.csv database=data/people_attributes.csv
I get the following error message:
Traceback (most recent call last):
File "calculate_people_scores.py", line 88, in <module>
MRPeopleScores.run()
File "/usr/local/lib/python3.6/site-packages/mrjob/job.py", line 439, in run
mr_job.execute()
File "/usr/local/lib/python3.6/site-packages/mrjob/job.py", line 460, in execute
super(MRJob, self).execute()
File "/usr/local/lib/python3.6/site-packages/mrjob/launch.py", line 161, in execute
self.run_job()
File "/usr/local/lib/python3.6/site-packages/mrjob/launch.py", line 231, in run_job
runner.run()
File "/usr/local/lib/python3.6/site-packages/mrjob/runner.py", line 476, in run
self._run()
File "/usr/local/lib/python3.6/site-packages/mrjob/sim.py", line 185, in _run
self._invoke_step(step_num, 'mapper')
File "/usr/local/lib/python3.6/site-packages/mrjob/sim.py", line 272, in _invoke_step
working_dir, env)
File "/usr/local/lib/python3.6/site-packages/mrjob/inline.py", line 154, in _run_step
child_instance.execute()
File "/usr/local/lib/python3.6/site-packages/mrjob/job.py", line 448, in execute
self.run_mapper(self.options.step_num)
File "/usr/local/lib/python3.6/site-packages/mrjob/job.py", line 526, in run_mapper
for out_key, out_value in mapper(key, value) or ():
File "calculate_people_scores.py", line 47, in mapper
print(self.options.database)
AttributeError: 'Values' object has no attribute 'database'
I'm sure I'm grossly misunderstanding how to use this argument. Any help would be much appreciated.