hadoop jar MapReduceTryouts-1.jar invertedindex.simple.MyDriver -D mapreduce.job.reduces=10 /user/notprabhu2/Input/potter/ /user/notprabhu2/output
I have been trying in vain to set the number of reducers through the -D option provided by GenericOptionParser but it does not seem to work and I have no idea why.
I tried -D mapreduce.job.reduces=10
(with space after -D) and also
-Dmapreduce.job.reduces=10
(without space after -D) but nothing seems to dodge.
In my Driver class I have implemented Tools.
package invertedindex.simple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MyDriver extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(MyDriver.class);
Path outputPath = new Path(args[1]);
outputPath.getFileSystem(getConf()).delete(outputPath, true);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextInputFormat.addInputPath(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, outputPath);
job.setNumReduceTasks(3);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Configuration(),new MyDriver(), args);
System.exit(exitCode);
}
}
Since I have explicitly set the number of reducers to 3 in my driver code I always end up with 3 reducers.
I am using CDH 5.4.7
which has Hadoop 2.6.0
on a 2 node cluster on Google Compute Engine.