The Mapper is reading file from two places 1) Articles visited by user(sorting by country) 2) Statistics of country (country wise)
The output of both Mapper is Text, Text
I am running program of Amazon Cluster
My aim is read data from two different set and combine the result and store it in hbase.
HDFS to HDFS is working. The code is getting stuck at reducing 67% and gives error as
17/02/24 10:45:31 INFO mapreduce.Job: map 0% reduce 0%
17/02/24 10:45:37 INFO mapreduce.Job: map 100% reduce 0%
17/02/24 10:45:49 INFO mapreduce.Job: map 100% reduce 67%
17/02/24 10:46:00 INFO mapreduce.Job: Task Id : attempt_1487926412544_0016_r_000000_0, Status : FAILED
Error: java.lang.IllegalArgumentException: Row length is 0
at org.apache.hadoop.hbase.client.Mutation.checkRow(Mutation.java:565)
at org.apache.hadoop.hbase.client.Put.<init>(Put.java:110)
at org.apache.hadoop.hbase.client.Put.<init>(Put.java:68)
at org.apache.hadoop.hbase.client.Put.<init>(Put.java:58)
at com.happiestminds.hadoop.CounterReducer.reduce(CounterReducer.java:45)
at com.happiestminds.hadoop.CounterReducer.reduce(CounterReducer.java:1)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:635)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:390)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Driver class is
package com.happiestminds.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Main extends Configured implements Tool {
/**
* @param args
* @throws Exception
*/
public static String outputTable = "mapreduceoutput";
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Main(), args);
System.exit(exitCode);
}
@Override
public int run(String[] args) throws Exception {
Configuration config = HBaseConfiguration.create();
try{
HBaseAdmin.checkHBaseAvailable(config);
}
catch(MasterNotRunningException e){
System.out.println("Master not running");
System.exit(1);
}
Job job = Job.getInstance(config, "Hbase Test");
job.setJarByClass(Main.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, ArticleMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, StatisticsMapper.class);
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initTableReducerJob(outputTable, CounterReducer.class, job);
//job.setReducerClass(CounterReducer.class);
job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : 1;
}
}
Reducer class is
package com.happiestminds.hadoop;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class CounterReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
public static final byte[] CF = "counter".getBytes();
public static final byte[] COUNT = "combined".getBytes();
@Override
protected void reduce(Text key, Iterable<Text> values,
Reducer<Text, Text, ImmutableBytesWritable, Mutation>.Context context)
throws IOException, InterruptedException {
String vals = values.toString();
int counter = 0;
StringBuilder sbr = new StringBuilder();
System.out.println(key.toString());
for (Text val : values) {
String stat = val.toString();
if (stat.equals("***")) {
counter++;
} else {
sbr.append(stat + ",");
}
}
sbr.append("Article count : " + counter);
Put put = new Put(Bytes.toBytes(key.toString()));
put.addColumn(CF, COUNT, Bytes.toBytes(sbr.toString()));
if (counter != 0) {
context.write(null, put);
}
}
}
Dependencies
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.2</version>
</dependency>
</dependencies>