I don't understand what my MapReduce job gives me as output. I have a .csv
file as input where districts of a city are stored with the age of each tree for each district.
In the combiner I try to get the oldest tree per district, while in my reducer I try to retrieve the district with the oldest tree in the city.
My problem is that while the reduce
funciton gives me output values of 11, 12, 16, and 5, the cleanup
function inside the reducer that should return the last value of those (5) actually returns 9 (which is the last value that my reducer analyses).
I don't get what i missed.
Below is what I tried so far.
Mapper:
package com.opstty.mapper;
import org.apache.commons.io.output.NullWriter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.StringTokenizer;
public class TokenizerMapper_1_8_6 extends Mapper<Object, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text result = new Text();
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString(),";");
int i = 0;
while (itr.hasMoreTokens()) {
String arrondissement = itr.nextToken();
if(i%13==1 && !arrondissement.toString().equals("ARRONDISSEMENT")) {
itr.nextToken();itr.nextToken();itr.nextToken();
String annee = itr.nextToken();
result.set(arrondissement);
if(Double.parseDouble((String.valueOf(annee))) > 1000){
context.write(result, new IntWritable((int) Double.parseDouble((String.valueOf(annee)))));
i+=3;
}
}
i++;
}
}
}
Combiner:
package com.opstty.job;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class Compare extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
List a = new ArrayList();
int sum = 0;
for (IntWritable val : values) {
a.add(val.get());
}
Collections.sort(a);
result.set((Integer) Collections.min(a));
context.write(key, result);
}
}
Reducer:
public class IntReducer6 extends Reducer<Text, IntWritable, Text, NullWritable> {
private int max = 100000;
private int annee=0;
int i =0;
private List a = new ArrayList();
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
for (IntWritable value : values)
{
annee = value.get();
}
if(annee < max)
{
a.add(key);
max = annee;
context.write((Text) a.get(i), NullWritable.get());
i++;
}
}
@Override
// only display the character which has the largest value
protected void cleanup(Context context) throws IOException, InterruptedException {
context.write((Text) a.get(a.size()-1), NullWritable.get());
}
}