1

I have written a Driver, Mapper and Reducer program to try out a composite key (a key made of more than one field of an input dataset).

The dataset looks like this:

Country, State, County, Population(in millions)

USA,CA,Alameda,12

USA,CA,Santa Clara,14

USA,AZ,Abajd,14

I am trying to find out the total population in country+state. So the reducer should aggregate on two fields Country+State and show the population.

While I am iterating through the population values at this step (in the reducer code)

for(IntWritable i:values)

I am getting the compiler error "Can only iterate over an array or an instance of java.lang.Iterable"

So can't we get an iterator over IntWritable? I was able to get an Iterator to work on the FloatWritable data type.

Thanks much nath

import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;

 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
  import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  



 public  class CompositeKeyReducer extends Reducer<Country, IntWritable, Country, FloatWritable> {

 // public  class CompositeKeyReducer extends Reducer<Country, IntWritable, Country, IntWritable> {


    public void reduce(Country key, Iterator<IntWritable> values, Context context) throws IOException, InterruptedException {

        int numberofelements = 0;

        int cnt = 0;
        while (values.hasNext()) {
            cnt = cnt + values.next().get();
        }

      //USA, Alameda = 10
      //USA, Santa Clara = 12
      //USA, Sacramento = 12

      float populationinmillions =0;

        for(IntWritable i:values)
        {
            populationinmillions = populationinmillions + i.get();
            numberofelements = numberofelements+1;


        }           


       // context.write(key, new IntWritable(cnt));
    context.write(key, new FloatWritable(populationinmillions));

    }

}
sutterhome1971
  • 380
  • 1
  • 9
  • 22

1 Answers1

1

Since the complete code is not there, I'm not addressing your existing use case. A different use case, which uses IntWritable and FloatWritable to calculate an average, looks like the example below.

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvgDriver extends Configured implements Tool{

    public static class ImcdpMap extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        String record;

        protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
            record = value.toString();
            String[] fields = record.split(",");

            Integer s_id = new Integer(fields[0]);
            Integer marks = new Integer(fields[2]);
            context.write(new IntWritable(s_id), new IntWritable(marks));
        } // end of map method
    } // end of mapper class


    public static class ImcdpReduce extends Reducer<IntWritable, IntWritable, IntWritable, FloatWritable> {

        protected void reduce(IntWritable key, Iterable<IntWritable> values, Reducer<IntWritable, IntWritable, IntWritable, FloatWritable>.Context context) throws IOException, InterruptedException {
            Integer s_id = key.get();
            Integer sum = 0;
            Integer cnt = 0;

            for (IntWritable value:values) {
                sum = sum + value.get();
                cnt = cnt + 1;
            }

            Float avg_m = (float) (sum/cnt);
            context.write(new IntWritable(s_id), new FloatWritable(avg_m));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        args = new GenericOptionsParser(conf, args).getRemainingArgs();
        String input = args[0];
        String output = args[1];

        Job job = new Job(conf, "Avg");
        job.setJarByClass(ImcdpMap.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ImcdpMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(ImcdpReduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        FileInputFormat.setInputPaths(job, new Path(input));
        Path outPath = new Path(output);
        FileOutputFormat.setOutputPath(job, outPath);
        outPath.getFileSystem(conf).delete(outPath, true);

        job.waitForCompletion(true);
        return (job.waitForCompletion(true) ? 0 : 1);
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new AvgDriver(), args);
        System.exit(exitCode);
    }
}

However I have below findings from your code,

In your case you are looping through the Iterator twice. Why? Iterators are one-traversal-only. Some iterator types are cloneable, and you might be able to clone one before traversing it, but this isn't the general case.

You are following old api style of the code. You should make your method take an Iterable instead.

also see this

Community
  • 1
  • 1
Ram Ghadiyaram
  • 28,239
  • 13
  • 95
  • 121