
I have written the following code to find the maximum temperature, but when I try to retrieve the output, the files are created but are empty. I don't really understand why this is happening. Can someone please help?

My runner code:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class MaxTemp {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(MaxTemp.class);
        conf.setJobName("MaxTemp1");
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        conf.setMapperClass(MaxTempMapper.class);
        conf.setCombinerClass(MaxTempReducer.class);
        conf.setReducerClass(MaxTempReducer.class);

        FileInputFormat.setInputPaths(conf,new Path(args[0]));
        FileOutputFormat.setOutputPath(conf,new Path(args[1]));
        JobClient.runJob(conf);
    }
}

Mapper code:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MaxTempMapper extends MapReduceBase implements Mapper<LongWritable,Text,Text,IntWritable> {
    public void map(LongWritable key, Text value, OutputCollector<Text,IntWritable> output, Reporter reporter) throws IOException {
        String record = value.toString();
        String[] parts = record.split(",");
        output.collect(new Text(parts[0]), new IntWritable(Integer.parseInt(parts[1])));
    }
}

My reducer code:

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MaxTempReducer extends MapReduceBase implements Reducer<Text,IntWritable,Text,IntWritable> {
    public void reduce1(Text key, Iterator<IntWritable> values, OutputCollector<Text,IntWritable> output, Reporter reporter) throws IOException {
        int maxValue = 0;
        while (values.hasNext()) {
            maxValue=Math.max(maxValue,values.next().get());
        }
        output.collect(key, new IntWritable(maxValue));
    }


    @Override
    public void reduce(Text arg0, Iterator<IntWritable> arg1, OutputCollector<Text, IntWritable> arg2, Reporter arg3) throws IOException {
    // TODO Auto-generated method stub
    
    }
}

I am attaching the output screenshots below.

[screenshots: the output directory and part files are created, but the files are empty]

1 Answer


I can't help but notice that inside the MaxTempReducer class you have a reduce1 method, while the proper reduce method that the framework actually calls is overridden with an empty body. This is the reason you don't get any output in HDFS: the program sees the reducer class, but the reduce method it invokes doesn't describe what you want to do with the values (i.e. find the maximum temperature).
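
For reference, the smallest fix to your existing (old-API) reducer would be to move the logic of reduce1 into the overridden reduce method and remove reduce1 altogether:

public class MaxTempReducer extends MapReduceBase implements Reducer<Text,IntWritable,Text,IntWritable> {
    @Override
    public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text,IntWritable> output, Reporter reporter) throws IOException {
        int maxValue = 0; // assumes non-negative temperatures, as in the sample data below
        while (values.hasNext()) {
            maxValue = Math.max(maxValue, values.next().get());
        }
        output.collect(key, new IntWritable(maxValue));
    }
}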

There's also the issue that you are using deprecated classes from old versions of Hadoop (the org.apache.hadoop.mapred API), which can bring their own problems, since it is the newer org.apache.hadoop.mapreduce API that the framework and its components are actively tested and updated against (as you can check for yourself over here).

So by fixing those two issues, your program could look a bit like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;

public class MaxTemp 
{
    /* input:  <byte_offset, line_of_dataset>
     * output: <City, Temperature>
     */
    public static class Map extends Mapper<Object, Text, Text, IntWritable> 
    {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException 
        {
            String record = value.toString();
            String[] parts = record.split(", ");

            context.write(new Text(parts[0]), new IntWritable(Integer.parseInt(parts[1])));
        }
    }

    /* input:  <City, Temperature>
     * output: <City, Max Temperature>
     */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>
    {
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException 
        {
            int max_value = 0;
            
            for(IntWritable value : values)
            {
                if(value.get() > max_value)
                    max_value = value.get();
            }

            context.write(key, new IntWritable(max_value));
        }
    }


    public static void main(String[] args) throws Exception
    {
        // set the paths of the input and output directories in the HDFS
        Path input_dir = new Path("temperatures");
        Path output_dir = new Path("temp_out");

        // in case the output directory already exists, delete it
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        if(fs.exists(output_dir))
            fs.delete(output_dir, true);

        // configure the MapReduce job
        Job maxtemp_job = Job.getInstance(conf, "Max Temperature");
        maxtemp_job.setJarByClass(MaxTemp.class);
        maxtemp_job.setMapperClass(Map.class);
        maxtemp_job.setCombinerClass(Reduce.class);
        maxtemp_job.setReducerClass(Reduce.class);    
        maxtemp_job.setMapOutputKeyClass(Text.class);
        maxtemp_job.setMapOutputValueClass(IntWritable.class);
        maxtemp_job.setOutputKeyClass(Text.class);
        maxtemp_job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(maxtemp_job, input_dir);
        FileOutputFormat.setOutputPath(maxtemp_job, output_dir);
        maxtemp_job.waitForCompletion(true);
    }
}

Where the input file inside the temperatures directory in HDFS looks like this:

Boston, 3
Athens, 15
Tokyo, 20
Tokyo, 10
Athens, 32
Boston, 9

And the results in the temp_out directory can be viewed through the HDFS file browser of Hadoop.
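
For the sample input above, the part-r-00000 output file holds the maximum temperature per city, tab-separated and sorted by key:

Athens	32
Boston	9
Tokyo	20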
