
I want to run two mappers that produce two different outputs in different directories. The output of the first mapper (passed as an argument) should be sent as the input of the second mapper. I have this code in the driver class:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


public class Export_Column_Mapping 
{
    private static String[] Detail_output_column_array = new String[27];
    private static String[] Shop_output_column_array = new String[8];
    private static String details_output = null ;
    private static String Shop_output = null;

    public static void main(String[] args) throws Exception 
    {

        String Output_filetype = args[3];
        String Input_column_number = args[4];
        String Output_column_number = args[5];

        Configuration Detailsconf = new Configuration(false);

        Detailsconf.setStrings("output_filetype",Output_filetype);
        Detailsconf.setStrings("Input_column_number",Input_column_number);
        Detailsconf.setStrings("Output_column_number",Output_column_number);

        Job Details = new Job(Detailsconf," Export_Column_Mapping");

        Details.setJarByClass(Export_Column_Mapping.class);
        Details.setJobName("DetailsFile_Job");

        Details.setMapperClass(DetailFile_Mapper.class);
        Details.setNumReduceTasks(0);

        Details.setInputFormatClass(TextInputFormat.class);
        Details.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(Details, new Path(args[0])); 
        FileOutputFormat.setOutputPath(Details, new Path(args[1]));

        if(Details.waitForCompletion(true))
        {

        Configuration Shopconf = new Configuration();

        Job Shop = new Job(Shopconf,"Export_Column_Mapping");
        Shop.setJarByClass(Export_Column_Mapping.class);
        Shop.setJobName("ShopFile_Job");

        Shop.setMapperClass(ShopFile_Mapper.class);
        Shop.setNumReduceTasks(0);

        Shop.setInputFormatClass(TextInputFormat.class);
        Shop.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(Shop, new Path(args[1])); 
        FileOutputFormat.setOutputPath(Shop, new Path(args[2]));

        MultipleOutputs.addNamedOutput(Shop, "text", TextOutputFormat.class,LongWritable.class, Text.class);
        System.exit(Shop.waitForCompletion(true) ? 0 : 1);
        }
    }

    public static class DetailFile_Mapper extends Mapper<LongWritable,Text,Text,Text>
    {   
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException 
        {
            String str_Output_filetype = context.getConfiguration().get("output_filetype"); 

            String str_Input_column_number = context.getConfiguration().get("Input_column_number");
            String[] input_columns_number = str_Input_column_number.split(",");

            String str_Output_column_number= context.getConfiguration().get("Output_column_number");    
            String[] output_columns_number = str_Output_column_number.split(",");

            String str_line = value.toString();
            String[] input_column_array = str_line.split(",");

            try
            {

                for(int i = 0;i<=input_column_array.length+1; i++)
                {
                    int int_outputcolumn = Integer.parseInt(output_columns_number[i]);
                    int int_inputcolumn = Integer.parseInt(input_columns_number[i]);

                    if((int_inputcolumn != 0) && (int_outputcolumn != 0) && output_columns_number.length == input_columns_number.length)
                    {

                        Detail_output_column_array[int_outputcolumn-1] = input_column_array[int_inputcolumn-1];


                        if(details_output != null)
                        {
                            details_output = details_output+"       "+ Detail_output_column_array[int_outputcolumn-1];
                            Shop_output = Shop_output+"     "+ Shop_output_column_array[int_outputcolumn-1];

                        }else
                        {
                            details_output = Detail_output_column_array[int_outputcolumn-1];
                            Shop_output =  Shop_output_column_array[int_outputcolumn-1];

                        }
                    }
                }

            }catch (Exception e)
            {

            }
            context.write(null,new Text(details_output));
        }
    }
    public static class ShopFile_Mapper extends Mapper<LongWritable,Text,Text,Text>
    {   
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException 
        {
            try
            {

                for(int i = 0;i<=Shop_output_column_array.length; i++)
                {
                    Shop_output_column_array[0] = Detail_output_column_array[0];
                    Shop_output_column_array[1] = Detail_output_column_array[1];
                    Shop_output_column_array[2] = Detail_output_column_array[2];
                    Shop_output_column_array[3] = Detail_output_column_array[3];
                    Shop_output_column_array[4] = Detail_output_column_array[14];

                    if(details_output != null)
                    {
                        Shop_output = Shop_output+"     "+ Shop_output_column_array[i];

                    }else
                    {
                        Shop_output =  Shop_output_column_array[i-1];

                    }
                }
            }catch (Exception e){

            }
            context.write(null,new Text(Shop_output));
        }
    }

}

I get this error:

Error:org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/home/Barath.B.Natarajan.ap/rules/text.txt

I want to run the jobs one by one. Can anyone help me with this?

Barath

1 Answer


There is something called JobControl with which you will be able to achieve this.

Suppose there are two jobs, A and B:

ControlledJob A = new ControlledJob(confA);   // confA: the Configuration you built for job A
ControlledJob B = new ControlledJob(confB);   // confB: the Configuration you built for job B
B.addDependingJob(A);                         // B starts only after A completes successfully

JobControl jControl = new JobControl("Name");
jControl.addJob(A);
jControl.addJob(B);
Thread runJControl = new Thread(jControl);
runJControl.start();
while (!jControl.allFinished()) {
    code = jControl.getFailedJobList().size() == 0 ? 0 : 1;
    Thread.sleep(1000);
}
System.exit(code);

Initialize code at the beginning like this:

int code = 1;

In your case, let the first job be the first mapper with zero reducers and the second job be the second mapper with zero reducers. Configure them so that the input path of B is the same as the output path of A.
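For reference, here is a minimal sketch of how the two map-only jobs from the question (the Details job and the Shop job) could be wired together this way. It reuses the question's class, mapper names, and args indices, uses Job.getInstance in place of the deprecated Job constructor, and omits the per-job configuration values (output_filetype, etc.) for brevity; the variable names and the jControl.stop() call at the end are my additions, so treat this as an outline rather than a drop-in replacement. It assumes the usual imports plus org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob and org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl:

// Sketch only: chaining the question's two map-only jobs with JobControl.
public static void main(String[] args) throws Exception {
    int code = 1;

    Configuration detailsConf = new Configuration();
    Job details = Job.getInstance(detailsConf, "DetailsFile_Job");
    details.setJarByClass(Export_Column_Mapping.class);
    details.setMapperClass(DetailFile_Mapper.class);
    details.setNumReduceTasks(0);                               // map-only job
    FileInputFormat.setInputPaths(details, new Path(args[0]));  // raw input
    FileOutputFormat.setOutputPath(details, new Path(args[1])); // output of job A

    Configuration shopConf = new Configuration();
    Job shop = Job.getInstance(shopConf, "ShopFile_Job");
    shop.setJarByClass(Export_Column_Mapping.class);
    shop.setMapperClass(ShopFile_Mapper.class);
    shop.setNumReduceTasks(0);                                  // map-only job
    FileInputFormat.setInputPaths(shop, new Path(args[1]));     // input of B = output of A
    FileOutputFormat.setOutputPath(shop, new Path(args[2]));

    // Wrap both jobs and declare the dependency: shop runs only after details succeeds.
    ControlledJob controlledDetails = new ControlledJob(details, null);
    ControlledJob controlledShop = new ControlledJob(shop, null);
    controlledShop.addDependingJob(controlledDetails);

    JobControl jControl = new JobControl("Export_Column_Mapping");
    jControl.addJob(controlledDetails);
    jControl.addJob(controlledShop);

    Thread runJControl = new Thread(jControl);
    runJControl.start();
    while (!jControl.allFinished()) {
        code = jControl.getFailedJobList().size() == 0 ? 0 : 1;
        Thread.sleep(1000);
    }
    jControl.stop();   // stop the control thread so it does not keep polling for more jobs
    System.exit(code);
}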

madhu
  • **code = jControl.getFailedJobList().size() == 0 ? 0 : 1;** Can you explain what code is? – Barath Oct 13 '15 at 14:14
  • I am getting this error while running this, but I have set all the required arguments: **Error: org.apache.hadoop.mapred.InvalidJobConfException: Output directory not set.** – Barath Oct 13 '15 at 14:39
  • When you use FileOutputFormat.setOutputPath, specify the conf value and path. When you use TextInputFormat, specify the job value and path. Make the changes... – madhu Oct 14 '15 at 06:22
  • Is this right? **Details.setInputFormatClass(TextInputFormat.class);** **FileOutputFormat.setOutputPath(Details, new Path(args[1]));** – Barath Oct 14 '15 at 07:16
  • It should be FileOutputFormat.setOutputPath(DetailsConf, new Path(args[1])) – madhu Oct 14 '15 at 07:18
  • It is showing an error in the code: *The method setOutputPath(Job, Path) in the type FileOutputFormat is not applicable for the arguments (Configuration, Path)* – Barath Oct 14 '15 at 07:25
  • Are you using the old API or the new API? – madhu Oct 14 '15 at 07:35
  • Using the new APIs only. For JobControl I am using *org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob; org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;* – Barath Oct 14 '15 at 07:43
  • It's working fine after a slight modification, but it is searching for a third mapper; I want to stop it after the two mappers complete. – Barath Oct 15 '15 at 05:33