1

I have a text file with data written in the following (key,value) format:

1,34
5,67
8,88

The file is placed in the local file system.

I want to convert it into a single Hadoop sequence file, again on the local file system, for use in Mahout. The sequence file should contain all the records. For the first record, for example, 1 is the key and 34 is the value, and similarly for the other records.

I am new to Java. I shall be grateful for help.

Thanks.

Ashok K Harnal
  • 1,191
  • 2
  • 15
  • 28
  • You should check the answer from Sanjay Subramanian to similar question http://stackoverflow.com/questions/5377118/how-to-convert-txt-file-to-hadoops-sequence-file-format – Aleksei Shestakov Dec 22 '14 at 10:12

1 Answer

0

Well, I did find a way. Here is the code:

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

/**
 * Converts a comma-separated text file of {@code key,value} records into a
 * Hadoop sequence file with {@link LongWritable} keys and {@link Text} values.
 *
 * <p>Input and output locations are hard-coded in {@link #main(String[])}.
 */
public class CreateSequenceFile {
    /**
     * Reads the input file line by line and appends one record per
     * well-formed line to the sequence file.
     *
     * <p>A line is well-formed when it has at least two comma-separated
     * fields and the first field parses as a {@code long}. Only the first two
     * fields are used. Malformed lines are reported on standard error and
     * skipped.
     *
     * @param argsx command-line arguments; not used
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException if reading the input or writing the sequence file fails
     */
    public static void main(String[] argsx) throws FileNotFoundException, IOException
      {
       String myfile = "/home/ashokharnal/keyvalue.txt";
       String outputseqfile =  "/home/ashokharnal/part-0000";
       Path path = new Path(outputseqfile);
       String field_delimiter = ",";

       // NOTE(review): FileSystem.get(conf) returns whatever file system the
       // Configuration selects; with no cluster config on the classpath this is
       // presumably the local file system -- confirm for your environment.
       Configuration conf = new Configuration();
       FileSystem fs = FileSystem.get(conf);

       // try-with-resources closes both the reader and the writer even when an
       // exception escapes. The reader is opened first, so a missing input file
       // fails before any output file is created.
       try (BufferedReader br = new BufferedReader(new FileReader(myfile));
            SequenceFile.Writer writer =
                new SequenceFile.Writer(fs,conf,path,LongWritable.class,Text.class))
        {
           String line;
           // Advancing in the loop condition guarantees progress on every
           // iteration, including when a line is rejected below.
           while ((line = br.readLine()) != null) {
               String[] temp = line.split(field_delimiter);
               if (temp.length < 2) {
                   System.err.println("Skipping malformed line (expected key,value): " + line);
                   continue;
               }

               LongWritable key;
               try {
                   // Parse as long so the full LongWritable range is accepted.
                   key = new LongWritable(Long.parseLong(temp[0]));
               } catch (NumberFormatException ex) {
                   System.err.println("Skipping line with non-numeric key: " + line);
                   continue;
               }

               Text value = new Text(temp[1]);
               writer.append(key,value);
               System.out.println("Appended to sequence file key " + key.toString() + " and value " + value.toString());
           }
        }
      }
}
YoungHobbit
  • 13,254
  • 9
  • 50
  • 73
Ashok K Harnal
  • 1,191
  • 2
  • 15
  • 28