
I got this error while running a custom JAR on EMR.

Exception in thread "main" com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.model.AmazonS3Exception: Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: B042BB0B40A75966), S3 Extended Request ID: vr/DUr8HD3xjomauyzqvVdGuW3fHBP8PDUmTIAoVLUxrmsxh9H+OS9+cgo4OmHxaz/b8CSPGmuc=
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1389)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:902)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:607)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:376)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:338)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:287)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3826)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1071)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:1029)
    at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.ensureBucketExists(Jets3tNativeFileSystemStore.java:138)
    at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.initialize(Jets3tNativeFileSystemStore.java:116)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at com.sun.proxy.$Proxy23.initialize(Unknown Source)
    at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.initialize(S3NativeFileSystem.java:461)
    at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.initialize(EmrFileSystem.java:110)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2703)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:91)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2737)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2719)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:375)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
    at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(FileInputFormat.java:485)
    at SentimentsDriver.run(SentimentsDriver.java:21)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
    at SentimentsDriver.main(SentimentsDriver.java:33)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)

arguments: s3://sentimentproj/input/tweet.txt s3://sentimentproj/output

SentimentsDriver:

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/* This class is responsible for configuring and running the MapReduce job. */
public class SentimentsDriver extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: SentimentsDriver <input path> <output path>");
            System.exit(-1);
        }

        // Use the Configuration supplied by ToolRunner instead of the deprecated new Job().
        Job job = Job.getInstance(getConf());
        job.setJarByClass(SentimentsDriver.class);
        job.setJobName("SentimentAnalysis");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(SentimentsMapper.class);
        job.setReducerClass(SentimentsReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Wait for the job exactly once and return the exit code; main() handles System.exit.
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        SentimentsDriver driver = new SentimentsDriver();
        int exitCode = ToolRunner.run(driver, args);
        System.exit(exitCode);
    }
}

1 Answer


The issue may be with the S3 bucket. Make sure the bucket is created in the same region as your EMR cluster; if the bucket and the cluster are in different regions, S3 typically returns this 400 Bad Request error.
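
A quick way to confirm the mismatch is to look up the bucket's region and compare it with the cluster's region. Below is a minimal sketch (not part of the original answer) using the AWS SDK for Java v1, assuming the SDK is on the classpath and default credentials/region are configured; the bucket name comes from the question's arguments.

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;

public class BucketRegionCheck {
    public static void main(String[] args) {
        // Build a client using the default credential/region provider chain.
        AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();

        // getBucketLocation returns the bucket's region constraint,
        // e.g. "us-west-2"; the legacy value "US" means us-east-1.
        String bucketRegion = s3.getBucketLocation("sentimentproj");
        System.out.println("Bucket region: " + bucketRegion);

        // Compare this with the region where the EMR cluster runs; if they
        // differ, recreate the bucket (or launch the cluster) in the matching region.
    }
}

The same check can also be done from the S3 console (bucket properties) or with the AWS CLI command `aws s3api get-bucket-location --bucket sentimentproj`.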