I am able to call AWS Textract to read an image from my local path. How can I integrate this textract code to read the image uploaded onto a created S3 bucket with the S3 bucket codes.
Working Textract Code to textract images from local path
package aws.cloud.work;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.io.InputStream;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.textract.AmazonTextract;
import com.amazonaws.services.textract.AmazonTextractClientBuilder;
import com.amazonaws.services.textract.model.DetectDocumentTextRequest;
import com.amazonaws.services.textract.model.DetectDocumentTextResult;
import com.amazonaws.services.textract.model.Document;
import com.amazonaws.util.IOUtils;
public class TextractDemo {
static AmazonTextractClientBuilder clientBuilder = AmazonTextractClientBuilder.standard()
.withRegion(Regions.US_EAST_1);
private static FileWriter file;
public static void main(String[] args) throws IOException {
//AWS Credentials to access AWS Textract services
clientBuilder.setCredentials(new AWSStaticCredentialsProvider(
new BasicAWSCredentials("Access Key", "Secret key")));
//Set the path of the image to be textract. Can be configured to use from S3
String document="C:\\Users\\image-local-path\\sampleTT.jpg";
ByteBuffer imageBytes;
//Code to use AWS Textract services
try (InputStream inputStream = new FileInputStream(new File(document))) {
imageBytes = ByteBuffer.wrap(IOUtils.toByteArray(inputStream));
}
AmazonTextract client = clientBuilder.build();
DetectDocumentTextRequest request = new DetectDocumentTextRequest()
.withDocument(new Document().withBytes(imageBytes));
/*
* DetectDocumentTextResult result = client.detectDocumentText(request);
* System.out.println(result); result.getBlocks().forEach(block ->{
* if(block.getBlockType().equals("LINE")) System.out.println("text is "+
* block.getText() + " confidence is "+ block.getConfidence());
*/
//
DetectDocumentTextResult result = client.detectDocumentText(request);
System.out.println(result);
JSONObject obj = new JSONObject();
result.getBlocks().forEach(block -> {
if (block.getBlockType().equals("LINE"))
System.out.println("text is " + block.getText() + " confidence is " + block.getConfidence());
JSONArray fields = new JSONArray();
fields.add(block.getText() + " , " + block.getConfidence());
obj.put(block.getText(), fields);
});
//To import the results into JSON file and output the console output as sample.txt
try {
file = new FileWriter("/Users/output-path/sample.txt");
file.write(obj.toJSONString());
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
file.flush();
file.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
S3 bucket code integration I managed to find from the docs:
String document = "sampleTT.jpg";
String bucket = "textract-images";
AmazonS3 s3client = AmazonS3ClientBuilder.standard()
.withEndpointConfiguration(
new EndpointConfiguration("https://s3.amazonaws.com","us-east-1"))
.build();
// Get the document from S3
com.amazonaws.services.s3.model.S3Object s3object = s3client.getObject(bucket, document);
S3ObjectInputStream inputStream = s3object.getObjectContent();
BufferedImage image = ImageIO.read(inputStream);
(Edited) - Thanks @smac2020, I currently have a working Rekognition Code that reads from my AWS console S3 bucket and runs the Rekognition services that I am referencing to. However, I am unable to modify and merge it with the Textract source code
package com.amazonaws.samples;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.rekognition.AmazonRekognition;
import com.amazonaws.services.rekognition.AmazonRekognitionClientBuilder;
import com.amazonaws.services.rekognition.model.AmazonRekognitionException;
import com.amazonaws.services.rekognition.model.DetectLabelsRequest;
import com.amazonaws.services.rekognition.model.DetectLabelsResult;
import com.amazonaws.services.rekognition.model.Image;
import com.amazonaws.services.rekognition.model.Label;
import com.amazonaws.services.rekognition.model.S3Object;
import java.util.List;
public class DetectLabels {
public static void main(String[] args) throws Exception {
String photo = "sampleTT.jpg";
String bucket = "Textract-bucket";
// AmazonRekognition rekognitionClient = AmazonRekognitionClientBuilder.standard().withRegion("ap-southeast-1").build();
AWSCredentialsProvider credentialsProvider = new AWSStaticCredentialsProvider (new BasicAWSCredentials("Access Key", "Secret Key"));
AmazonRekognition rekognitionClient = AmazonRekognitionClientBuilder.standard().withCredentials(credentialsProvider).withRegion("ap-southeast-1").build();
DetectLabelsRequest request = new DetectLabelsRequest()
.withImage(new Image()
.withS3Object(new S3Object()
.withName(photo).withBucket(bucket)))
.withMaxLabels(10)
.withMinConfidence(75F);
try {
DetectLabelsResult result = rekognitionClient.detectLabels(request);
List <Label> labels = result.getLabels();
System.out.println("Detected labels for " + photo);
for (Label label: labels) {
System.out.println(label.getName() + ": " + label.getConfidence().toString());
}
} catch(AmazonRekognitionException e) {
e.printStackTrace();
}
}
}