I have code as below -
import java.io.InputStream
import java.net.URI
import java.util
import com.amazonaws.auth.AWSCredentialsProviderChain
import com.amazonaws.{ClientConfiguration, Protocol}
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model._
import org.apache.hadoop.conf.{Configuration => HadoopConfiguration}
import org.apache.hadoop.fs.{Path => HadoopPath}
import org.apache.hadoop.fs.s3a.{BasicAWSCredentialsProvider, S3AFileSystem}
import com.amazonaws.services.s3.model.ObjectListing
import scala.annotation.tailrec
/**
 * Helpers for listing and reading objects in an S3-compatible store.
 *
 * Connection settings (credentials, endpoint, SSL flag) are read from the
 * Hadoop configuration resources found on the classpath each time a client
 * is created.
 */
object FileOperation {

  /** Location operated on: the host is the bucket, the path is the key (or key prefix). */
  val uri = new URI("s3a://bucket-name/prefixKey/file.json")

  /** Hadoop S3A file system handle; `getEntries` initialises it once a listing returns entries. */
  val fs: S3AFileSystem = new S3AFileSystem()

  /**
   * Object key (or key prefix) derived from `uri`: the path without its
   * leading slash, or the empty string when the URI has no path.
   */
  private def objectKey: String =
    Option(uri.getPath).filter(_.nonEmpty).fold("")(_.substring(1))

  /**
   * Builds a new S3 client from the `fs.s3a.*` Hadoop configuration properties.
   *
   * A fresh `Configuration` and a fresh client are created on every call.
   *
   * @return a client using the configured protocol, credentials and endpoint
   */
  def getAWSClient: AmazonS3Client = {
    val conf: HadoopConfiguration = new HadoopConfiguration(true)

    val awsConf: ClientConfiguration = new ClientConfiguration()
    val secureConnections: Boolean = conf.getBoolean("fs.s3a.connection.ssl.enabled", false)
    awsConf.setProtocol(if (secureConnections) Protocol.HTTPS else Protocol.HTTP)

    // Configuration.get(name) already yields null for a missing property.
    val accessKey: String = conf.get("fs.s3a.access.key")
    val secretKey: String = conf.get("fs.s3a.secret.key")
    // Never write the secret itself to stdout/logs; only report whether it was found.
    println(s"inside getAWSClient accessKey -> $accessKey ; secretKey present -> ${secretKey != null}")

    val credentials = new AWSCredentialsProviderChain(new BasicAWSCredentialsProvider(accessKey, secretKey))
    val s3: AmazonS3Client = new AmazonS3Client(credentials, awsConf)

    // Only override the SDK's default endpoint when one is actually configured.
    Option(conf.get("fs.s3a.endpoint"))
      .map(_.trim)
      .filter(_.nonEmpty)
      .foreach(endpoint => s3.setEndpoint(endpoint))
    s3
  }

  /**
   * Lists every object whose key starts with the key of `uri`.
   *
   * All pages of the listing are followed, so results are not capped at the
   * size of a single response.
   *
   * @param recursive currently ignored: the listing is requested by prefix
   *                  without a delimiter
   * @return one `s3a://bucket/key` URI per listed object
   * @throws java.nio.file.NoSuchFileException if nothing matches the prefix
   */
  def getEntries(recursive: Boolean): Seq[URI] = {
    // Reuse one client for the first request and all follow-up pages.
    val client: AmazonS3Client = getAWSClient

    // Appends the URIs of a single listing page to `collected`.
    @tailrec
    def drainPage(summaries: util.Iterator[S3ObjectSummary], collected: Vector[URI]): Vector[URI] =
      if (summaries.hasNext) {
        val summary: S3ObjectSummary = summaries.next()
        val entry = new HadoopPath(new URI("s3a://" + summary.getBucketName + "/" + summary.getKey))
        drainPage(summaries, collected :+ entry.toUri)
      } else collected

    // Walks the listing page by page until the service reports it is complete.
    @tailrec
    def collectAll(listing: ObjectListing, collected: Vector[URI]): Vector[URI] = {
      val withPage = drainPage(listing.getObjectSummaries.iterator(), collected)
      if (listing.isTruncated) collectAll(client.listNextBatchOfObjects(listing), withPage)
      else withPage
    }

    val firstPage: ObjectListing = client.listObjects(uri.getHost, objectKey)
    if (firstPage.getObjectSummaries.isEmpty) throw new java.nio.file.NoSuchFileException(uri.toString)
    else {
      // Initialise the shared file system once per call instead of once per
      // listed object; every entry lives in the bucket named by `uri`.
      fs.initialize(uri, new HadoopConfiguration(true))
      collectAll(firstPage, Vector.empty)
    }
  }

  /**
   * Opens the object addressed by `uri` for reading.
   *
   * The caller owns the returned stream and must close it to release the
   * underlying HTTP connection.
   *
   * @return the content stream of the object
   */
  def asStream: InputStream = {
    val s3object: S3Object = getAWSClient.getObject(new GetObjectRequest(uri.getHost, objectKey))
    s3object.getObjectContent
  }
}
The call to getAWSClient inside the getEntries function works, but the same call inside the asStream function gets an empty (null) access key and secret key. getEntries is used to list the files under a folder, whereas asStream returns the input stream of one of those files, which is then used to create a BufferedSource from which the content is read.
Below is the Hadoop core-site.xml file. Can someone help me understand why the access and secret keys appear as null in the asStream function?
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Sample config for Hadoop S3A client. -->
<!--
  S3A client settings. getAWSClient reads fs.s3a.access.key, fs.s3a.secret.key,
  fs.s3a.connection.ssl.enabled and fs.s3a.endpoint from this configuration.
-->
<configuration>
<!-- NOTE(review): credentials are stored here in plain text; keep this file out of version control. -->
<property>
<name>fs.s3a.access.key</name>
<value>xxxxxxxx</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>yyyyyyy</value>
</property>
<!-- false makes getAWSClient select Protocol.HTTP instead of Protocol.HTTPS. -->
<property>
<name>fs.s3a.connection.ssl.enabled</name>
<value>false</value>
</property>
<!-- Passed to AmazonS3Client.setEndpoint by getAWSClient. -->
<property>
<name>fs.s3a.endpoint</name>
<value>dev:80</value>
</property>
<!--
  NOTE(review): "fs.s3a.imp" looks like a typo. The documented Hadoop keys appear to be
  "fs.s3a.impl" (org.apache.hadoop.fs.s3a.S3AFileSystem) and
  "fs.AbstractFileSystem.s3a.impl" (org.apache.hadoop.fs.s3a.S3A); confirm which was intended.
-->
<property>
<name>fs.s3a.imp</name>
<value>org.apache.hadoop.fs.s3a.S3A</value>
</property>
<!-- Not read by getAWSClient; presumably only honoured by S3AFileSystem itself (TODO confirm for the Hadoop version in use). -->
<property>
<name>fs.s3a.path.style.access</name>
<value>true</value>
</property>
</configuration>