This notebook illustrates several ways to load data using Scala. Scala runs on the JVM, and Java and Scala classes can be freely mixed, whether they reside in different projects or in the same one. Seeing how Scala code interacts with OpenStack Swift object storage should therefore help guide you in crafting a Java equivalent.
The steps below show how to configure and extract data from an OpenStack Swift Object Storage instance with the Stocator library in Scala. The Swift URL decomposes into:
swift2d://container.myacct/filename.extension

where:
  swift2d             the Stocator protocol
  container           the name of the container
  myacct              the namespace (the datasource name)
  filename.extension  the object storage filename
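For example, with the container ("notebooks"), filename ("file.xml"), and datasource name ("sparksql") used later in this notebook, the URL assembles as:

// container = "notebooks", namespace = "sparksql", object = "file.xml"
val url = "swift2d://notebooks.sparksql/file.xml"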
Imports
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import scala.util.control.NonFatal
import play.api.libs.json.Json

// sc is the SparkContext that the notebook environment provides.
val sqlctx = new SQLContext(sc)
val scplain = sqlctx.sparkContext
Sample Creds
// @hidden_cell
val credentials = scala.collection.mutable.HashMap[String, String](
"auth_url"->"https://identity.open.softlayer.com",
"project"->"object_storage_3xxxxxx3_xxxx_xxxx_xxxx_xxxxxxxxxxxx",
"project_id"->"6xxxxxxxxxx04fxxxxxxxxxx6xxxxxx7",
"region"->"dallas",
"user_id"->"cxxxxxxxxxxaxxxxxxxxxx1xxxxxxxxx",
"domain_id"->"cxxxxxxxxxxaxxyyyyyyxx1xxxxxxxxx",
"domain_name"->"853255",
"username"->"Admin_cxxxxxxxxxxaxxxxxxxxxx1xxxxxxxxx",
"password"->"""&M7372!FAKE""",
"container"->"notebooks",
"tenantId"->"undefined",
"filename"->"file.xml"
)
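The helper below expects these credentials as a JSON string. A minimal sketch of that serialization, using the same Play JSON call that appears in the final step:

// Serialize the mutable map to an immutable Map, then to a JSON string.
val dsConfiguration = Json.toJson(credentials.toMap).toString
// e.g. {"auth_url":"https://identity.open.softlayer.com","region":"dallas",...}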
Helper Method
def setRemoteObjectStorageConfig(name: String, sc: SparkContext, dsConfiguration: String): Boolean = {
  try {
    // Parse the JSON credentials string into a Map.
    val result = scala.util.parsing.json.JSON.parseFull(dsConfiguration)
    result match {
      case Some(e: Map[String, String] @unchecked) =>
        // Register Stocator as the handler for the swift2d:// scheme and bind
        // the credentials to the given datasource name.
        val prefix = "fs.swift2d.service." + name
        val hconf = sc.hadoopConfiguration
        hconf.set("fs.swift2d.impl", "com.ibm.stocator.fs.ObjectStoreFileSystem")
        hconf.set(prefix + ".auth.url", e("auth_url") + "/v3/auth/tokens")
        hconf.set(prefix + ".tenant", e("project_id"))
        hconf.set(prefix + ".username", e("user_id"))
        hconf.set(prefix + ".password", e("password"))
        hconf.set(prefix + ".auth.method", "keystoneV3")
        hconf.set(prefix + ".region", e("region"))
        hconf.setBoolean(prefix + ".public", true)
        println("Successfully modified sparkcontext object with remote Object Storage Credentials using datasource name " + name)
        println("")
        true
      case _ =>
        println("Failed.")
        false
    }
  }
  catch {
    case NonFatal(exc) =>
      println(exc)
      false
  }
}
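Once the helper has been invoked (see "Load the Data" below), you can confirm the settings landed by reading them back from the Hadoop configuration. A minimal sketch, assuming the datasource name "sparksql":

println(scplain.hadoopConfiguration.get("fs.swift2d.impl"))
// com.ibm.stocator.fs.ObjectStoreFileSystem
println(scplain.hadoopConfiguration.get("fs.swift2d.service.sparksql.auth.url"))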
Load the Data
// Configure the SparkContext with the credentials under the datasource name
// "sparksql", then read the object through the swift2d:// URL described above.
val setObjStor = setRemoteObjectStorageConfig("sparksql", scplain, Json.toJson(credentials.toMap).toString)
val data_rdd = scplain.textFile("swift2d://notebooks.sparksql/" + credentials("filename"))
data_rdd.take(5) // peek at the first five lines
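Since the notebook already created a SQLContext, the same object can also be read as a DataFrame rather than a plain RDD. A minimal sketch, assuming Spark 1.6 or later, where sqlctx.read.text is available:

val data_df = sqlctx.read.text("swift2d://notebooks.sparksql/" + credentials("filename"))
data_df.show(5, truncate = false) // first five lines, untruncated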