I am trying to create DataFrame using spark sqlContext. I have used spark 1.6.3 and scala 2.10.5. Below is my code for creating DataFrames.
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import com.knoldus.pipeline.KMeansPipeLine
object SimpleApp{
def main(args:Array[String]){
val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
val kMeans = new KMeansPipeLine()
val df = sqlContext.createDataFrame(Seq(
("a@email.com", 12000,"M"),
("b@email.com", 43000,"M"),
("c@email.com", 5000,"F"),
("d@email.com", 60000,"M")
)).toDF("email", "income","gender")
val categoricalFeatures = List("gender","email")
val numberOfClusters = 2
val iterations = 10
val predictionResult = kMeans.predict(sqlContext,df,categoricalFeatures,numberOfClusters,iterations)
}
}
Its giving me the following exception. What mistake I am doing? Can anyone help me resolve this?
Exception in thread "main" java.lang.NoSuchMethodError:
org.apache.spark.sql.SQLContext.createDataFrame(Lscala/collection/Seq;Lscala/ref lect/api/TypeTags$TypeTag;)Lorg/apache/spark/sql/Dataset;
at SimpleApp$.main(SimpleApp.scala:24)
at SimpleApp.main(SimpleApp.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
The dependencies I have used are:
scalaVersion := "2.10.5"
libraryDependencies ++= Seq(
"org.apache.spark" % "spark-core_2.10" % "2.0.0" % "provided",
"org.apache.spark" % "spark-sql_2.10" % "2.0.0" % "provided",
"org.apache.spark" % "spark-mllib_2.10" % "2.0.0" % "provided",
"knoldus" % "k-means-pipeline" % "0.0.1" )