This is my first time using Scala and Spark. I use this code to extract data, and the extraction itself succeeds: if I call println(output1) instead of println(write(output1)), the code runs fine and prints the correct output (just without JSON keys). But when I use println(write(output1)), I get an error. I am using Scala 2.12.10 and Java 1.8. My code is as follows:
import org.json4s._
import org.json4s.jackson.JsonMethods._
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.write
import org.apache.spark.SparkContext
import scala.collection.mutable.Map
//import java.io.Writer

object task1 {
  def main(args: Array[String]): Unit = {
    // Logger.getRootLogger.setLevel(Level.INFO)
    val sc = new SparkContext("local[*]", "task1")
    val reviewRDD = sc.textFile("review.json")

    // Total number of reviews, and number of reviews written in 2018
    val n_review = reviewRDD.count().toInt
    val n_review_2018 = reviewRDD.filter(x => x contains ",\"date\":\"2018-").count().toInt

    // Distinct users, and the top 10 users by review count
    val n_user = reviewRDD.map(x => x.split(",")(1).split(":")(1)).distinct().count().toInt
    val top10_user = reviewRDD
      .map(x => (x.split(",")(1).split(":")(1).split("\"")(1), 1))
      .reduceByKey((x, y) => x + y)
      .sortBy(r => (-r._2, r._1))
      .take(10)

    // Distinct businesses, and the top 10 businesses by review count
    val n_business = reviewRDD.map(x => x.split(",")(2).split(":")(1)).distinct().count().toInt
    val top10_business = reviewRDD
      .map(x => (x.split(",")(2).split(":")(1).split("\"")(1), 1))
      .reduceByKey((x, y) => x + y)
      .sortBy(r => (-r._2, r._1))
      .take(10)

    implicit val formats = DefaultFormats

    case class Output1(
      n_review: Int,
      n_review_2018: Int,
      n_user: Int,
      top10_user: List[(String, Int)],
      n_business: Int,
      top10_business: List[(String, Int)]
    )

    val output1 = Output1(
      n_review = n_review,
      n_review_2018 = n_review_2018,
      n_user = n_user,
      top10_user = top10_user.toList,
      n_business = n_business,
      top10_business = top10_business.toList
    )

    //val output11: String = write(output1)
    println(write(output1))
    //println(output11)
  }
}
The error is as follows:
Exception in thread "main" org.json4s.package$MappingException: Can't find ScalaSig for class task1$Output1$1
at org.json4s.reflect.package$.fail(package.scala:95)
at org.json4s.reflect.ScalaSigReader$.$anonfun$findClass$1(ScalaSigReader.scala:63)
at scala.Option.getOrElse(Option.scala:189)
at org.json4s.reflect.ScalaSigReader$.findClass(ScalaSigReader.scala:63)
at org.json4s.reflect.ScalaSigReader$.readConstructor(ScalaSigReader.scala:31)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.ctorParamType(Reflector.scala:119)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.$anonfun$ctorParamType$3(Reflector.scala:109)
at scala.collection.immutable.List.map(List.scala:297)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.ctorParamType(Reflector.scala:106)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.$anonfun$ctorParamType$3(Reflector.scala:109)
at scala.collection.immutable.List.map(List.scala:293)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.ctorParamType(Reflector.scala:106)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.$anonfun$createConstructorDescriptors$7(Reflector.scala:194)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.$anonfun$createConstructorDescriptors$3(Reflector.scala:177)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:75)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.createConstructorDescriptors(Reflector.scala:157)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.constructorsAndCompanion(Reflector.scala:136)
at org.json4s.reflect.Reflector$ClassDescriptorBuilder.result(Reflector.scala:220)
at org.json4s.reflect.Reflector$.createDescriptorWithFormats(Reflector.scala:61)
at org.json4s.reflect.Reflector$.$anonfun$describeWithFormats$1(Reflector.scala:52)
at org.json4s.reflect.package$Memo.apply(package.scala:36)
at org.json4s.reflect.Reflector$.describeWithFormats(Reflector.scala:52)
at org.json4s.Extraction$.decomposeObject$1(Extraction.scala:126)
at org.json4s.Extraction$.internalDecomposeWithBuilder(Extraction.scala:241)
at org.json4s.Extraction$.decomposeWithBuilder(Extraction.scala:70)
at org.json4s.Extraction$.decompose(Extraction.scala:255)
at org.json4s.jackson.Serialization$.write(Serialization.scala:22)
at task1$.main(task1.scala:51)
at task1.main(task1.scala)
Here is my build.sbt:
name := "hw1"
ThisBuild / version := "0.1"
ThisBuild / scalaVersion := "2.12.15"
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.1.2"
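From reading about this error, it seems json4s looks up ScalaSig metadata via reflection, and the compiler only emits that metadata for classes declared at the top level or inside an object, not for classes declared locally inside a method body. The name task1$Output1$1 in the message (note the trailing $1) suggests my Output1 was compiled as a method-local class because I defined it inside main. Below is a minimal sketch of the change I am considering: the case class moved to the top level, with the Spark computations elided and placeholder values substituted (my untested assumption, not a confirmed fix):

import org.json4s._
import org.json4s.jackson.Serialization.write

// Top-level case class: declared outside any method, so the compiler
// emits the ScalaSig metadata that json4s reflection reads.
case class Output1(
  n_review: Int,
  n_review_2018: Int,
  n_user: Int,
  top10_user: List[(String, Int)],
  n_business: Int,
  top10_business: List[(String, Int)]
)

object task1 {
  def main(args: Array[String]): Unit = {
    implicit val formats: Formats = DefaultFormats
    // ... Spark computations as in my code above ...
    val output1 = Output1(0, 0, 0, Nil, 0, Nil) // placeholder values for this sketch
    println(write(output1))
  }
}

Is moving the case class out of main the right fix here, or is something else going on?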