both of the train data and the test data have 52 features ,,have the same dimension. The way of extracting features are the same. The program has no syntax errors. When I added a random sampling of negative samples, the error occurred.
17/10/02 10:28:23 ERROR HiveMetaStore: Failed to delete table directory: file:/E:/tianchi_taobao/tianchi2/spark-warehouse/re Got exception: org.apache.hadoop.hive.metastore.api.MetaException Unable to delete directory: file:/E:/tianchi_taobao/tianchi2/spark-warehouse/re
Traceback (most recent call last):
File "E:/tianchi_taobao/tianchi2/test4.py", line 256, in <module>
spark.sql("create table re as SELECT user_id,item_id FROM result WHERE prediction>0 ")
File "D:\Anaconda3\lib\site-packages\pyspark\sql\context.py", line 360, in sql
return self.sparkSession.sql(sqlQuery)
File "D:\Anaconda3\lib\site-packages\pyspark\sql\session.py", line 543, in sql
return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
File "D:\spark-2.0.2-bin-hadoop2.7\python\lib\py4j-0.10.3-src.zip\py4j\java_gateway.py", line 1133, in __call__
File "D:\Anaconda3\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "D:\spark-2.0.2-bin-hadoop2.7\python\lib\py4j-0.10.3-src.zip\py4j\protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o24.sql.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 4 in stage 25.0 failed 1 times, most recent failure: Lost task 4.0 in stage 25.0 (TID 3881, localhost): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$11: (vector) => vector)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.foreach(WholeStageCodegenExec.scala:368)
at org.apache.spark.sql.hive.SparkHiveWriterContainer.writeToFile(hiveWriterContainers.scala:185)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$3.apply(InsertIntoHiveTable.scala:131)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$3.apply(InsertIntoHiveTable.scala:131)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.util.NoSuchElementException: key not found: 0.006578947368421052
at scala.collection.MapLike$class.default(MapLike.scala:228)
at scala.collection.AbstractMap.default(Map.scala:59)
at scala.collection.MapLike$class.apply(MapLike.scala:141)
at scala.collection.AbstractMap.apply(Map.scala:59)
at org.apache.spark.ml.feature.VectorIndexerModel$$anonfun$10$$anonfun$apply$4.apply(VectorIndexer.scala:324)
at org.apache.spark.ml.feature.VectorIndexerModel$$anonfun$10$$anonfun$apply$4.apply(VectorIndexer.scala:323)
at scala.collection.immutable.Map$Map2.foreach(Map.scala:137)
at org.apache.spark.ml.feature.VectorIndexerModel$$anonfun$10.apply(VectorIndexer.scala:323)
at org.apache.spark.ml.feature.VectorIndexerModel$$anonfun$10.apply(VectorIndexer.scala:317)
at org.apache.spark.ml.feature.VectorIndexerModel$$anonfun$11.apply(VectorIndexer.scala:362)
at org.apache.spark.ml.feature.VectorIndexerModel$$anonfun$11.apply(VectorIndexer.scala:362)
... 14 more