While running below code getting the error..it is azure Data bricks hands on EDA.
df_typed = spark.sql("SELECT cast(Price as int),
cast(Age as int), cast(KM as int), FuelType,
cast(HP as int), cast(MetColor as int),
cast(Automatic as int), cast(CC as int),
cast(Doors as int), cast(Weight as int) FROM usedcars_CSV")
df_typed
Error:
Py4JJavaError Traceback (most recent call last)
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
Py4JJavaError: An error occurred while calling o204.sql.
: org.apache.spark.sql.AnalysisException: cannot resolve '`Price`' given input columns: [default.usedcars_csv._c3, default.usedcars_csv._c6, default.usedcars_csv._c5, default.usedcars_csv._c1, default.usedcars_csv._c7, default.usedcars_csv._c0, default.usedcars_csv._c9, default.usedcars_csv._c2, default.usedcars_csv._c4, default.usedcars_csv._c8]; line 1 pos 12;
'Project [unresolvedalias(cast('Price as int), None), unresolvedalias(cast('Age as int), None), unresolvedalias(cast('KM as int), None), 'FuelType, unresolvedalias(cast('HP as int), None), unresolvedalias(cast('MetColor as int), None), unresolvedalias(cast('Automatic as int), None), unresolvedalias(cast('CC as int), None), unresolvedalias(cast('Doors as int), None), unresolvedalias(cast('Weight as int), None)]
+- SubqueryAlias `default`.`usedcars_csv`
+- Relation[_c0#347,_c1#348,_c2#349,_c3#350,_c4#351,_c5#352,_c6#353,_c7#354,_c8#355,_c9#356] csv
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:120)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:111)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$6.apply(TreeNode.scala:303)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$6.apply(TreeNode.scala:303)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:76)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:302)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$8.apply(TreeNode.scala:353)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:207)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:351)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$8.apply(TreeNode.scala:353)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:207)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:351)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:300)