I'm testing the new Databricks Connect, and I often use SQL variables in my Python scripts on Databricks. However, I'm not able to use those variables through dbconnect. The example below works fine in a Databricks notebook but not in dbconnect:

from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
import pandas as pd

spark = SparkSession.builder.getOrCreate()
sqlContext = SQLContext(spark)

df = spark.createDataFrame(pd.DataFrame({'a':[2,5,8], 'b':[3,5,5]}))
df.createOrReplaceTempView('test_view')

sqlContext.sql("set a_value = 2")
sqlContext.sql("select * from test_view where a = ${a_value}")

In dbconnect I received the following:

---------------------------------------------------------------------------
ParseException                            Traceback (most recent call last)
<ipython-input-50-404f4c5b017c> in <module>
     10 
     11 sqlContext.sql("set a_value = 2")
---> 12 sqlContext.sql("select * from test_view where a = ${a_value}")

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\context.py in sql(self, sqlQuery)
    369         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    370         """
--> 371         return self.sparkSession.sql(sqlQuery)
    372 
    373     @since(1.0)

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\session.py in sql(self, sqlQuery)
    702         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
    703         """
--> 704         return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
    705 
    706     @since(2.0)

c:\users\pc\miniconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1303         answer = self.gateway_client.send_command(command)
   1304         return_value = get_return_value(
-> 1305             answer, self.gateway_client, self.target_id, self.name)
   1306 
   1307         for temp_arg in temp_args:

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\utils.py in deco(*a, **kw)
    132                 # Hide where the exception came from that shows a non-Pythonic
    133                 # JVM exception message.
--> 134                 raise_from(converted)
    135             else:
    136                 raise

c:\users\pc\miniconda3\lib\site-packages\pyspark\sql\utils.py in raise_from(e)

ParseException: 
mismatched input '<EOF>' expecting {'(', 'COLLECT', 'CONVERT', 'DELTA', 'HISTORY', 'MATCHED', 'MERGE', 'OPTIMIZE', 'SAMPLE', 'TIMESTAMP', 'UPDATE', 'VERSION', 'ZORDER', 'ADD', 'AFTER', 'ALL', 'ALTER', 'ANALYZE', 'AND', 'ANTI', 'ANY', 'ARCHIVE', 'ARRAY', 'AS', 'ASC', 'AT', 'AUTHORIZATION', 'BETWEEN', 'BOTH', 'BUCKET', 'BUCKETS', 'BY', 'CACHE', 'CASCADE', 'CASE', 'CAST', 'CHANGE', 'CHECK', 'CLEAR', 'CLONE', 'CLUSTER', 'CLUSTERED', 'CODEGEN', 'COLLATE', 'COLLECTION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMPACT', 'COMPACTIONS', 'COMPUTE', 'CONCATENATE', 'CONSTRAINT', 'COPY', 'COPY_OPTIONS', 'COST', 'CREATE', 'CREDENTIALS', 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', 'DATA', 'DATABASE', DATABASES, 'DAY', 'DBPROPERTIES', 'DEEP', 'DEFINED', 'DELETE', 'DELIMITED', 'DESC', 'DESCRIBE', 'DFS', 'DIRECTORIES', 'DIRECTORY', 'DISTINCT', 'DISTRIBUTE', 'DROP', 'ELSE', 'ENCRYPTION', 'END', 'ESCAPE', 'ESCAPED', 'EXCEPT', 'EXCHANGE', 'EXISTS', 'EXPLAIN', 'EXPORT', 'EXTENDED', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FIELDS', 'FILTER', 'FILEFORMAT', 'FILES', 'FIRST', 'FOLLOWING', 'FOR', 'FOREIGN', 'FORMAT', 'FORMAT_OPTIONS', 'FORMATTED', 'FROM', 'FULL', 'FUNCTION', 'FUNCTIONS', 'GLOBAL', 'GRANT', 'GROUP', 'GROUPING', 'HAVING', 'HOUR', 'IF', 'IGNORE', 'IMPORT', 'IN', 'INDEX', 'INDEXES', 'INNER', 'INPATH', 'INPUTFORMAT', 'INSERT', 'INTERSECT', 'INTERVAL', 'INTO', 'IS', 'ITEMS', 'JOIN', 'KEYS', 'LAST', 'LATERAL', 'LAZY', 'LEADING', 'LEFT', 'LIKE', 'LIMIT', 'LINES', 'LIST', 'LOAD', 'LOCAL', 'LOCATION', 'LOCK', 'LOCKS', 'LOGICAL', 'MACRO', 'MAP', 'MINUTE', 'MONTH', 'MSCK', 'NAMESPACE', 'NAMESPACES', 'NATURAL', 'NO', NOT, 'NULL', 'NULLS', 'OF', 'ON', 'ONLY', 'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OUTPUTFORMAT', 'OVER', 'OVERLAPS', 'OVERLAY', 'OVERWRITE', 'PARTITION', 'PARTITIONED', 'PARTITIONS', 'PATTERN', 'PERCENT', 'PIVOT', 'PLACING', 'POSITION', 'PRECEDING', 'PRIMARY', 'PRINCIPALS', 'PROPERTIES', 'PURGE', 'QUERY', 'RANGE', 'RECORDREADER', 'RECORDWRITER', 'RECOVER', 'REDUCE', 'REFERENCES', 'REFRESH', 'RENAME', 'REPAIR', 'REPLACE', 'RESET', 'RESTRICT', 'REVOKE', 'RIGHT', RLIKE, 'ROLE', 'ROLES', 'ROLLBACK', 'ROLLUP', 'ROW', 'ROWS', 'SCHEMA', 'SECOND', 'SELECT', 'SEMI', 'SEPARATED', 'SERDE', 'SERDEPROPERTIES', 'SESSION_USER', 'SET', 'MINUS', 'SETS', 'SHALLOW', 'SHOW', 'SKEWED', 'SOME', 'SORT', 'SORTED', 'START', 'STATISTICS', 'STORED', 'STRATIFY', 'STRUCT', 'SUBSTR', 'SUBSTRING', 'TABLE', 'TABLES', 'TABLESAMPLE', 'TBLPROPERTIES', TEMPORARY, 'TERMINATED', 'THEN', 'TO', 'TOUCH', 'TRAILING', 'TRANSACTION', 'TRANSACTIONS', 'TRANSFORM', 'TRIM', 'TRUE', 'TRUNCATE', 'TYPE', 'UNARCHIVE', 'UNBOUNDED', 'UNCACHE', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLOCK', 'UNSET', 'USE', 'USER', 'USING', 'VALUES', 'VIEW', 'VIEWS', 'WHEN', 'WHERE', 'WINDOW', 'WITH', 'YEAR', '+', '-', '*', 'DIV', '~', STRING, BIGINT_LITERAL, SMALLINT_LITERAL, TINYINT_LITERAL, INTEGER_VALUE, EXPONENT_VALUE, DECIMAL_VALUE, DOUBLE_LITERAL, BIGDECIMAL_LITERAL, IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 34)

== SQL ==
select * from test_view where a = 
----------------------------------^^^

So, has anyone managed to make these variables work?

Thanks

1 Answer

You can pass parameters/arguments to your SQL statements by programmatically building the SQL string in Python (or Scala) and passing it to sqlContext.sql(string):

sqlContext.sql("set a_value = 2")
sqlContext.sql("select * from test_view where a = ${a_value}").show()

CHEEKATLAPRADEEP
  • If it is useful for you, could you please [accept it as an answer](https://meta.stackexchange.com/questions/5234/how-does-accepting-an-answer-work)? It may help more people who have a similar issue. – CHEEKATLAPRADEEP Nov 06 '20 at 04:20