I am using an AWS Lake Formation blueprint to import a MySQL database into S3. I used the stock blueprint to import the data, but the job failed with the error and stack trace below.
Expression #4 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'DBName.TTT.Num' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
I know I can work around the issue by disabling the ONLY_FULL_GROUP_BY mode in my MySQL database. But shouldn't the AWS-generated query avoid including columns in the SELECT clause that are not in the GROUP BY clause, or else add all of the SELECT columns to the GROUP BY clause?
Also, why does this happen for only a few tables and not all of them?
Are any AWS Lake Formation developers around who can comment on this?
Traceback (most recent call last):
File "script_2020-01-13-17-14-42.py", line 320, in <module>
main()
File "script_2020-01-13-17-14-42.py", line 316, in main
driver.run_transform()
File "script_2020-01-13-17-14-42.py", line 297, in run_transform
transform.transform()
File "script_2020-01-13-17-14-42.py", line 90, in transform
self._snapshot_transform()
File "script_2020-01-13-17-14-42.py", line 78, in _snapshot_transform
table_name=self.source.table_name)
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/PyGlue.zip/awsglue/dynamicframe.py", line 611, in from_catalog
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/PyGlue.zip/awsglue/context.py", line 142, in create_dynamic_frame_from_catalog
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/PyGlue.zip/awsglue/data_source.py", line 36, in getFrame
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
print("[INFO] GRANTed Select permission to {} on table {} :: {}".format(creator_arn, db_name, table_name))
File "/mnt/yarn/usercache/root/appcache/application_1578935347236_0001/container_1578935347236_0001_01_000001/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o1287.getDynamicFrame.
: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: Expression #4 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'DBName.TTT.Num' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:377)
at com.mysql.jdbc.Util.getInstance(Util.java:360)
at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:978)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3887)
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3823)
at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2435)
at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2582)
at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2530)
at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:1907)
at com.mysql.jdbc.PreparedStatement.executeQuery(PreparedStatement.java:2030)
at org.apache.spark.sql.jdbc.glue.GlueJDBCSource$.resolveTable(GlueJDBCSource.scala:47)
at org.apache.spark.sql.jdbc.glue.GlueJDBCSource$.createRelation(GlueJDBCSource.scala:30)
at com.amazonaws.services.glue.util.JDBCWrapper.tableDF(JDBCUtils.scala:805)
at com.amazonaws.services.glue.util.NoCondition$.tableDF(JDBCUtils.scala:84)
at com.amazonaws.services.glue.util.NoJDBCPartitioner$.tableDF(JDBCUtils.scala:123)
at com.amazonaws.services.glue.JDBCDataSource.getDynamicFrame(DataSource.scala:745)
at com.amazonaws.services.glue.DataSource$class.getDynamicFrame(DataSource.scala:77)
at com.amazonaws.services.glue.SparkSQLDataSource.getDynamicFrame(DataSource.scala:586)
at sun.reflect.GeneratedMethodAccessor107.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)