I am using an AWS Glue Spark job (Python) to sync data from S3 to an on-prem SQL Server. The job uses AWS Wrangler (AWS Data Wrangler), and I attached a pyodbc wheel file to it. When I run the job, I get this error: "ModuleNotFoundError: You need to install pyodbc respectively the AWS Data Wrangler package with the sqlserver extra for using the sqlserver module".
I need help installing pyodbc so that it works together with the AWS Data Wrangler package.
I also tried checking for pyodbc with importlib.util.find_spec("pyodbc") before importing it, but I get the same error.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
import importlib
import awswrangler as wr
from awswrangler import _data_types
from awswrangler import _databases as _db_utils
from awswrangler import exceptions
from awswrangler import sqlserver
# Glue job body: initialise the Glue/Spark job, open a SQL Server connection
# through awswrangler using a Glue Catalog connection, and read a sample of
# rows from dbo.GlueDataSync.

# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is bound as an attribute; import it explicitly before using it.
import importlib.util

# NOTE(review): none of these names are defined in this script — this looks
# copied from a library module; confirm whether it is needed here.
__all__ = ["connect", "read_sql_query", "read_sql_table", "to_sql"]

# Probe for pyodbc without raising, and import it only when it is installed.
# awswrangler's sqlserver functions need pyodbc and raise ModuleNotFoundError
# when it is missing from the job environment.
_pyodbc_found = importlib.util.find_spec("pyodbc")
if _pyodbc_found:
    import pyodbc

args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

# wr.sqlserver.connect accepts `odbc_driver_version` (an integer ODBC driver
# major version); `jdbc_driver_version` is not one of its parameters.
# NOTE(review): confirm which ODBC driver version is installed for this job.
con = wr.sqlserver.connect(connection="glue_to_onprem_test_1", odbc_driver_version=17)
try:
    df = wr.sqlserver.read_sql_query(sql="SELECT TOP 10 * FROM dbo.GlueDataSync", con=con)
finally:
    # Always release the database connection, even when the query fails.
    con.close()