I have a database table column address and want to search for abbreviations (Address like STE,ST,AVE,BVLD, etc., and replace them all State, Street Avenue, etc.). I have the Abbreviation and Long form in a table in Snowflake and also a table having an address in the table. I have implemented the logic using the Snowflake connector to bring the data into dataframes and populate the Abbreviations table into a dictionary inside python so that lookup is faster.
Please let me know where I am missing the code. I am getting an str error when trying to use find, contains, etc functions
ADD_ABBREVIATIONS={};
#print(df)
sql_abbreviations="SELECT ABBRVTN_SHRT_CD,ABBRVTN_DESCRIPTION FROM
DATAANALYTICS.PUBLIC.ADDRESS_STANDARDIZATION"
df1=pd.read_sql(sql_abbreviations,engine)
#print(df1)
ADD_ABBREVIATIONS=df1.to_dict(orient="records");
#print(ADD_ABBREVIATIONS)
sql_final_query= "SELECT DISTINCT
CONSUMER_NUMBER,CONSUMER_SHPPNG_FNAME,CONSUMER_SHPPNG_LNAME,IFNULL(UPPER(TRIM(CONSUMER_SHPPNG_ADDRESSL1)),' ') AS Consumer_Add1,IFNULL(UPPER(TRIM(CONSUMER_SHPPNG_ADDRESSL2)),' ') AS Consumer_Add2 FROM ( SELECT CUSTOMER_NUMBER AS CONSUMER_NUMBER,SHIPPING_FIRSTNAME AS CONSUMER_SHPPNG_FNAME,SHIPPING_LASTNAME AS CONSUMER_SHPPNG_LNAME, CASE SHIPPING_ADDRESS_LINE1 WHEN NULL THEN 'Unknown' WHEN '' THEN 'Unknown' ELSE SHIPPING_ADDRESS_LINE1 END AS CONSUMER_SHPPNG_ADDRESSL1, CASE SHIPPING_ADDRESS_LINE2 WHEN NULL THEN 'Unknown' WHEN '' THEN 'Unknown' ELSE SHIPPING_ADDRESS_LINE2 END AS CONSUMER_SHPPNG_ADDRESSL2 FROM DATAANALYTICS.PUBLIC.LNDG_WORLDPANTRY ) X ORDER BY CONSUMER_NUMBER ASC"
df = pd.read_sql(sql_final_query,engine)
for i, row in df.iterrows():
#print(row[3])
CONSUM_ADD1=row[3]
#print(CONSUM_ADD1)
print(ADD_ABBREVIATIONS)
for key, value in ADD_ABBREVIATIONS:
print(ADD_ABBREVIATIONS)
print(ADD_ABBREVIATIONS.getitem(key))
new_varkey = key[0]
new_varval = value[0]
print(new_varkey)
if CONSUM_ADD1.find(new_varkey):
print("No key is here!")
else:
print("Found key in the string.")
#for key, value in ADD_ABBREVIATIONS[0].items():
# for v in range(len(value)):
#print(key)
#print(v)
#if ADD_ABBREVIATIONS[0].items().find(row[3]) == -1:
# print("No key is here!")
#else:
#print("Found key in the string.")
#contain = CONSUM_ADD1[(CONSUM_ADD1.str.contains(key[k]))]
#if not contain.empty:
# print(k)
if row.items('Consumer_Add1').isin(ADD_ABBREVIATIONS.key()):
{
print('Key Exist')
}
else:
{
print('Key does not Exist')
}