
I am using SqlActivity in AWS Data Pipeline to copy data to my Redshift table.
The script runs fine if I specify a single file name like part-00000.gz, but when I specify the wildcard *.gz to pick up all the files in the directory, the wildcard is treated as a literal file name, so COPY complains that the file Output/Prod/Bens_Analytics/2015/02/10/IP_To_FileName/*.gz does not exist.

begin transaction;

-- Stage incoming rows in a temp table shaped like the target table
create temp table ip_to_filename_staging_table (like bens_analytics_ip_to_filename);

copy ip_to_filename_staging_table
from 's3://er-hadoop/Output/Prod/Bens_Analytics/2015/02/10/IP_To_FileName/*.gz'
credentials 'aws_access_key_id=<>;aws_secret_access_key=<>'
gzip delimiter '\t' COMPUPDATE OFF STATUPDATE OFF;

-- Remove staged rows that already exist in the target, so the insert below does not duplicate them
delete from ip_to_filename_staging_table
using bens_analytics_ip_to_filename
where ip_to_filename_staging_table.day = bens_analytics_ip_to_filename.day
  and ip_to_filename_staging_table.ip = bens_analytics_ip_to_filename.ip
  and ip_to_filename_staging_table.filename = bens_analytics_ip_to_filename.filename;

insert into bens_analytics_ip_to_filename
select * from ip_to_filename_staging_table;

drop table ip_to_filename_staging_table;
end transaction;


1 Answer


I figured it out: when I used a path like 's3://er-hadoop/Output/Prod/Bens_Analytics/2015/02/10/IP_To_FileName/part', it picked up all the part-xxxxx.gz files. Redshift's COPY treats the S3 path as a key prefix rather than a wildcard pattern, so every object whose key starts with that prefix is loaded.
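For reference, a minimal sketch of the corrected COPY statement, using the same staging table and credential placeholders as in the question:

-- The path ends in the prefix 'part', not a glob; COPY loads every
-- object whose key begins with .../IP_To_FileName/part
copy ip_to_filename_staging_table
from 's3://er-hadoop/Output/Prod/Bens_Analytics/2015/02/10/IP_To_FileName/part'
credentials 'aws_access_key_id=<>;aws_secret_access_key=<>'
gzip delimiter '\t' COMPUPDATE OFF STATUPDATE OFF;

If prefix matching is too coarse (for example, when other objects share the same prefix), listing the exact keys in a manifest file and adding the MANIFEST option to COPY is the usual alternative.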
