The problem I'm encountering is that I built a Docker image that runs an ETL pipeline. When I run the image locally it works fine, but when I deploy the image to ECR on AWS and invoke my Lambda function that uses this image, it returns a PermissionError: Forbidden. Here is the full stack trace:
{
"errorMessage": "Forbidden",
"errorType": "PermissionError",
"requestId": "ed12c997-d1f7-4cba-8eaf-66b88643271c",
"stackTrace": [
" File \"/var/task/app.py\", line 10, in handler\n main(now)\n",
" File \"/var/task/scraper_backend/jobs/__init__.py\", line 15, in main\n df = transform.update_file(\n",
" File \"/var/task/scraper_backend/jobs/transform.py\", line 24, in update_file\n df = read_file(path)\n",
" File \"/var/task/scraper_backend/jobs/transform.py\", line 15, in read_file\n dataset = pq.ParquetDataset(f\"s3://{bucket}/{key}\", filesystem=fs)\n",
" File \"/var/lang/lib/python3.9/site-packages/pyarrow/parquet/core.py\", line 1763, in __new__\n return _ParquetDatasetV2(\n",
" File \"/var/lang/lib/python3.9/site-packages/pyarrow/parquet/core.py\", line 2452, in __init__\n finfo = filesystem.get_file_info(path_or_paths)\n",
" File \"pyarrow/_fs.pyx\", line 571, in pyarrow._fs.FileSystem.get_file_info\n",
" File \"pyarrow/error.pxi\", line 144, in pyarrow.lib.pyarrow_internal_check_status\n",
" File \"pyarrow/_fs.pyx\", line 1490, in pyarrow._fs._cb_get_file_info\n",
" File \"/var/lang/lib/python3.9/site-packages/pyarrow/fs.py\", line 330, in get_file_info\n info = self.fs.info(path)\n",
" File \"/var/lang/lib/python3.9/site-packages/fsspec/asyn.py\", line 115, in wrapper\n return sync(self.loop, func, *args, **kwargs)\n",
" File \"/var/lang/lib/python3.9/site-packages/fsspec/asyn.py\", line 100, in sync\n raise return_result\n",
" File \"/var/lang/lib/python3.9/site-packages/fsspec/asyn.py\", line 55, in _runner\n result[0] = await coro\n",
" File \"/var/lang/lib/python3.9/site-packages/s3fs/core.py\", line 1248, in _info\n out = await self._call_s3(\n",
" File \"/var/lang/lib/python3.9/site-packages/s3fs/core.py\", line 348, in _call_s3\n return await _error_wrapper(\n",
" File \"/var/lang/lib/python3.9/site-packages/s3fs/core.py\", line 140, in _error_wrapper\n raise err\n"
]
}
My first step was to go to the IAM console to change my permissions; the policy currently looks like this:
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:GetBucketLocation",
"s3:ListAllMyBuckets"
],
"Resource": "arn:aws:s3:::*"
},
{
"Effect": "Allow",
"Action": "s3:*",
"Resource": [
"arn:aws:s3:::my_bucket",
"arn:aws:s3:::my_bucket/*"
]
}
]
}
I also tried the following policy, but it didn't fix the Forbidden permission error either:
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "ListObjectsInBucket",
"Effect": "Allow",
"Action": ["s3:ListBucket"],
"Resource": ["arn:aws:s3:::my_bucket"]
},
{
"Sid": "AllObjectActions",
"Effect": "Allow",
"Action": "s3:*Object",
"Resource": ["arn:aws:s3:::my_bucket/*"]
}
]
}
I'm not sure how long it takes for a policy change to take effect, so maybe that is the problem — but it has been 15–20 minutes already, so I doubt propagation delay is the cause. If anyone has any idea what the problem could be, any help is greatly appreciated.