I have a batch delete that removes over 100 records, so I am chunking the request. The error below occurs INFREQUENTLY; I would say in about 1 out of 200 requests. This runs in an AWS Glue job, so at this time I do not have better logging. My guess is that it is trying to delete a file that does not exist anymore.
S3 Permissions are not a problem.
What exactly is happening here, and how does one prevent it? It currently raises an error and fails the AWS Glue job, and I would imagine the same would happen in AWS Lambda.
class S3SlowDownError(Exception):
    """Signal that S3 rejected a request with the ``SlowDown`` error code.

    Raised in place of the original ``ClientError`` so that the retry
    policy on the batch-delete helper can match on a dedicated type.
    """
def batch_delete_objects(s3_client, s3_bucket, s3_file_names: List[str]):
    """Delete the given keys from an S3 bucket, one chunk at a time.

    Every chunk is sent exactly once, through ``_batch_delete_objects``,
    so that each request is covered by that helper's retry policy.

    Args:
        s3_client: Client object exposing ``delete_objects``.
        s3_bucket: Name of the bucket to delete from.
        s3_file_names: Object keys to delete.

    Returns:
        A list with the "Deleted" entries reported for all chunks.
    """
    # presumably yields lists of at most 100 keys -- confirm against
    # _utils.chunkify.
    chunks: List[Any] = _utils.chunkify(lst=s3_file_names, max_length=100)
    response = []
    for chunk in chunks:
        keys = [{"Key": x} for x in chunk]
        response.extend(
            _batch_delete_objects(s3_client=s3_client, s3_bucket=s3_bucket, keys=keys)
        )
    # Log each containing "directory" once, in a stable order.
    deleted_paths = sorted({os.path.dirname(x["Key"]) for x in response})
    logger.info("Deleted files in paths: %s", deleted_paths)
    return response
def _is_retryable_s3_error(exc: BaseException) -> bool:
    """Return True when a failed batch delete should be attempted again."""
    # Imported here so the predicate does not rely on a module-level import.
    from botocore.parsers import ResponseParserError

    # ResponseParserError covers a response body that cannot be parsed as
    # XML (for example, only the XML declaration arrives).
    return isinstance(exc, (S3SlowDownError, ResponseParserError))


@tenacity.retry(
    retry=tenacity.retry_if_exception(_is_retryable_s3_error),
    wait=tenacity.wait_random_exponential(multiplier=0.5),
    stop=tenacity.stop_after_attempt(max_attempt_number=5),
    reraise=True,
    after=tenacity.after_log(logger, logging.INFO),
)
def _batch_delete_objects(s3_client, s3_bucket, keys: List[Any]):
    """Delete one batch of keys, retrying on transient S3 failures.

    Args:
        s3_client: Client object exposing ``delete_objects``.
        s3_bucket: Name of the bucket to delete from.
        keys: List of ``{"Key": ...}`` dicts to delete in a single request.

    Returns:
        The "Deleted" list from the response, or an empty list when the
        response has no such entry.

    Raises:
        S3SlowDownError: S3 kept answering with ``SlowDown`` until the
            retry attempts were exhausted.
        ClientError: Any other client error; re-raised unchanged.
    """
    try:
        response = s3_client.delete_objects(Bucket=s3_bucket, Delete={"Objects": keys})
    except ClientError as ce:
        if ce.response["Error"]["Code"] == "SlowDown":
            # Translate to the dedicated type the retry predicate matches,
            # keeping the original error as the cause.
            raise S3SlowDownError() from ce
        raise

    # Best effort: objects that could not be deleted do not fail the call,
    # but they are logged instead of being dropped silently.
    errors = response.get("Errors")
    if errors:
        logger.warning(
            "S3 reported %d object(s) that could not be deleted: %s", len(errors), errors
        )
    return response.get("Deleted", [])
Python stacktrace:
ERROR:root:ResponseParserError - Unable to parse response (no element found: line 2, column 0), invalid XML received:
b'<?xml version="1.0" encoding="UTF-8"?>\n'
Traceback (most recent call last):
File "/home/spark/.local/lib/python3.7/site-packages/botocore/parsers.py", line 433, in _parse_xml_string_to_dom
root = parser.close()
File "<string>", line None
xml.etree.ElementTree.ParseError: no element found: line 2, column 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/main_etl_script.py", line 31, in main_handler
spark_session=spark)
File "/tmp/<app>.zip/run_bundle_etl.py", line 638, in run_bundle_etl
raise e
File "/tmp/<app>.zip/run_bundle_etl.py", line 441, in run_bundle_etl
s3_table_prefix=syndicated_data.s3_storage_prefix,
File "/tmp/<app>.zip/s3.py", line 141, in cleanup_overwrite_staging_files
s3_file_names=[x["Key"] for x in files],
File "/tmp/<app>.zip/s3.py", line 30, in batch_delete_objects
} for x in chunk]})["Deleted"])
File "/home/spark/.local/lib/python3.7/site-packages/botocore/client.py", line 316, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/client.py", line 613, in _make_api_call
operation_model, request_dict, request_context)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/client.py", line 632, in _make_request
return self._endpoint.make_request(operation_model, request_dict)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/endpoint.py", line 102, in make_request
return self._send_request(request_dict, operation_model)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/endpoint.py", line 148, in _send_request
request, operation_model, context)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/endpoint.py", line 167, in _get_response
request, operation_model)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/endpoint.py", line 218, in _do_get_response
response_dict, operation_model.output_shape)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/parsers.py", line 242, in parse
parsed = self._do_parse(response, shape)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/parsers.py", line 775, in _do_parse
self._parse_payload(response, shape, member_shapes, final_parsed)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/parsers.py", line 811, in _parse_payload
original_parsed = self._initial_body_parse(response['body'])
File "/home/spark/.local/lib/python3.7/site-packages/botocore/parsers.py", line 897, in _initial_body_parse
return self._parse_xml_string_to_dom(xml_string)
File "/home/spark/.local/lib/python3.7/site-packages/botocore/parsers.py", line 437, in _parse_xml_string_to_dom
"invalid XML received:\n%s" % (e, xml_string))
botocore.parsers.ResponseParserError: Unable to parse response (no element found: line 2, column 0), invalid XML received:
b'<?xml version="1.0" encoding="UTF-8"?>\n'