From 1eb680dcaf8543831b66b3e8a6c9146fe058ef8f Mon Sep 17 00:00:00 2001 From: Vasyl Spachynskyi Date: Wed, 4 Jan 2023 16:53:07 +0200 Subject: [PATCH 1/2] Fix for #684 - terminate multipart upload if exception is raised --- smart_open/compression.py | 15 ++++++++++++--- smart_open/tests/test_s3.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/smart_open/compression.py b/smart_open/compression.py index 08469e74..7fd19691 100644 --- a/smart_open/compression.py +++ b/smart_open/compression.py @@ -8,6 +8,9 @@ """Implements the compression layer of the ``smart_open`` library.""" import logging import os.path +import sys + +from smart_open.s3 import MultipartWriter logger = logging.getLogger(__name__) @@ -83,9 +86,15 @@ def close_both(*args): try: outer_close() finally: - if inner: - inner, fp = None, inner - fp.close() + ex, _, _ = sys.exc_info() + + # https://github.com/RaRe-Technologies/smart_open/issues/684 + # If exception is raised multipart writer should be terminated to abort file uploading to S3 + # If not incomplete file will be saved in bucket + if type(inner) == MultipartWriter and ex: + inner.terminate() + elif inner: + inner.close() outer.close = close_both diff --git a/smart_open/tests/test_s3.py b/smart_open/tests/test_s3.py index a91a731e..fa2098a0 100644 --- a/smart_open/tests/test_s3.py +++ b/smart_open/tests/test_s3.py @@ -555,6 +555,34 @@ def test_writebuffer(self): assert actual == contents + def test_write_gz_with_error(self): + """Does s3 multipart chunking work correctly if exception is raised for compression file?""" + + session = boto3.Session() + resource = session.resource('s3', region_name='us-east-1') + resource.create_bucket(Bucket=BUCKET_NAME) + + with self.assertRaises(ValueError): + with smart_open.open( + f's3://{BUCKET_NAME}/{WRITE_KEY_NAME}', + mode="wb", + compression='.gz', + transport_params={ + "multipart_upload": True, + "min_part_size": 10, + } + ) as fout: + fout.write(b"test12345678test12345678") + fout.write(b"test\n") + + raise ValueError("some error") + + with self.assertRaises(OSError) as cm: + smart_open.s3.open(BUCKET_NAME, WRITE_KEY_NAME, 'rb') + + assert 'The specified key does not exist.' in cm.exception.args[0] + + @moto.mock_s3 class SinglepartWriterTest(unittest.TestCase): """ From bbb257f42b05c04aefe367cecb991b81297a7b2b Mon Sep 17 00:00:00 2001 From: Vasyl Spachynskyi Date: Wed, 4 Jan 2023 17:02:28 +0200 Subject: [PATCH 2/2] Make static code analyser happy --- smart_open/tests/test_s3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smart_open/tests/test_s3.py b/smart_open/tests/test_s3.py index fa2098a0..453e92d7 100644 --- a/smart_open/tests/test_s3.py +++ b/smart_open/tests/test_s3.py @@ -554,7 +554,6 @@ def test_writebuffer(self): assert actual == contents - def test_write_gz_with_error(self): """Does s3 multipart chunking work correctly if exception is raised for compression file?"""