From 9fa61a19703c6a7c53db82b19e6e498af152f54d Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 21 Mar 2024 13:17:47 +0900 Subject: [PATCH 1/6] bump version to 7.0.2.dev0 --- smart_open/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_open/version.py b/smart_open/version.py index 42665109..8dde8bdc 100644 --- a/smart_open/version.py +++ b/smart_open/version.py @@ -1,4 +1,4 @@ -__version__ = '7.0.2' +__version__ = '7.0.2.dev0' if __name__ == '__main__': From 1b271bfe062cf745d8e8a8420117d6697a5fddd6 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 21 Mar 2024 17:01:09 +0900 Subject: [PATCH 2/6] Revert "Make calls to smart_open.open() for GCS 1000x faster by avoiding unnecessary GCS API call (#788)" This reverts commit bcc2335bebd82bbfee4961ae1585974954aaf184. --- integration-tests/test_gcs.py | 10 ---------- setup.py | 3 +-- smart_open/gcs.py | 5 ++++- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/integration-tests/test_gcs.py b/integration-tests/test_gcs.py index 051193d7..bd8b58d3 100644 --- a/integration-tests/test_gcs.py +++ b/integration-tests/test_gcs.py @@ -28,9 +28,6 @@ def write_read(key, content, write_mode, read_mode, **kwargs): with smart_open.open(key, read_mode, **kwargs) as fin: return fin.read() -def open_only(key, read_mode, **kwargs) -> None: - with smart_open.open(key, read_mode, **kwargs): - pass def read_length_prefixed_messages(key, read_mode, **kwargs): result = io.BytesIO() @@ -124,10 +121,3 @@ def test_gcs_performance_small_reads(benchmark): actual = benchmark(read_length_prefixed_messages, key, 'rb', buffering=ONE_MIB) assert actual == one_megabyte_of_msgs - -def test_gcs_performance_open(benchmark): - # we don't need to use a uri that actually exists in order to call GCS's open() - key = "gs://some-bucket/some_blob.txt" - transport_params = {'client': google.cloud.storage.Client()} - benchmark(open_only, key, 'rb', transport_params=transport_params) - assert True diff --git 
a/setup.py b/setup.py index 44d45d6c..735d3c36 100644 --- a/setup.py +++ b/setup.py @@ -49,8 +49,7 @@ def read(fname): 'responses', 'boto3', 'pytest', - 'pytest-rerunfailures', - 'pytest-benchmark', + 'pytest-rerunfailures' ] setup( diff --git a/smart_open/gcs.py b/smart_open/gcs.py index f1ced9eb..1f20ea6b 100644 --- a/smart_open/gcs.py +++ b/smart_open/gcs.py @@ -128,7 +128,10 @@ def Reader(bucket, warn_deprecated('line_terminator') bkt = client.bucket(bucket) - blob = bkt.blob(key) + blob = bkt.get_blob(key) + + if blob is None: + raise google.cloud.exceptions.NotFound(f'blob {key} not found in {bucket}') return blob.open('rb', **blob_open_kwargs) From 01b14c6a4f85c62827c0b657ce77df4b37e23d81 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 21 Mar 2024 17:27:48 +0900 Subject: [PATCH 3/6] add support for zst writing (#812) * add support for zst writing * windows dammit * use our special named_temporary_file decorator --- smart_open/compression.py | 4 ++-- smart_open/tests/test_smart_open.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/smart_open/compression.py b/smart_open/compression.py index 499f557d..1c88224e 100644 --- a/smart_open/compression.py +++ b/smart_open/compression.py @@ -106,8 +106,8 @@ def _handle_gzip(file_obj, mode): def _handle_zstd(file_obj, mode): - import zstandard as zstd - result = zstd.ZstdDecompressor().stream_reader(file_obj, closefd=True) + import zstandard # type: ignore + result = zstandard.open(filename=file_obj, mode=mode) return result diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 067ea19b..414a9c64 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -77,6 +77,18 @@ def named_temporary_file(mode='w+b', prefix=None, suffix=None, delete=True): logger.error(e) +def test_zst_write(): + with named_temporary_file(suffix=".zst") as tmp: + with smart_open.open(tmp.name, "wt") as fout: + print("hello world", 
file=fout) + print("this is a test", file=fout) + + with smart_open.open(tmp.name, "rt") as fin: + got = list(fin) + + assert got == ["hello world\n", "this is a test\n"] + + class ParseUriTest(unittest.TestCase): """ Test ParseUri class. From f2fa89df51150dcca74b3f87b027979cd9473149 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 21 Mar 2024 17:57:10 +0900 Subject: [PATCH 4/6] bump version to 7.0.3 --- smart_open/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_open/version.py b/smart_open/version.py index 8dde8bdc..8544daac 100644 --- a/smart_open/version.py +++ b/smart_open/version.py @@ -1,4 +1,4 @@ -__version__ = '7.0.2.dev0' +__version__ = '7.0.3' if __name__ == '__main__': From 5ae44e81c485e4370da72efc4acd5ca866b92597 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 21 Mar 2024 17:58:57 +0900 Subject: [PATCH 5/6] updated CHANGELOG.md for version 7.0.3 --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ea46b63..9b0fe41f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 7.0.3, 2024-03-21 + +* add support for zst writing (PR [#812](https://github.com/piskvorky/smart_open/pull/812), [@mpenkov](https://github.com/mpenkov)) +* roll back PR [#788](https://github.com/piskvorky/smart_open/pull/788), restore compatibility with built-in open function ([@mpenkov](https://github.com/mpenkov)) + # 7.0.2, 2024-03-21 * Add `__next__` method to FileLikeProxy (PR [#811](https://github.com/piskvorky/smart_open/pull/811), [@ddelange](https://github.com/ddelange)) From 3864134695ea2784e989715308f74171d609f650 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 21 Mar 2024 17:59:53 +0900 Subject: [PATCH 6/6] patch setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 735d3c36..a9a4fc53 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ def read(fname): 'responses', 'boto3', 'pytest', - 
'pytest-rerunfailures' + 'pytest-rerunfailures', ] setup(