diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ea46b63..9b0fe41f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 7.0.3, 2024-03-21 + +* add support for zst writing (PR [#812](https://github.com/piskvorky/smart_open/pull/812), [@mpenkov](https://github.com/mpenkov)) +* roll back PR [#788](https://github.com/piskvorky/smart_open/pull/788), restore compatibility with built-in open function ([@mpenkov](https://github.com/mpenkov)) + # 7.0.2, 2024-03-21 * Add `__next__` method to FileLikeProxy (PR [#811](https://github.com/piskvorky/smart_open/pull/811), [@ddelange](https://github.com/ddelange)) diff --git a/integration-tests/test_gcs.py b/integration-tests/test_gcs.py index 051193d7..bd8b58d3 100644 --- a/integration-tests/test_gcs.py +++ b/integration-tests/test_gcs.py @@ -28,9 +28,6 @@ def write_read(key, content, write_mode, read_mode, **kwargs): with smart_open.open(key, read_mode, **kwargs) as fin: return fin.read() -def open_only(key, read_mode, **kwargs) -> None: - with smart_open.open(key, read_mode, **kwargs): - pass def read_length_prefixed_messages(key, read_mode, **kwargs): result = io.BytesIO() @@ -124,10 +121,3 @@ def test_gcs_performance_small_reads(benchmark): actual = benchmark(read_length_prefixed_messages, key, 'rb', buffering=ONE_MIB) assert actual == one_megabyte_of_msgs - -def test_gcs_performance_open(benchmark): - # we don't need to use a uri that actually exists in order to call GCS's open() - key = "gs://some-bucket/some_blob.txt" - transport_params = {'client': google.cloud.storage.Client()} - benchmark(open_only, key, 'rb', transport_params=transport_params) - assert True diff --git a/setup.py b/setup.py index 44d45d6c..a9a4fc53 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,6 @@ def read(fname): 'boto3', 'pytest', 'pytest-rerunfailures', - 'pytest-benchmark', ] setup( diff --git a/smart_open/compression.py b/smart_open/compression.py index 499f557d..1c88224e 100644 --- a/smart_open/compression.py +++ 
b/smart_open/compression.py @@ -106,8 +106,8 @@ def _handle_gzip(file_obj, mode): def _handle_zstd(file_obj, mode): - import zstandard as zstd - result = zstd.ZstdDecompressor().stream_reader(file_obj, closefd=True) + import zstandard # type: ignore + result = zstandard.open(filename=file_obj, mode=mode) return result diff --git a/smart_open/gcs.py b/smart_open/gcs.py index f1ced9eb..1f20ea6b 100644 --- a/smart_open/gcs.py +++ b/smart_open/gcs.py @@ -128,7 +128,10 @@ def Reader(bucket, warn_deprecated('line_terminator') bkt = client.bucket(bucket) - blob = bkt.blob(key) + blob = bkt.get_blob(key) + + if blob is None: + raise google.cloud.exceptions.NotFound(f'blob {key} not found in {bucket}') return blob.open('rb', **blob_open_kwargs) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 067ea19b..414a9c64 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -77,6 +77,18 @@ def named_temporary_file(mode='w+b', prefix=None, suffix=None, delete=True): logger.error(e) +def test_zst_write(): + with named_temporary_file(suffix=".zst") as tmp: + with smart_open.open(tmp.name, "wt") as fout: + print("hello world", file=fout) + print("this is a test", file=fout) + + with smart_open.open(tmp.name, "rt") as fin: + got = list(fin) + + assert got == ["hello world\n", "this is a test\n"] + + class ParseUriTest(unittest.TestCase): """ Test ParseUri class. diff --git a/smart_open/version.py b/smart_open/version.py index 42665109..8544daac 100644 --- a/smart_open/version.py +++ b/smart_open/version.py @@ -1,4 +1,4 @@ -__version__ = '7.0.2' +__version__ = '7.0.3' if __name__ == '__main__':