Skip to content

Commit

Permalink
Merge branch 'release-6.2.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
mpenkov committed Sep 14, 2022
2 parents 706b7cf + 07b6d99 commit 377c0de
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Unreleased

# 6.2.0, 14 September 2022

- Fix quadratic time ByteBuffer operations (PR [#711](https://github.com/RaRe-Technologies/smart_open/pull/711), [@Joshua-Landau-Anthropic](https://github.com/Joshua-Landau-Anthropic))

# 6.1.0, 21 August 2022

- Add cert parameter to http transport params (PR [#703](https://github.com/RaRe-Technologies/smart_open/pull/703), [@stev-0](https://github.com/stev-0))
Expand Down
34 changes: 34 additions & 0 deletions benchmark/bytebuffer_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import time
import sys

import smart_open
from smart_open.bytebuffer import ByteBuffer


def raw_bytebuffer_benchmark():
    """Time 10,000 consecutive ``ByteBuffer.fill`` calls of 1000 bytes each.

    A single ``ByteBuffer`` is reused for every call, so the buffer keeps
    being filled across iterations.

    Returns:
        float: elapsed time in seconds for the whole loop.

    Raises:
        AssertionError: if any ``fill`` call returns something other than 1000.
    """
    buffer = ByteBuffer()

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    start = time.perf_counter()
    for _ in range(10_000):
        # The call being measured must not live inside an ``assert``:
        # ``python -O`` strips assert statements, which would silently turn
        # this benchmark into an empty loop.
        filled = buffer.fill([b"X" * 1000])
        if filled != 1000:
            raise AssertionError(
                "ByteBuffer.fill returned %r, expected 1000" % (filled,)
            )
    return time.perf_counter() - start


def file_read_benchmark(filename):
    """Time one ``read`` of up to 100,000,000 bytes from ``filename``.

    The file is opened in binary mode through ``smart_open.open``. Only the
    ``read`` call itself is timed; opening and closing are excluded.

    Args:
        filename: path or URI passed straight to ``smart_open.open``.

    Returns:
        float: elapsed time in seconds for the ``read`` call.

    Prints a notice when fewer than 100,000,000 bytes were read.
    """
    target_size = 100_000_000

    # The context manager guarantees the handle is closed even if read()
    # raises; the original left the file open.
    with smart_open.open(filename, mode="rb") as file:
        # perf_counter() is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        read = file.read(target_size)
        end = time.perf_counter()

    if len(read) < target_size:
        print("File smaller than 100MB")

    return end - start


# The in-memory benchmark always runs; report its elapsed seconds.
raw_elapsed = raw_bytebuffer_benchmark()
print("Raw ByteBuffer benchmark:", raw_elapsed)

# The file benchmark is optional and runs only when a filename is given as
# the first command-line argument.
cli_args = sys.argv[1:]
if cli_args:
    bench_result = file_read_benchmark(cli_args[0])
    print("File read benchmark", bench_result)
6 changes: 3 additions & 3 deletions smart_open/bytebuffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,12 @@ def peek(self, size=-1):
if size < 0 or size > len(self):
size = len(self)

part = self._bytes[self._pos:self._pos+size]
part = bytes(self._bytes[self._pos:self._pos+size])
return part

def empty(self):
"""Remove all bytes from the buffer"""
self._bytes = b''
self._bytes = bytearray()
self._pos = 0

def fill(self, source, size=-1):
Expand Down Expand Up @@ -151,7 +151,7 @@ def fill(self, source, size=-1):
if hasattr(source, 'read'):
new_bytes = source.read(size)
else:
new_bytes = b''
new_bytes = bytearray()
for more_bytes in source:
new_bytes += more_bytes
if len(new_bytes) >= size:
Expand Down
2 changes: 1 addition & 1 deletion smart_open/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '6.1.0'
__version__ = '6.2.0'


if __name__ == '__main__':
Expand Down

0 comments on commit 377c0de

Please sign in to comment.