-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Limiting network traffic rate for uploading stage. (#48)
* Limiting network traffic rate for uploading stage.
- Loading branch information
1 parent
3440923
commit 00ff3e2
Showing
8 changed files
with
221 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
ch_backup/storage/async_pipeline/base_pipeline/rate_limiter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
Rate limiter module. | ||
""" | ||
import time | ||
from typing import Callable | ||
|
||
|
||
class RateLimiter:
    """
    Rate limiter based on token bucket algorithm without a separate replenishment process.

    The bucket holds at most ``limit_per_sec`` tokens and starts full. It is refilled
    lazily, on each request, proportionally to the time elapsed since the last refill.
    A limit of 0 disables limiting: every request is satisfied in full.
    """

    def __init__(
        self, limit_per_sec: int, get_time_func: Callable[[], float] = time.time
    ) -> None:
        """
        :param limit_per_sec: bucket capacity and refill rate (tokens per second);
            0 means "unlimited".
        :param get_time_func: clock returning the current time in seconds
            (injectable so the limiter can be driven by a fake clock).
        """
        self._limit_per_sec = limit_per_sec
        self._get_time_func = get_time_func
        self._bucket_tokens = self._limit_per_sec
        self._bucket_last_update = self._get_time_func()

    def _replenish_bucket(self) -> None:
        """
        Replenish the bucket with tokens depending on the time of the last update.

        Only whole tokens are added. The last-update timestamp is advanced just by
        the time those tokens account for, so the fractional remainder keeps
        accumulating; otherwise callers polling faster than one token period
        would never see the bucket refill.
        """
        current_time = self._get_time_func()
        lapse = current_time - self._bucket_last_update
        if lapse < 0:
            # The clock went backwards: re-anchor instead of removing tokens.
            self._bucket_last_update = current_time
            return

        new_tokens = int(lapse * self._limit_per_sec)
        if new_tokens <= 0:
            # Less than one token earned so far: keep the old timestamp so that
            # the elapsed time is not thrown away.
            return

        self._bucket_tokens = min(
            self._limit_per_sec, self._bucket_tokens + new_tokens
        )
        # Consume only the time that corresponds to the whole tokens just added;
        # clamp to the current time to absorb floating point rounding.
        self._bucket_last_update = min(
            current_time,
            self._bucket_last_update + new_tokens / self._limit_per_sec,
        )

    def extract_tokens(self, desired_quantity: int) -> int:
        """
        Extract minimum from available in bucket and wanted number of tokens from the bucket.

        :param desired_quantity: number of tokens the caller would like to take.
        :return: number of tokens actually taken (may be 0); always
            ``desired_quantity`` when the limiter is disabled.
        """
        if self._limit_per_sec == 0:
            return desired_quantity

        self._replenish_bucket()
        extracted = min(desired_quantity, self._bucket_tokens)

        self._bucket_tokens -= extracted
        return extracted

    def grant(self, tokens: int = 1) -> bool:
        """
        If there's enough tokens in a bucket to grant
        requested number of tokens extract them and return True. Otherwise return False.

        Note: the bucket never holds more than ``limit_per_sec`` tokens, so a
        request larger than the limit is never granted (unless the limit is 0).
        """
        if self._limit_per_sec == 0:
            return True
        self._replenish_bucket()

        if self._bucket_tokens >= tokens:
            self._bucket_tokens -= tokens
            return True

        return False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,3 +17,4 @@ | |
StartMultipartUploadStage, | ||
StorageUploadingStage, | ||
) | ||
from .storage.rate_limiter_stage import RateLimiterStage |
37 changes: 37 additions & 0 deletions
37
ch_backup/storage/async_pipeline/stages/storage/rate_limiter_stage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
""" | ||
Data rate limiting stage. | ||
""" | ||
|
||
import time | ||
from typing import Iterator | ||
|
||
from ch_backup.storage.async_pipeline.base_pipeline.handler import Handler | ||
from ch_backup.storage.async_pipeline.base_pipeline.rate_limiter import RateLimiter | ||
from ch_backup.storage.async_pipeline.stages.types import StageType | ||
|
||
|
||
class RateLimiterStage(Handler):
    """
    A bottleneck for controlling the amount of data passed on, to prevent excessive loading.
    Based on token bucket algorithm.
    """

    stype = StageType.STORAGE

    def __init__(
        self, traffic_limit_per_sec: int, retry_interval: float = 0.01
    ) -> None:
        """
        :param traffic_limit_per_sec: limit handed to the underlying RateLimiter,
            one token per byte of payload.
        :param retry_interval: seconds to sleep before asking the limiter again
            when part of the payload is still waiting.
        """
        self._retry_interval = retry_interval
        self._rate_limiter = RateLimiter(limit_per_sec=traffic_limit_per_sec)

    def __call__(self, value: bytes, index: int) -> Iterator[bytes]:
        """
        Yield `value` in pieces no larger than what the rate limiter allows.

        Sleeps `retry_interval` between attempts while data is still pending.
        Nothing is yielded for an attempt in which the limiter hands out no tokens.
        """
        total = len(value)
        offset = 0
        while offset < total:
            granted = self._rate_limiter.extract_tokens(total - offset)
            # Never pass on more than what is left, whatever the limiter returns.
            pass_bytes = min(granted, total - offset)

            if pass_bytes > 0:
                # Skip empty chunks: they carry no data for the next stage.
                yield value[offset : offset + pass_bytes]
                offset += pass_bytes

            if offset < total:
                time.sleep(self._retry_interval)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
Feature: Backup & Restore sources scenario with traffic limit. | ||
|
||
Background: | ||
Given default configuration | ||
And a working s3 | ||
And a working zookeeper on zookeeper01 | ||
And a working clickhouse on clickhouse01 | ||
And clickhouse on clickhouse01 has test schema | ||
And clickhouse01 has test clickhouse data test1 | ||
|
||
Scenario Outline: Backup and restore with an upload traffic rate limit. | ||
Given ch-backup configuration on clickhouse01 | ||
""" | ||
rate_limiter: | ||
max_upload_rate: <rate> | ||
""" | ||
When we create clickhouse01 clickhouse backup | ||
Then we got the following backups on clickhouse01 | ||
| num | state | data_count | link_count | title | | ||
| 0 | created | 4 | 0 | shared | | ||
|
||
When we restore clickhouse backup #0 to clickhouse02 | ||
Given a working clickhouse on clickhouse02 | ||
Then we got same clickhouse data at clickhouse01 clickhouse02 | ||
|
||
Examples: | ||
| rate | | ||
# unlimited | ||
| 0 | | ||
# 5MB | ||
| 5242880 | | ||
# 16 MB | ||
| 16777216 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
""" | ||
Unit test for RateLimiter. | ||
""" | ||
from typing import List | ||
|
||
import pytest | ||
|
||
from ch_backup.storage.async_pipeline.base_pipeline.rate_limiter import RateLimiter | ||
|
||
|
||
class TimeMocker:
    """
    Fake clock for tests: time only moves when sleep() is called.
    """

    def __init__(self) -> None:
        # Current fake time, in seconds.
        self._timer = 0.0

    def time(self) -> float:
        """Return the current fake time."""
        return self._timer

    def sleep(self, sleep_time: float) -> None:
        """Advance the fake time by `sleep_time` without actually waiting."""
        self._timer += sleep_time
|
||
|
||
@pytest.mark.parametrize(
    "data_size, rate, expected_time",
    [
        # For a non-zero rate and non-empty data:
        # expected_time = ceil(data_size / rate) - 1
        # (the bucket starts full, so the first portion costs no waiting).
        (0, 0, 0),
        (0, 10000, 0),
        (1000, 0, 0),
        (10, 1, 9),
        (1, 10, 0),
        (10, 10, 0),
        (10, 4, 2),
        (123456, 5321, 23),
    ],
)
def test_rate_limiter_extract(data_size: int, rate: int, expected_time: int) -> None:
    """
    Drain `data_size` bytes through extract_tokens, advancing the fake clock by
    one second after every incomplete pass, and check the total time spent.
    """
    clock = TimeMocker()
    remaining = bytes("a" * data_size, encoding="utf-8")
    limiter = RateLimiter(rate, clock.time)

    while remaining:
        taken = limiter.extract_tokens(len(remaining))
        remaining = remaining[taken:]
        if remaining:
            clock.sleep(1)

    assert clock.time() == expected_time
|
||
|
||
@pytest.mark.parametrize(
    "chunks_sizes, rate, expected_time",
    [
        ([100, 123, 531, 1], 0, 0),
        ([1], 1, 0),
        ([1, 1, 1], 2, 1),
        ([1, 2, 2, 1], 2, 3),
        ([10, 1, 9, 2, 11], 11, 2),
        ([1, 2, 1, 2, 3, 1, 1, 1], 3, 3),
    ],
)
def test_rate_limiter_grand(
    chunks_sizes: List[int], rate: int, expected_time: int
) -> None:
    """
    Request each chunk through grant(), advancing the fake clock by one second
    after every refusal, and check the total time spent.
    """
    clock = TimeMocker()
    limiter = RateLimiter(rate, clock.time)

    for size in chunks_sizes:
        granted = limiter.grant(size)
        while not granted:
            clock.sleep(1)
            granted = limiter.grant(size)

    assert clock.time() == expected_time