From d80292ced40bcc3a3af4e8207774bcca92e97bf5 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 10 Jan 2024 20:06:42 +0100 Subject: [PATCH] :pushpin: Require pandas~=2.1 and drop support for python 3.8 (#62) * :pushpin: Require pandas~=2.1 * Drop support for python 3.8 * Update type checks --- .github/workflows/CI.yml | 2 +- pyproject.toml | 6 +++--- requirements/ci.txt | 1 - requirements/prod.txt | 1 + src/mapply/_groupby.py | 12 +++--------- src/mapply/parallel.py | 3 ++- 6 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c1a8d96..ec33c0d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 40f1c92..719a0c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "Sensible multi-core apply function for Pandas" readme = "README.md" urls = {Repository = "https://github.com/ddelange/mapply", Documentation = "https://mapply.readthedocs.io"} authors = [{name = "ddelange", email = "ddelange@delange.dev"}] -requires-python = ">=3.8" # sync with classifiers below, and tool.ruff and tool.mypy +requires-python = ">=3.9" # sync with classifiers below, and tool.ruff and tool.mypy classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", @@ -16,7 +16,6 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -31,7 +30,7 @@ branch = true omit = ["site-packages"] [tool.mypy] -python_version = "3.8" +python_version = "3.9" ignore_missing_imports = true warn_no_return = false disallow_untyped_defs = false @@ -58,6 +57,7 @@ ignore = [ "D203", # there is D211 "D213", # there is D212 "FIX002", # there is TD002,TD003 + "TCH003", # clutters ] [tool.ruff.extend-per-file-ignores] diff --git a/requirements/ci.txt b/requirements/ci.txt index 8a20ae2..9ef1adc 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -3,4 +3,3 @@ mypy~=1.6 pre-commit~=3.5 pytest-cov~=4.1 pytest~=7.4 -pandas diff --git a/requirements/prod.txt b/requirements/prod.txt index 670eece..a273ef4 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -2,3 +2,4 @@ pathos>=0.3.1 # https://github.com/uqfoundation/pathos/pull/252 multiprocess psutil tqdm>=4.27 # from tqdm.auto import tqdm +pandas~=2.1 diff --git a/src/mapply/_groupby.py b/src/mapply/_groupby.py index ed1eb97..1fcbc3a 100644 --- a/src/mapply/_groupby.py +++ b/src/mapply/_groupby.py @@ -33,7 +33,7 @@ # ruff: noqa: ERA001 import logging from types import MethodType -from typing import Any, Callable, Tuple +from typing import Any, Callable from mapply.parallel import multiprocessing_imap, tqdm @@ -46,11 +46,10 @@ def run_groupwise_apply( *, n_workers: int, progressbar: bool, - args: Tuple[Any, ...] = (), # noqa: FA100 + args: tuple[Any, ...] = (), **kwargs: Any, ): """Patch GroupBy.grouper.apply, applying func to each group in parallel.""" - from pandas import __version__ def apply(self, f, data, axis=0): # patching https://github.com/pandas-dev/pandas/blob/v1.5.3/pandas/core/groupby/ops.py#L823 @@ -118,13 +117,8 @@ def apply(self, f, data, axis=0): return result_values, mutated - if __version__.split(".") < ["1", "5"]: # pragma: no cover - logger.warning("GroupBy.mapply only works for pandas>=1.5.0. Using single CPU.") - return df_or_series.apply(func, *args, **kwargs) - - # 2.1.0 renamed to apply_groupwise ref https://github.com/pandas-dev/pandas/commit/dc947a459b094ccd087557db355cfde5ed97b454 - attr = "apply" if hasattr(df_or_series.grouper, "apply") else "apply_groupwise" # overwrite apply method and restore after execution + attr = "apply_groupwise" original_apply = getattr(df_or_series.grouper, attr) setattr(df_or_series.grouper, attr, MethodType(apply, df_or_series.grouper)) try: diff --git a/src/mapply/parallel.py b/src/mapply/parallel.py index 97bce66..171bccc 100644 --- a/src/mapply/parallel.py +++ b/src/mapply/parallel.py @@ -54,8 +54,9 @@ def some_heavy_computation(x, power): import logging import os +from collections.abc import Iterable, Iterator from functools import partial -from typing import Any, Callable, Iterable, Iterator +from typing import Any, Callable import multiprocess import psutil