From d80292ced40bcc3a3af4e8207774bcca92e97bf5 Mon Sep 17 00:00:00 2001
From: ddelange <14880945+ddelange@users.noreply.github.com>
Date: Wed, 10 Jan 2024 20:06:42 +0100
Subject: [PATCH] :pushpin: Require pandas~=2.1 and drop support for python 3.8
 (#62)

* :pushpin: Require pandas~=2.1

* Drop support for python 3.8

* Update type hints (builtin generics, collections.abc imports)
---
 .github/workflows/CI.yml |  2 +-
 pyproject.toml           |  6 +++---
 requirements/ci.txt      |  1 -
 requirements/prod.txt    |  1 +
 src/mapply/_groupby.py   | 12 +++---------
 src/mapply/parallel.py   |  3 ++-
 6 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index c1a8d96..ec33c0d 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -17,7 +17,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
 
     steps:
     - uses: actions/checkout@v4
diff --git a/pyproject.toml b/pyproject.toml
index 40f1c92..719a0c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ description = "Sensible multi-core apply function for Pandas"
 readme = "README.md"
 urls = {Repository = "https://github.com/ddelange/mapply", Documentation = "https://mapply.readthedocs.io"}
 authors = [{name = "ddelange", email = "ddelange@delange.dev"}]
-requires-python = ">=3.8" # sync with classifiers below, and tool.ruff and tool.mypy
+requires-python = ">=3.9" # sync with classifiers below, and tool.ruff and tool.mypy
 classifiers = [
   "Development Status :: 5 - Production/Stable",
   "Intended Audience :: Developers",
@@ -16,7 +16,6 @@ classifiers = [
   "License :: OSI Approved :: MIT License",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
@@ -31,7 +30,7 @@ branch = true
 omit = ["site-packages"]
 
 [tool.mypy]
-python_version = "3.8"
+python_version = "3.9"
 ignore_missing_imports = true
 warn_no_return = false
 disallow_untyped_defs = false
@@ -58,6 +57,7 @@ ignore = [
   "D203", # there is D211
   "D213", # there is D212
   "FIX002", # there is TD002,TD003
+  "TCH003", # moving stdlib imports into a TYPE_CHECKING block clutters
 ]
 
 [tool.ruff.extend-per-file-ignores]
diff --git a/requirements/ci.txt b/requirements/ci.txt
index 8a20ae2..9ef1adc 100644
--- a/requirements/ci.txt
+++ b/requirements/ci.txt
@@ -3,4 +3,3 @@ mypy~=1.6
 pre-commit~=3.5
 pytest-cov~=4.1
 pytest~=7.4
-pandas
diff --git a/requirements/prod.txt b/requirements/prod.txt
index 670eece..a273ef4 100644
--- a/requirements/prod.txt
+++ b/requirements/prod.txt
@@ -2,3 +2,4 @@ pathos>=0.3.1  # https://github.com/uqfoundation/pathos/pull/252
 multiprocess
 psutil
 tqdm>=4.27  # from tqdm.auto import tqdm
+pandas~=2.1
diff --git a/src/mapply/_groupby.py b/src/mapply/_groupby.py
index ed1eb97..1fcbc3a 100644
--- a/src/mapply/_groupby.py
+++ b/src/mapply/_groupby.py
@@ -33,7 +33,7 @@
 # ruff: noqa: ERA001
 import logging
 from types import MethodType
-from typing import Any, Callable, Tuple
+from typing import Any, Callable
 
 from mapply.parallel import multiprocessing_imap, tqdm
 
@@ -46,11 +46,10 @@ def run_groupwise_apply(
     *,
     n_workers: int,
     progressbar: bool,
-    args: Tuple[Any, ...] = (),  # noqa: FA100
+    args: tuple[Any, ...] = (),
     **kwargs: Any,
 ):
     """Patch GroupBy.grouper.apply, applying func to each group in parallel."""
-    from pandas import __version__
 
     def apply(self, f, data, axis=0):
         # patching https://github.com/pandas-dev/pandas/blob/v1.5.3/pandas/core/groupby/ops.py#L823
@@ -118,13 +117,8 @@ def apply(self, f, data, axis=0):
 
         return result_values, mutated
 
-    if __version__.split(".") < ["1", "5"]:  # pragma: no cover
-        logger.warning("GroupBy.mapply only works for pandas>=1.5.0. Using single CPU.")
-        return df_or_series.apply(func, *args, **kwargs)
-
-    # 2.1.0 renamed to apply_groupwise ref https://github.com/pandas-dev/pandas/commit/dc947a459b094ccd087557db355cfde5ed97b454
-    attr = "apply" if hasattr(df_or_series.grouper, "apply") else "apply_groupwise"
     # overwrite apply method and restore after execution
+    attr = "apply_groupwise"
     original_apply = getattr(df_or_series.grouper, attr)
     setattr(df_or_series.grouper, attr, MethodType(apply, df_or_series.grouper))
     try:
diff --git a/src/mapply/parallel.py b/src/mapply/parallel.py
index 97bce66..171bccc 100644
--- a/src/mapply/parallel.py
+++ b/src/mapply/parallel.py
@@ -54,8 +54,9 @@ def some_heavy_computation(x, power):
 
 import logging
 import os
+from collections.abc import Iterable, Iterator
 from functools import partial
-from typing import Any, Callable, Iterable, Iterator
+from typing import Any, Callable
 
 import multiprocess
 import psutil