From 22773aca350b0fd8890216b5e4facbcdcbee385a Mon Sep 17 00:00:00 2001 From: Ryan Morshead Date: Mon, 21 Aug 2023 21:25:53 -0600 Subject: [PATCH 1/5] handle dataclass field type sentinels --- cloudpickle/cloudpickle_fast.py | 24 ++++++++++++++++++++++++ tests/cloudpickle_test.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index ee1f4b8e..30a0ce4f 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -13,6 +13,7 @@ import _collections_abc import abc import copyreg +import dataclasses import io import itertools import logging @@ -482,6 +483,10 @@ def _odict_items_reduce(obj): return _make_dict_items, (dict(obj), True) +def _dataclass_field_base_reduce(obj): + return _get_dataclass_field_type_sentinel, (obj.name,) + + # COLLECTIONS OF OBJECTS STATE SETTERS # ------------------------------------ # state setters are called at unpickling time, once the object is created and @@ -537,6 +542,24 @@ def _class_setstate(obj, state): return obj +# COLLECTION OF DATACLASS UTILITIES +# --------------------------------- +# There are some internal sentinel values whose identity must be preserved when +# unpickling dataclass fields. Each sentinel value has a unique name that we can +# use to retrieve its identity at unpickling time. 
+ + +_DATACLASSE_FIELD_TYPE_SENTINELS = { + dataclasses._FIELD.name: dataclasses._FIELD, + dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR, + dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR, +} + + +def _get_dataclass_field_type_sentinel(name): + return _DATACLASSE_FIELD_TYPE_SENTINELS[name] + + class CloudPickler(Pickler): # set of reducers defined and used by cloudpickle (private) _dispatch_table = {} @@ -565,6 +588,7 @@ class CloudPickler(Pickler): _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce _dispatch_table[abc.abstractproperty] = _property_reduce + _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2bd1f257..2232f89d 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2,6 +2,7 @@ import abc import collections import base64 +import dataclasses import functools import io import itertools @@ -2770,6 +2771,33 @@ def func_with_globals(): "Expected a single deterministic payload, got %d/5" % len(vals) ) + def test_dataclass_fields_are_preserved(self): + + @dataclasses.dataclass + class SampleDataclass: + x: int + y: dataclasses.InitVar[int] + z: typing.ClassVar[int] + + PickledSampleDataclass = pickle_depickle( + SampleDataclass, protocol=self.protocol + ) + + found_fields = list(PickledSampleDataclass.__dataclass_fields__.values()) + assert set(f.name for f in found_fields) == { + "x", "y", "z" + } + + expected_ftypes = { + "x": dataclasses._FIELD, + "y": dataclasses._FIELD_INITVAR, + "z": dataclasses._FIELD_CLASSVAR, + } + + + for f in found_fields: + assert f._field_type is expected_ftypes[f.name] + class Protocol2CloudPickleTest(CloudPickleTest): From 9892fba51db5472fd9adf440c84322977d92f8c8 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 9 Oct 2023 16:11:23 +0200 
Subject: [PATCH 2/5] Trigger CI From 56f51b2e897c289fc83ae3cd411eb5095e4923eb Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 9 Oct 2023 16:31:50 +0200 Subject: [PATCH 3/5] Code style --- tests/cloudpickle_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2232f89d..4c1b52dd 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2794,7 +2794,6 @@ class SampleDataclass: "z": dataclasses._FIELD_CLASSVAR, } - for f in found_fields: assert f._field_type is expected_ftypes[f.name] From e15fb62ce836ccf099b38edef554559ba4db3488 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 9 Oct 2023 18:27:33 +0200 Subject: [PATCH 4/5] More testing for dataclass instances with special fields --- tests/cloudpickle_test.py | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4c1b52dd..dc60d782 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2797,6 +2797,60 @@ class SampleDataclass: for f in found_fields: assert f._field_type is expected_ftypes[f.name] + def test_interactively_defined_dataclass_with_initvar_and_classvar(self): + code = """if __name__ == "__main__": + import dataclasses + from testutils import subprocess_worker + import typing + + with subprocess_worker(protocol={protocol}) as w: + + @dataclasses.dataclass + class SampleDataclass: + x: int + y: dataclasses.InitVar[int] = None + z: typing.ClassVar[int] = 42 + + def __post_init__(self, y=0): + self.x += y + + def large_enough(self): + return self.x > self.z + + value = SampleDataclass(2, y=2) + + def check_dataclass_instance(value): + assert isinstance(value, SampleDataclass) + assert value.x == 4 + assert value.z == 42 + expected_dict = dict(x=4) + assert dataclasses.asdict(value) == expected_dict + assert not value.large_enough() + try: + SampleDataclass.z = 0 + assert value.z == 0 + assert 
value.large_enough() + finally: + SampleDataclass.z = 42 + return "ok" + + assert check_dataclass_instance(value) == "ok" + + # Check that this instance of an interactively defined dataclass + # behaves consistently in a remote worker process: + assert w.run(check_dataclass_instance, value) == "ok" + + # Check class provenance tracking is not impacted by the + # @dataclass decorator: + def echo(*args): + return args + + cloned_value, cloned_type = w.run(echo, value, SampleDataclass) + assert cloned_type is SampleDataclass + assert isinstance(cloned_value, SampleDataclass) + """.format(protocol=self.protocol) + assert_run_python_script(code) + class Protocol2CloudPickleTest(CloudPickleTest): From 550f7df08ae8252dd3b8067f6f8f03fb1e5e8005 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 9 Oct 2023 18:31:08 +0200 Subject: [PATCH 5/5] Add changelog entry --- CHANGES.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e2e3a5d5..c037af40 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,9 @@ 2.3.0 (development) =================== -TODO - +- Fix pickling of dataclasses and their instances. + ([issue #386](https://github.com/cloudpipe/cloudpickle/issues/386), + [PR #513](https://github.com/cloudpipe/cloudpickle/pull/513)) 2.2.1 =====