Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Instance-level dispatch table #395

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 33 additions & 14 deletions cloudpickle/cloudpickle_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,6 @@ class CloudPickler(Pickler):
_dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
_dispatch_table[_collections_abc.dict_items] = _dict_items_reduce


dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

# function reducers are defined as instance methods of CloudPickler
# objects, as they rely on a CloudPickler attribute (globals_ref)
def _dynamic_function_reduce(self, func):
Expand Down Expand Up @@ -572,17 +569,6 @@ def dump(self, obj):
raise

if pickle.HIGHEST_PROTOCOL >= 5:
# `CloudPickler.dispatch` is only left for backward compatibility - note
# that when using protocol 5, `CloudPickler.dispatch` is not an
# extension of `Pickler.dispatch` dictionary, because CloudPickler
# subclasses the C-implemented Pickler, which does not expose a
# `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler
# used `CloudPickler.dispatch` as a class-level attribute storing all
# reducers implemented by cloudpickle, but the attribute name was not a
# great choice given the meaning of `Cloudpickler.dispatch` when
# `CloudPickler` extends the pure-python pickler.
dispatch = dispatch_table

# Implementation of the reducer_override callback, in order to
# efficiently serialize dynamic functions and classes by subclassing
# the C-implemented Pickler.
Expand All @@ -604,6 +590,30 @@ def __init__(self, file, protocol=None, buffer_callback=None):
self.globals_ref = {}
self.proto = int(protocol)

# Instance-level public dispatch_table should not mutate the
# private class-level _dispatch_table while still reflecting.
# Note that it is important to lazy init the dispatch_table
# public attribute to make it possible to re-assign it later
# and get the C implementation of Pickler take the
# re-assignment into account.
self.dispatch_table = ChainMap(
{}, # to register instance-level custom reducers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have an internal variable to point to this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good suggestion. I still need to find the time to understand the cause of the PyPy segfault.

self._dispatch_table, # cloudpickle specific reducers
copyreg.dispatch_table, # base Python types
)

# `CloudPickler.dispatch` is only left for backward compatibility -
# note that when using protocol 5, `CloudPickler.dispatch` is not
# an extension of `Pickler.dispatch` dictionary, because
# CloudPickler subclasses the C-implemented Pickler, which does not
# expose a `dispatch` attribute. Earlier versions of the protocol
# 5 CloudPickler used `CloudPickler.dispatch` as a class-level
# attribute storing all reducers implemented by cloudpickle, but
# the attribute name was not a great choice given the meaning of
# `Cloudpickler.dispatch` when `CloudPickler` extends the
# pure-python pickler.
self.dispatch = self.dispatch_table

def reducer_override(self, obj):
"""Type-agnostic reducing callback for function and classes.

Expand Down Expand Up @@ -673,6 +683,15 @@ def __init__(self, file, protocol=None):
self.globals_ref = {}
assert hasattr(self, 'proto')

# Isolated the instance dispatch table to make it possible to
# customize the reducers of a give Pickler instance without
# impacting the class-level picklers.
self.dispatch = self.dispatch.copy()

# Compatibility alias to get the same attributes irrespective of
# the version of Python.
self.dispatch_table = self.dispatch

def _save_reduce_pickle5(self, func, args, state=None, listitems=None,
dictitems=None, state_setter=None, obj=None):
save = self.save
Expand Down
76 changes: 72 additions & 4 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import division
from pickle import HIGHEST_PROTOCOL

import _collections_abc
import abc
Expand Down Expand Up @@ -1330,12 +1331,17 @@ def f():
# serializable.
from cloudpickle import CloudPickler
from cloudpickle import cloudpickle_fast as cp_fast
CloudPickler.dispatch_table[type(py.builtin)] = cp_fast._module_reduce
py_builtin_type = type(py.builtin)
assert py_builtin_type not in CloudPickler._dispatch_table
try:
CloudPickler._dispatch_table[py_builtin_type] = cp_fast._module_reduce

g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol))
g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol))

result = g()
self.assertEqual(1, result)
result = g()
self.assertEqual(1, result)
finally:
del CloudPickler._dispatch_table[py_builtin_type]

def test_function_module_name(self):
func = lambda x: x
Expand Down Expand Up @@ -2283,6 +2289,68 @@ def reduce_myclass(x):
finally:
copyreg.dispatch_table.pop(MyClass)

def test_side_effect_free_pickler_instance(self):
class A:
pass

iob = io.BytesIO()
p = cloudpickle.CloudPickler(iob, protocol=self.protocol)
p.dispatch_table[A] = lambda obj: (int, (42,))
p.dump(A())
assert pickle.loads(iob.getvalue()) == 42

# Check there is no side-effect on other CloudPickler instances
iob = io.BytesIO()
p = cloudpickle.CloudPickler(iob, protocol=self.protocol)
p.dump(A())
assert isinstance(pickle.loads(iob.getvalue()), A)

def test_pickler_dispatch_table_reassign(self):
# Check that re-assigning the dispatch table attribute on an instance
# works as expected. This would not waork on Python 3.8 if
# dispatch_table was a class attribute of CloudPickler because of
# implementation details in initialization step of the C implementation
# of the Pickler class in the standard library.
class A:
pass

iob = io.BytesIO()
p = cloudpickle.CloudPickler(iob, protocol=self.protocol)
p.dispatch_table = p.dispatch_table.copy()
p.dispatch_table[A] = lambda obj: (int, (42,))
p.dump(A())
assert pickle.loads(iob.getvalue()) == 42

def test_pickler_subclass_with_custom_dispatch_table(self):
class PicklerSubclass(cloudpickle.CloudPickler):
def __init__(self, buffer, custom_reducers,
protocol=HIGHEST_PROTOCOL):
# initialize the instance -level dispatch_table attribute
super().__init__(buffer, protocol=protocol)
self.dispatch_table.update(custom_reducers)

class A:
pass

class B:
pass

custom_reducers = {
A: lambda obj: (int, (42,))
}

iob = io.BytesIO()
p = PicklerSubclass(iob, custom_reducers, protocol=self.protocol)
p.dump((A(), B()))
a, b = pickle.loads(iob.getvalue())
assert a == 42
assert isinstance(b, B) # pickling locally defined classes works

# Check there is no side-effect on other CloudPickler instances
iob = io.BytesIO()
p = cloudpickle.CloudPickler(iob, protocol=self.protocol)
p.dump(A())
assert isinstance(pickle.loads(iob.getvalue()), A)

class Protocol2CloudPickleTest(CloudPickleTest):

Expand Down