Skip to content

Commit

Permalink
Don't cache hash codes across deserialization.
Browse files Browse the repository at this point in the history
 Because the hash code cache field gets serialized and deserialized by
 Pickle, previously when you deserialize a cache_hash=True attrs object, the
 hashcode will be the hashcode the object had at serialization-time.
 However, if your object had fields with hash codes which were not
 deterministic between interpreter runs, then on a new interpreter run
 your deserialized object would have a hash code which differs from a
 newly created identical object.

 This commit fixes that by clearing the cache on deserialization. It
 needs to override the __setstate__ method to do so, so this commit
 also forbids using a custom __setstate__ on a cache_hash=True object.

Closes python-attrs#482 .
  • Loading branch information
Ryan Gabbard committed Jan 22, 2019
1 parent f85a3f5 commit 42e8653
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 0 deletions.
2 changes: 2 additions & 0 deletions changelog.d/428.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixes a bug where deserialized objects with ``cache_hash=True`` could
have incorrect hash code values.
27 changes: 27 additions & 0 deletions src/attr/_make.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,24 @@ def _patch_original_class(self):
for name, value in self._cls_dict.items():
setattr(cls, name, value)

# attach __setstate__. This is necessary to clear the hash code
# cache on deserialization. See issue
# https://github.com/python-attrs/attrs/issues/482 .
# Note that this code only handles setstate for non-slots classes.
# For slots classes, see similar code in _create_slots_class
if self._cache_hash:
if hasattr(cls, "__setstate__"):
raise ValueError(
"Currently you cannot use hash caching if "
"you specify your own __setstate__ method."
)

def cache_hash_set_state(chss_self, _):
# clear hash code cache
setattr(chss_self, _hash_cache_field, None)

setattr(cls, "__setstate__", cache_hash_set_state)

return cls

def _create_slots_class(self):
Expand Down Expand Up @@ -581,13 +599,22 @@ def slots_getstate(self):
"""
return tuple(getattr(self, name) for name in state_attr_names)

hash_caching_enabled = self._cache_hash

def slots_setstate(self, state):
"""
Automatically created by attrs.
"""
__bound_setattr = _obj_setattr.__get__(self, Attribute)
for name, value in zip(state_attr_names, state):
__bound_setattr(name, value)
# Clearing the hash code cache on deserialization is needed
# because hash codes can change from run to run. See issue
# https://github.com/python-attrs/attrs/issues/482 .
# Note that this code only handles setstate for slots classes.
# For slots classes, see similar code in _patch_original_class
if hash_caching_enabled:
__bound_setattr(_hash_cache_field, None)

# slots and frozen require __getstate__/__setstate__ to work
cd["__getstate__"] = slots_getstate
Expand Down
84 changes: 84 additions & 0 deletions tests/test_dunders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function

import copy
import pickle

import pytest

Expand Down Expand Up @@ -453,6 +454,89 @@ def __hash__(self):
assert 2 == uncached_instance.hash_counter.times_hash_called
assert 1 == cached_instance.hash_counter.times_hash_called

def test_cache_hash_serialization(self):
"""
Tests that the hash cache is cleared on deserialization to fix
https://github.com/python-attrs/attrs/issues/482
"""

# first, check that our fix didn't break serialization without
# hash caching.
# we don't care about the result of this; we just want to make sure we
# can do it without exceptions
hash(pickle.loads(pickle.dumps(HashCacheSerializationTestUncached)))

def assert_hash_code_not_cached_across_serialization(original):
# now check our fix for #482 for when hash caching is enabled
original_hash = hash(original)
round_tripped = pickle.loads(pickle.dumps(original))
# What we want to guard against is having a stale hash code
# when a field's hash code differs in a new interpreter after
# deserialization. This is tricky to test because we are,
# of course, still running in the same interpreter. So
# after deserialization we reach in and change the value of
# a field to simulate the field changing its hash code. We then
# check that the object's hash code changes, indicating that we
# don't have a stale hash code.
# This could fail in two ways: (1) pickle.loads could get the hash
# code of the deserialized value (triggering it to cache) before
# we alter the field value. This doesn't happen in our tested
# Python versions. (2) "foo" and "something different" could
# have a hash collision on this interpreter run. But this is
# extremely improbable and would just result in one buggy test run.
round_tripped.foo_string = "something different"
assert original_hash != hash(round_tripped)

# slots and non-slots classes implement __setstate__ differently,
# so we need to test both cases
assert_hash_code_not_cached_across_serialization(
HashCacheSerializationTestCached()
)
assert_hash_code_not_cached_across_serialization(
HashCacheSerializationTestCachedSlots()
)

# test custom __setstate__ disallowed on a cache_hash=True object.
# This is needed because we handle clearing the cache after
# deserialization with a custom __setstate__. It is possible
# to make both work, but it requires some thought about how to go
# about it, so it has not yet been implemented.
with pytest.raises(
ValueError,
message="Currently you cannot use hash caching if you "
"specify your own __setstate__ method.",
):

@attr.attrs(hash=True, cache_hash=True)
class NoCacheHashAndCustomSetState:
def __setstate__(self, state):
pass


# these are for use in TestAddHash.test_cache_hash_serialization
# they need to be out here so they can be un-pickled
HashCacheSerializationTestUncached = make_class(
"HashCacheSerializationTestUncached",
{"foo_string": attr.ib(default="foo")},
hash=True,
cache_hash=False,
)

HashCacheSerializationTestCached = make_class(
"HashCacheSerializationTestCached",
{"foo_string": attr.ib(default="foo")},
hash=True,
cache_hash=True,
)

HashCacheSerializationTestCachedSlots = make_class(
"HashCacheSerializationTestCachedSlots",
{"foo_string": attr.ib(default="foo")},
hash=True,
cache_hash=True,
slots=True,
)


class TestAddInit(object):
"""
Expand Down

0 comments on commit 42e8653

Please sign in to comment.