From 42e86532cae7d59c8f1b0621a0ddfe929cd32687 Mon Sep 17 00:00:00 2001 From: Ryan Gabbard Date: Thu, 17 Jan 2019 16:45:07 -0500 Subject: [PATCH] Don't cache hash codes across deserialization. Because the hash code cache field gets serialized and deserialized by Pickle, previously when you deserialize a cache_hash=True attrs object, the hashcode will be the hashcode the object had at serialization-time. However, if your object had fields with hash codes which were not deterministic between interpreter runs, then on a new interpreter run your deserialized object would have a hash code which differs from a newly created identical object. This commit fixes that by clearing the cache on deserialization. It needs to override the __setstate__ method to do so, so this commit also forbids using a custom __setstate__ on a cache_hash=True object. Closes #482 . --- changelog.d/428.change.rst | 2 + src/attr/_make.py | 27 ++++++++++++ tests/test_dunders.py | 84 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 changelog.d/428.change.rst diff --git a/changelog.d/428.change.rst b/changelog.d/428.change.rst new file mode 100644 index 000000000..daa5729aa --- /dev/null +++ b/changelog.d/428.change.rst @@ -0,0 +1,2 @@ +Fixes a bug where deserialized objects with ``cache_hash=True`` could +have incorrect hash code values. diff --git a/src/attr/_make.py b/src/attr/_make.py index 6d72df4af..2ae967a21 100644 --- a/src/attr/_make.py +++ b/src/attr/_make.py @@ -529,6 +529,24 @@ def _patch_original_class(self): for name, value in self._cls_dict.items(): setattr(cls, name, value) + # attach __setstate__. This is necessary to clear the hash code + # cache on deserialization. See issue + # https://github.com/python-attrs/attrs/issues/482 . + # Note that this code only handles setstate for non-slots classes. + # For slots classes, see similar code in _create_slots_class + if self._cache_hash: + if hasattr(cls, "__setstate__"): + raise ValueError( + "Currently you cannot use hash caching if " + "you specify your own __setstate__ method." + ) + + def cache_hash_set_state(chss_self, _): + # clear hash code cache + setattr(chss_self, _hash_cache_field, None) + + setattr(cls, "__setstate__", cache_hash_set_state) + return cls def _create_slots_class(self): @@ -581,6 +599,8 @@ def slots_getstate(self): """ return tuple(getattr(self, name) for name in state_attr_names) + hash_caching_enabled = self._cache_hash + def slots_setstate(self, state): """ Automatically created by attrs. @@ -588,6 +608,13 @@ def slots_setstate(self, state): __bound_setattr = _obj_setattr.__get__(self, Attribute) for name, value in zip(state_attr_names, state): __bound_setattr(name, value) + # Clearing the hash code cache on deserialization is needed + # because hash codes can change from run to run. See issue + # https://github.com/python-attrs/attrs/issues/482 . + # Note that this code only handles setstate for slots classes. + # For slots classes, see similar code in _patch_original_class + if hash_caching_enabled: + __bound_setattr(_hash_cache_field, None) # slots and frozen require __getstate__/__setstate__ to work cd["__getstate__"] = slots_getstate diff --git a/tests/test_dunders.py b/tests/test_dunders.py index 59d5764a1..708c542f1 100644 --- a/tests/test_dunders.py +++ b/tests/test_dunders.py @@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function import copy +import pickle import pytest @@ -453,6 +454,89 @@ def __hash__(self): assert 2 == uncached_instance.hash_counter.times_hash_called assert 1 == cached_instance.hash_counter.times_hash_called + def test_cache_hash_serialization(self): + """ + Tests that the hash cache is cleared on deserialization to fix + https://github.com/python-attrs/attrs/issues/482 + """ + + # first, check that our fix didn't break serialization without + # hash caching. + # we don't care about the result of this; we just want to make sure we + # can do it without exceptions + hash(pickle.loads(pickle.dumps(HashCacheSerializationTestUncached))) + + def assert_hash_code_not_cached_across_serialization(original): + # now check our fix for #482 for when hash caching is enabled + original_hash = hash(original) + round_tripped = pickle.loads(pickle.dumps(original)) + # What we want to guard against is having a stale hash code + # when a field's hash code differs in a new interpreter after + # deserialization. This is tricky to test because we are, + # of course, still running in the same interpreter. So + # after deserialization we reach in and change the value of + # a field to simulate the field changing its hash code. We then + # check that the object's hash code changes, indicating that we + # don't have a stale hash code. + # This could fail in two ways: (1) pickle.loads could get the hash + # code of the deserialized value (triggering it to cache) before + # we alter the field value. This doesn't happen in our tested + # Python versions. (2) "foo" and "something different" could + # have a hash collision on this interpreter run. But this is + # extremely improbable and would just result in one buggy test run. + round_tripped.foo_string = "something different" + assert original_hash != hash(round_tripped) + + # slots and non-slots classes implement __setstate__ differently, + # so we need to test both cases + assert_hash_code_not_cached_across_serialization( + HashCacheSerializationTestCached() + ) + assert_hash_code_not_cached_across_serialization( + HashCacheSerializationTestCachedSlots() + ) + + # test custom __setstate__ disallowed on a cache_hash=True object. + # This is needed because we handle clearing the cache after + # deserialization with a custom __setstate__. It is possible + # to make both work, but it requires some thought about how to go + # about it, so it has not yet been implemented. + with pytest.raises( + ValueError, + message="Currently you cannot use hash caching if you " + "specify your own __setstate__ method.", + ): + + @attr.attrs(hash=True, cache_hash=True) + class NoCacheHashAndCustomSetState: + def __setstate__(self, state): + pass + + +# these are for use in TestAddHash.test_cache_hash_serialization +# they need to be out here so they can be un-pickled +HashCacheSerializationTestUncached = make_class( + "HashCacheSerializationTestUncached", + {"foo_string": attr.ib(default="foo")}, + hash=True, + cache_hash=False, +) + +HashCacheSerializationTestCached = make_class( + "HashCacheSerializationTestCached", + {"foo_string": attr.ib(default="foo")}, + hash=True, + cache_hash=True, +) + +HashCacheSerializationTestCachedSlots = make_class( + "HashCacheSerializationTestCachedSlots", + {"foo_string": attr.ib(default="foo")}, + hash=True, + cache_hash=True, + slots=True, +) + class TestAddInit(object): """