From 4ca0a334c50a92ef3197a519e08e47520eca715f Mon Sep 17 00:00:00 2001 From: "Dr. Leo" Date: Sat, 3 Jan 2015 16:38:14 +0100 Subject: [PATCH 1/3] Validate that 'name' attribute is set only if hashable. Addresses first part of issue #8263 --- pandas/core/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0fc7171410152..90c4e1e65326f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1949,10 +1949,14 @@ def __getattr__(self, name): def __setattr__(self, name, value): """After regular attribute access, try setting the name This allows simpler access to columns for interactive use.""" - # first try regular attribute access via __getattribute__, so that + # If attribute name is 'name', validate that value is hashable. + # This prevents broken DataFrames if an unhashable Series.name + # is propagated to a column index. + # Then try regular attribute access via __getattribute__, so that # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify # the same attribute. - + + if name == 'name': hash(value) try: object.__getattribute__(self, name) return object.__setattr__(self, name, value) From 7fdf290a64ea54ffcaedb1ebbf1ad2bc2f8b5f4a Mon Sep 17 00:00:00 2001 From: "Dr. Leo" Date: Mon, 12 Jan 2015 21:08:09 +0100 Subject: [PATCH 2/3] property to validate that Series.name is hashable, add testcase for this --- pandas/core/generic.py | 8 ++------ pandas/core/series.py | 15 +++++++++++++++ pandas/tests/test_series.py | 9 ++++++++- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 90c4e1e65326f..0fc7171410152 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1949,14 +1949,10 @@ def __getattr__(self, name): def __setattr__(self, name, value): """After regular attribute access, try setting the name This allows simpler access to columns for interactive use.""" - # If attribute name is 'name', validate that value is hashable. - # This prevents broken DataFrames if an unhashable Series.name - # is propagated to a column index. - # Then try regular attribute access via __getattribute__, so that + # first try regular attribute access via __getattribute__, so that # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify # the same attribute. - - if name == 'name': hash(value) + try: object.__getattribute__(self, name) return object.__setattr__(self, name, value) diff --git a/pandas/core/series.py b/pandas/core/series.py index 081e5c50946bc..7afadefd07d72 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -105,6 +105,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): dict. dtype : numpy.dtype or None If None, dtype will be inferred + name: must be hashable, defaults to None. copy : boolean, default False Copy input data """ @@ -272,6 +273,20 @@ def _update_inplace(self, result, **kwargs): # we want to call the generic version and not the IndexOpsMixin return generic.NDFrame._update_inplace(self, result, **kwargs) + # Validate that name is hashable + @property + def name(self): + return self._name + + @name.setter + def name(self, value): + try: + hash(value) + except TypeError: + raise TypeError('Series.name must be hashable.') + self._name = value + + # ndarray compatibility @property def dtype(self): diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index c0daeb793fc40..6c33dcc23a0a7 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -644,6 +644,13 @@ def test_constructor_map(self): result = Series(m, index=lrange(10, 20)) exp.index = lrange(10, 20) assert_series_equal(result, exp) + + def test_constructor_unhashable_name(self): + def set_to_unhashable(s_): + s_.name = {} + s = Series([1,3], name = 'test') + self.assertRaises(TypeError, set_to_unhashable, s) + self.assertEqual(s.name, 'test') def test_constructor_categorical(self): cat = pd.Categorical([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'], fastpath=True) @@ -1980,7 +1987,7 @@ def test_repr(self): rep_str = repr(ser) self.assertIn("Name: 0", rep_str) - ser = Series(["a\n\r\tb"], name=["a\n\r\td"], index=["a\n\r\tf"]) + ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) self.assertFalse("\t" in repr(ser)) self.assertFalse("\r" in repr(ser)) self.assertFalse("a\n" in repr(ser)) From c244273f916fe5d1f387b5116827693a4994a9d5 Mon Sep 17 00:00:00 2001 From: "Dr. Leo" Date: Thu, 15 Jan 2015 20:16:40 +0100 Subject: [PATCH 3/3] use is_hashable. Some unittests are broken as float64 is deemed unhashable. --- pandas/core/series.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7afadefd07d72..7ce783278b86f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -17,6 +17,7 @@ _default_index, _maybe_upcast, _asarray_tuplesafe, _infer_dtype_from_scalar, is_list_like, _values_from_object, + is_hashable, _possibly_cast_to_datetime, _possibly_castable, _possibly_convert_platform, _try_sort, ABCSparseArray, _maybe_match_name, _coerce_to_dtype, @@ -105,7 +106,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame): dict. dtype : numpy.dtype or None If None, dtype will be inferred - name: must be hashable, defaults to None. + name : used to attach metadata to a Series, e.g., as str or namedtuple. + Must be hashable, defaults to None. copy : boolean, default False Copy input data """ @@ -280,11 +282,11 @@ def name(self): @name.setter def name(self, value): - try: - hash(value) - except TypeError: - raise TypeError('Series.name must be hashable.') - self._name = value + if is_hashable(value): + object.__setattr__(self, '_name', value) + else: + raise TypeError('Series.name must be hashable, got %s.' + % value.__class__.__name__) # ndarray compatibility