From b2eb4e7b7dc50c8c581e1cb44640683f2c847eae Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 1 May 2015 11:51:02 -0400 Subject: [PATCH 1/2] disallow numeric setlike operations on CategoricalIndex (GH10039) --- pandas/core/index.py | 16 ++++++++++++++++ pandas/tests/test_index.py | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/pandas/core/index.py b/pandas/core/index.py index 83f60b360c746..6b75dba549f31 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2469,6 +2469,21 @@ def _evaluate_compare(self, other): cls.__le__ = _make_compare('__le__') cls.__ge__ = _make_compare('__ge__') + @classmethod + def _add_numericlike_set_methods_disabled(cls): + """ add in the numeric set-like methods to disable """ + + def _make_invalid_op(name): + + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: {typ}".format(name=name, + typ=type(self))) + invalid_op.__name__ = name + return invalid_op + + cls.__add__ = cls.__add__ = __iadd__ = _make_invalid_op('__add__') + cls.__sub__ = cls.__sub__ = __isub__ = _make_invalid_op('__sub__') + @classmethod def _add_numeric_methods_disabled(cls): """ add in numeric methods to disable """ @@ -3148,6 +3163,7 @@ def _add_accessors(cls): overwrite=True) +CategoricalIndex._add_numericlike_set_methods_disabled() CategoricalIndex._add_numeric_methods_disabled() CategoricalIndex._add_logical_methods_disabled() CategoricalIndex._add_comparison_methods() diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 313a723f7b1ef..557228ccbf4da 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1488,6 +1488,19 @@ def test_construction_with_dtype(self): result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) + def test_disallow_set_ops(self): + + # GH 10039 + # set ops (+/-) raise TypeError + idx = pd.Index(pd.Categorical(['a', 'b'])) + + self.assertRaises(TypeError, lambda : idx - idx) + self.assertRaises(TypeError, lambda : idx + idx) + self.assertRaises(TypeError, lambda : idx - ['a','b']) + self.assertRaises(TypeError, lambda : idx + ['a','b']) + self.assertRaises(TypeError, lambda : ['a','b'] - idx) + self.assertRaises(TypeError, lambda : ['a','b'] + idx) + def test_method_delegation(self): ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) @@ -3882,6 +3895,12 @@ def test_difference(self): # - API change GH 8226 with tm.assert_produces_warning(): first - self.index[-3:] + with tm.assert_produces_warning(): + self.index[-3:] - first + with tm.assert_produces_warning(): + self.index[-3:] - first.tolist() + with tm.assert_produces_warning(): + first.tolist() - self.index[-3:] expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), sortorder=0, From 7b6d23d6e7ed05a34ae8a4324d5023a3e5e93b79 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 1 May 2015 12:00:36 -0400 Subject: [PATCH 2/2] BUG: provide deprecation warnings when using setlike operations on Indexes and lists (for +/-) (GH10038) --- pandas/core/index.py | 13 +++++++------ pandas/core/reshape.py | 4 ++-- pandas/io/tests/test_pytables.py | 2 +- pandas/tests/test_index.py | 11 ++++++++--- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 6b75dba549f31..b49108378ca68 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1179,17 +1179,18 @@ def argsort(self, *args, **kwargs): return result.argsort(*args, **kwargs) def __add__(self, other): - if isinstance(other, Index): + if com.is_list_like(other): warnings.warn("using '+' to provide set union with Indexes is deprecated, " "use '|' or .union()",FutureWarning) + if isinstance(other, Index): return self.union(other) return Index(np.array(self) + other) __iadd__ = __add__ + __radd__ = __add__ def __sub__(self, other): - if isinstance(other, Index): - warnings.warn("using '-' to provide set differences with Indexes is deprecated, " - "use .difference()",FutureWarning) + warnings.warn("using '-' to provide set differences with Indexes is deprecated, " + "use .difference()",FutureWarning) return self.difference(other) def __and__(self, other): @@ -2481,8 +2482,8 @@ def invalid_op(self, other=None): invalid_op.__name__ = name return invalid_op - cls.__add__ = cls.__add__ = __iadd__ = _make_invalid_op('__add__') - cls.__sub__ = cls.__sub__ = __isub__ = _make_invalid_op('__sub__') + cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') + cls.__sub__ = __isub__ = _make_invalid_op('__sub__') @classmethod def _add_numeric_methods_disabled(cls): diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index af98e533cb5b7..9a812ec71b9a2 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -612,7 +612,7 @@ def _convert_level_number(level_num, columns): new_data[key] = value_slice.ravel() if len(drop_cols) > 0: - new_columns = new_columns - drop_cols + new_columns = new_columns.difference(drop_cols) N = len(this) @@ -1045,7 +1045,7 @@ def check_len(item, name): with_dummies = [result] for (col, pre, sep) in zip(columns_to_encode, prefix, prefix_sep): - dummy = _get_dummies_1d(data[col], prefix=pre, prefix_sep=sep, + dummy = _get_dummies_1d(data[col], prefix=pre, prefix_sep=sep, dummy_na=dummy_na, sparse=sparse) with_dummies.append(dummy) result = concat(with_dummies, axis=1) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index ec33e53481950..2e5cef8a1ef57 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -3613,7 +3613,7 @@ def test_frame_select_complex(self): # invert ok for filters result = store.select('df', "~(columns=['A','B'])") - expected = df.loc[:,df.columns-['A','B']] + expected = df.loc[:,df.columns.difference(['A','B'])] tm.assert_frame_equal(result, expected) # in diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 557228ccbf4da..a7ce3dcdda9f7 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -686,6 +686,10 @@ def test_add(self): # - API change GH 8226 with tm.assert_produces_warning(): self.strIndex + self.strIndex + with tm.assert_produces_warning(): + self.strIndex + self.strIndex.tolist() + with tm.assert_produces_warning(): + self.strIndex.tolist() + self.strIndex firstCat = self.strIndex.union(self.dateIndex) secondCat = self.strIndex.union(self.strIndex) @@ -772,6 +776,7 @@ def test_difference(self): assertRaisesRegexp(TypeError, "iterable", first.difference, 0.5) def test_symmetric_diff(self): + # smoke idx1 = Index([1, 2, 3, 4], name='idx1') idx2 = Index([2, 3, 4, 5]) @@ -819,7 +824,7 @@ def test_symmetric_diff(self): # other isn't iterable with tm.assertRaises(TypeError): - Index(idx1,dtype='object') - 1 + Index(idx1,dtype='object').difference(1) def test_is_numeric(self): self.assertFalse(self.dateIndex.is_numeric()) @@ -3899,8 +3904,8 @@ def test_difference(self): self.index[-3:] - first with tm.assert_produces_warning(): self.index[-3:] - first.tolist() - with tm.assert_produces_warning(): - first.tolist() - self.index[-3:] + + self.assertRaises(TypeError, lambda : first.tolist() - self.index[-3:]) expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), sortorder=0,