From 9b76f7ed146831829dc3b54f3143c787c0a35b90 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 31 Oct 2016 16:25:51 +0100 Subject: [PATCH 1/3] BUG/API: Index.append with mixed object/Categorical indices --- pandas/indexes/base.py | 5 ----- pandas/tests/indexes/test_category.py | 10 ++++++++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 4d2dcd259e623..883076542cf88 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1466,11 +1466,6 @@ def append(self, other): typs = _concat.get_dtype_kinds(to_concat) - if 'category' in typs: - # if any of the to_concat is category - from pandas.indexes.category import CategoricalIndex - return CategoricalIndex._append_same_dtype(self, to_concat, name) - if len(typs) == 1: return self._append_same_dtype(to_concat, name=name) return _concat._concat_index_asobject(to_concat, name=name) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 9f8405bcc2e1e..4f2d41f86e6c3 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -272,11 +272,17 @@ def test_append(self): # with objects result = ci.append(Index(['c', 'a'])) - expected = CategoricalIndex(list('aabbcaca'), categories=categories) + #expected = CategoricalIndex(list('aabbcaca'), categories=categories) + expected = Index(list('aabbcaca')) tm.assert_index_equal(result, expected, exact=True) # invalid objects - self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) + #self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) + + # GH14298 - if base object is not categorical -> coerce to object + result = Index(['c', 'a']).append(ci) + expected = Index(list('caaabbca')) + tm.assert_index_equal(result, expected, exact=True) def test_insert(self): From b351037fa4fc6eda9c0907a7e8edcf0f2231fd6a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 1 Nov 2016 11:27:27 +0100 Subject: [PATCH 2/3] Only coerce to object if the calling index is not categorical --- pandas/indexes/base.py | 5 +++++ pandas/tests/indexes/test_category.py | 5 ++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 883076542cf88..54eaf86315a88 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1464,6 +1464,11 @@ def append(self, other): names = set([obj.name for obj in to_concat]) name = None if len(names) > 1 else self.name + if self.is_categorical(): + # if calling index is category, don't check dtype of others + from pandas.indexes.category import CategoricalIndex + return CategoricalIndex._append_same_dtype(self, to_concat, name) + typs = _concat.get_dtype_kinds(to_concat) if len(typs) == 1: diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 4f2d41f86e6c3..c76f5ff22c534 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -272,12 +272,11 @@ def test_append(self): # with objects result = ci.append(Index(['c', 'a'])) - #expected = CategoricalIndex(list('aabbcaca'), categories=categories) - expected = Index(list('aabbcaca')) + expected = CategoricalIndex(list('aabbcaca'), categories=categories) tm.assert_index_equal(result, expected, exact=True) # invalid objects - #self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) + self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) # GH14298 - if base object is not categorical -> coerce to object result = Index(['c', 'a']).append(ci) From 0d4749527959ec7bfce1a786b6a06a90d152414c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 2 Nov 2016 15:12:06 +0100 Subject: [PATCH 3/3] Add test for the df.info() case (GH14298) --- pandas/tests/frame/test_repr_info.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 5e5e9abda1200..12cd62f8b4cc0 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -405,3 +405,11 @@ def memory_usage(f): # high upper bound self.assertTrue(memory_usage(unstacked) - memory_usage(df) < 2000) + + def test_info_categorical(self): + # GH14298 + idx = pd.CategoricalIndex(['a', 'b']) + df = pd.DataFrame(np.zeros((2, 2)), index=idx, columns=idx) + + buf = StringIO() + df.info(buf=buf)