Skip to content

Commit 620462b

Browse files
committed
BUG: Categorical concat should preserve levels (GH7864)
1 parent c49bef9 commit 620462b

File tree

3 files changed

+28
-14
lines changed

3 files changed

+28
-14
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ Categoricals in Series/DataFrame
117117

118118
:class:`~pandas.Categorical` can now be included in `Series` and `DataFrames` and gained new
119119
methods to manipulate. Thanks to Jan Schultz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`,
120-
:issue:`7444`, :issue:`7839`, :issue:`7848`).
120+
:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`).
121121

122122
For full docs, see the :ref:`Categorical introduction <categorical>` and the :ref:`API documentation <api.categorical>`.
123123

pandas/core/internals.py

+11-13
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,9 @@ def to_native_types(self, slicer=None, na_rep='', **kwargs):
451451
values[mask] = na_rep
452452
return values.tolist()
453453

454-
def _validate_merge(self, blocks):
455-
""" validate that we can merge these blocks """
456-
return True
454+
def _concat_blocks(self, blocks, values):
455+
""" return the block concatenation """
456+
return self._holder(values[0])
457457

458458
# block actions ####
459459
def copy(self, deep=True):
@@ -1639,15 +1639,19 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
16391639
ndim=self.ndim,
16401640
placement=self.mgr_locs)
16411641

1642-
def _validate_merge(self, blocks):
1643-
""" validate that we can merge these blocks """
1642+
def _concat_blocks(self, blocks, values):
1643+
"""
1644+
validate that we can merge these blocks
1645+
1646+
return the block concatenation
1647+
"""
16441648

16451649
levels = self.values.levels
16461650
for b in blocks:
16471651
if not levels.equals(b.values.levels):
16481652
raise ValueError("incompatible levels in categorical block merge")
16491653

1650-
return True
1654+
return self._holder(values[0], levels=levels)
16511655

16521656
def to_native_types(self, slicer=None, na_rep='', **kwargs):
16531657
""" convert to our native types format, slicing if desired """
@@ -4026,17 +4030,11 @@ def concatenate_join_units(join_units, concat_axis, copy):
40264030
else:
40274031
concat_values = com._concat_compat(to_concat, axis=concat_axis)
40284032

4029-
# FIXME: optimization potential: if len(join_units) == 1, single join unit
4030-
# is densified and sparsified back.
40314033
if any(unit.needs_block_conversion for unit in join_units):
40324034

40334035
# need to ask the join unit block to convert to the underlying repr for us
40344036
blocks = [ unit.block for unit in join_units if unit.block is not None ]
4035-
4036-
# may need to validate this combination
4037-
blocks[0]._validate_merge(blocks)
4038-
4039-
return blocks[0]._holder(concat_values[0])
4037+
return blocks[0]._concat_blocks(blocks, concat_values)
40404038
else:
40414039
return concat_values
40424040

pandas/tests/test_categorical.py

+16
Original file line numberDiff line numberDiff line change
@@ -1470,6 +1470,22 @@ def f():
14701470
pd.concat([df,df_wrong_levels])
14711471
self.assertRaises(ValueError, f)
14721472

1473+
# GH 7864
1474+
# make sure ordering is preserved
1475+
df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
1476+
df["grade"] = pd.Categorical(df["raw_grade"])
1477+
df['grade'].cat.reorder_levels(['e', 'a', 'b'])
1478+
1479+
df1 = df[0:3]
1480+
df2 = df[3:]
1481+
1482+
self.assert_numpy_array_equal(df['grade'].cat.levels, df1['grade'].cat.levels)
1483+
self.assert_numpy_array_equal(df['grade'].cat.levels, df2['grade'].cat.levels)
1484+
1485+
dfx = pd.concat([df1, df2])
1486+
dfx['grade'].cat.levels
1487+
self.assert_numpy_array_equal(df['grade'].cat.levels, dfx['grade'].cat.levels)
1488+
14731489
def test_append(self):
14741490
cat = pd.Categorical(["a","b"], levels=["a","b"])
14751491
vals = [1,2]

0 commit comments

Comments
 (0)