Skip to content

Commit 95474f8

Browse files
author
Jan Rudolph
committed
BUG: merging with a boolean/int categorical column pandas-dev#17187
1 parent 727ea20 commit 95474f8

File tree

3 files changed

+45
-1
lines changed

3 files changed

+45
-1
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ Categorical
10101010
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
10111011
- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
10121012
- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
1013+
- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)
10131014

10141015
PyPy
10151016
^^^^

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5494,7 +5494,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
54945494
# preserve these for validation in _concat_compat
54955495
return self.block.values
54965496

5497-
if self.block.is_bool:
5497+
if self.block.is_bool and not self.block.is_categorical:
54985498
# External code requested filling/upcasting, bool values must
54995499
# be upcasted to object to avoid being upcasted to numeric.
55005500
values = self.block.astype(np.object_).values

pandas/tests/reshape/test_merge.py

+43
Original file line numberDiff line numberDiff line change
@@ -1546,6 +1546,49 @@ def test_dtype_on_categorical_dates(self):
15461546
result_inner = pd.merge(df, df2, how='inner', on=['date'])
15471547
assert_frame_equal(result_inner, expected_inner)
15481548

1549+
def test_merging_with_boolean_cateorical_column(self):
1550+
df1 = pd.DataFrame({'id': [1, 2, 3, 4],
1551+
'cat': [False, True, True, False]})
1552+
df1['cat'] = df1['cat'].astype('category',
1553+
categories=[True, False], ordered=True)
1554+
df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
1555+
result = df1.merge(df2)
1556+
expected = pd.DataFrame({'id': [2, 4], 'cat': [True, False],
1557+
'num': [1, 9]})
1558+
expected['cat'] = expected['cat'].astype('category',
1559+
categories=[True, False],
1560+
ordered=True)
1561+
assert_frame_equal(expected, result)
1562+
1563+
def test_merging_with_integer_cateorical_column(self):
1564+
df1 = pd.DataFrame({'id': [1, 2, 3, 4],
1565+
'cat': [2, 1, 1, 2]})
1566+
df1['cat'] = df1['cat'].astype('category',
1567+
categories=[1, 2], ordered=True)
1568+
df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
1569+
result = df1.merge(df2)
1570+
expected = pd.DataFrame({'id': [2, 4], 'cat': [1, 2],
1571+
'num': [1, 9]})
1572+
expected['cat'] = expected['cat'].astype('category',
1573+
categories=[1, 2],
1574+
ordered=True)
1575+
assert_frame_equal(expected, result)
1576+
1577+
def test_merging_with_string_cateorical_column(self):
1578+
df1 = pd.DataFrame({'id': [1, 2, 3, 4],
1579+
'cat': ['False', 'True', 'True', 'False']})
1580+
df1['cat'] = df1['cat'].astype('category',
1581+
categories=['True', 'False'],
1582+
ordered=True)
1583+
df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
1584+
result = df1.merge(df2)
1585+
expected = pd.DataFrame({'id': [2, 4], 'cat': ['True', 'False'],
1586+
'num': [1, 9]})
1587+
expected['cat'] = expected['cat'].astype('category',
1588+
categories=['True', 'False'],
1589+
ordered=True)
1590+
assert_frame_equal(expected, result)
1591+
15491592

15501593
@pytest.fixture
15511594
def left_df():

0 commit comments

Comments
 (0)