Skip to content

Commit 218da66

Browse files
committed
Generic solution to categorical problem
1 parent 48e7163 commit 218da66

File tree

2 files changed

+10
-11
lines changed

2 files changed

+10
-11
lines changed

pandas/core/reshape/merge.py

+4 -8
Original file line number | Diff line number | Diff line change
@@ -896,12 +896,6 @@ def _maybe_coerce_merge_keys(self):
896896
if lk.is_dtype_equal(rk):
897897
continue
898898

899-
# if we are dates with differing categories
900-
# then allow them to proceed because
901-
# coercing to object below results in integers.
902-
if is_datetimelike(lk.categories) and is_datetimelike(rk.categories):
903-
continue
904-
905899
elif is_categorical_dtype(lk) or is_categorical_dtype(rk):
906900
pass
907901

@@ -923,11 +917,13 @@ def _maybe_coerce_merge_keys(self):
923917
# Houston, we have a problem!
924918
# let's coerce to object
925919
if name in self.left.columns:
920+
typ = lk.categories.dtype if is_categorical_dtype(lk) else object
926921
self.left = self.left.assign(
927-
**{name: self.left[name].astype(object)})
922+
**{name: self.left[name].astype(typ)})
928923
if name in self.right.columns:
924+
typ = rk.categories.dtype if is_categorical_dtype(rk) else object
929925
self.right = self.right.assign(
930-
**{name: self.right[name].astype(object)})
926+
**{name: self.right[name].astype(typ)})
931927

932928
def _validate_specification(self):
933929
# Hm, any way to make this logic less complicated??

pandas/tests/reshape/test_merge.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -1515,7 +1515,7 @@ def test_self_join_multiple_categories(self):
15151515

15161516
assert_frame_equal(result, df)
15171517

1518-
def test_dtype_on_categorical_dates(self):
1518+
def test_categorical_dates(self):
15191519
# GH 16900
15201520
# dates should not be coerced to ints
15211521

@@ -1534,10 +1534,13 @@ def test_dtype_on_categorical_dates(self):
15341534
df2['date'] = df2['date'].astype('category')
15351535

15361536
result = pd.merge(df, df2, how='outer', on=['date'])
1537-
assert result['date'].dtype == 'category'
1537+
assert result.shape == (3, 3)
1538+
assert result['date'].iloc[0] == pd.Timestamp('2001-01-01')
1539+
assert result['date'].iloc[-1] == pd.Timestamp('2001-01-03')
15381540

15391541
result_inner = pd.merge(df, df2, how='inner', on=['date'])
1540-
assert result_inner['date'].dtype == 'category'
1542+
assert result_inner.shape == (1, 3)
1543+
assert result_inner['date'].iloc[-1] == pd.Timestamp('2001-01-01')
15411544

15421545

15431546
@pytest.fixture

0 commit comments

Comments
 (0)