Skip to content

Commit 8d5032a

Browse files
REGR: allow merging on object boolean columns (#21310)
1 parent 05e55aa commit 8d5032a

File tree

3 files changed

+33
-5
lines changed

3 files changed

+33
-5
lines changed

doc/source/whatsnew/v0.23.1.txt

+2-3
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,14 @@ In addition, ordering comparisons will raise a ``TypeError`` in the future.
6565
a tz-aware time instead of tz-naive (:issue:`21267`) and :attr:`DatetimeIndex.date`
6666
returned incorrect date when the input date has a non-UTC timezone (:issue:`21230`).
6767
- Fixed regression in :meth:`pandas.io.json.json_normalize` when called with ``None`` values
68-
in nested levels in JSON (:issue:`21158`).
68+
in nested levels in JSON; keys whose value is ``None`` are also no longer dropped (:issue:`21158`, :issue:`21356`).
6969
- Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`)
7070
- Bug preventing pandas from being importable with -OO optimization (:issue:`21071`)
7171
- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`)
7272
- Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`)
7373
- Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing
7474
values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`)
75-
- Fixed regression in :func:`nested_to_record` which now flattens a list of dictionaries and does not drop keys with value as `None` (:issue:`21356`)
76-
75+
- Fixed regression in merging on boolean index/columns (:issue:`21119`).
7776

7877
.. _whatsnew_0231.performance:
7978

pandas/core/reshape/merge.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
is_int_or_datetime_dtype,
2929
is_dtype_equal,
3030
is_bool,
31+
is_bool_dtype,
3132
is_list_like,
3233
is_datetimelike,
3334
_ensure_int64,
@@ -974,9 +975,14 @@ def _maybe_coerce_merge_keys(self):
974975

975976
# Check if we are trying to merge on obviously
976977
# incompatible dtypes GH 9780, GH 15800
977-
elif is_numeric_dtype(lk) and not is_numeric_dtype(rk):
978+
979+
# boolean values are considered as numeric, but are still allowed
980+
# to be merged on object boolean values
981+
elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk))
982+
and not is_numeric_dtype(rk)):
978983
raise ValueError(msg)
979-
elif not is_numeric_dtype(lk) and is_numeric_dtype(rk):
984+
elif (not is_numeric_dtype(lk)
985+
and (is_numeric_dtype(rk) and not is_bool_dtype(rk))):
980986
raise ValueError(msg)
981987
elif is_datetimelike(lk) and not is_datetimelike(rk):
982988
raise ValueError(msg)

pandas/tests/reshape/merge/test_merge.py

+23
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,27 @@ def test_merge_on_ints_floats_warning(self):
15261526
result = B.merge(A, left_on='Y', right_on='X')
15271527
assert_frame_equal(result, expected[['Y', 'X']])
15281528

1529+
def test_merge_incompat_infer_boolean_object(self):
1530+
# GH21119: bool + object bool merge OK
1531+
df1 = DataFrame({'key': Series([True, False], dtype=object)})
1532+
df2 = DataFrame({'key': [True, False]})
1533+
1534+
expected = DataFrame({'key': [True, False]}, dtype=object)
1535+
result = pd.merge(df1, df2, on='key')
1536+
assert_frame_equal(result, expected)
1537+
result = pd.merge(df2, df1, on='key')
1538+
assert_frame_equal(result, expected)
1539+
1540+
# with missing value
1541+
df1 = DataFrame({'key': Series([True, False, np.nan], dtype=object)})
1542+
df2 = DataFrame({'key': [True, False]})
1543+
1544+
expected = DataFrame({'key': [True, False]}, dtype=object)
1545+
result = pd.merge(df1, df2, on='key')
1546+
assert_frame_equal(result, expected)
1547+
result = pd.merge(df2, df1, on='key')
1548+
assert_frame_equal(result, expected)
1549+
15291550
@pytest.mark.parametrize('df1_vals, df2_vals', [
15301551
([0, 1, 2], ["0", "1", "2"]),
15311552
([0.0, 1.0, 2.0], ["0", "1", "2"]),
@@ -1538,6 +1559,8 @@ def test_merge_on_ints_floats_warning(self):
15381559
pd.date_range('20130101', periods=3, tz='US/Eastern')),
15391560
([0, 1, 2], Series(['a', 'b', 'a']).astype('category')),
15401561
([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')),
1562+
# TODO ([0, 1], pd.Series([False, True], dtype=bool)),
1563+
([0, 1], pd.Series([False, True], dtype=object))
15411564
])
15421565
def test_merge_incompat_dtypes(self, df1_vals, df2_vals):
15431566
# GH 9780, GH 15800

0 commit comments

Comments
 (0)