Skip to content

Commit 416bdf0

Browse files
Backport PR pandas-dev#38532: BUG: Regression in logical ops raising ValueError with Categorical columns with unused categories (pandas-dev#38612)
Co-authored-by: patrick <[email protected]>
1 parent c41013e commit 416bdf0

File tree

3 files changed

+24
-3
lines changed

3 files changed

+24
-3
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -860,7 +860,7 @@ Other
860860
- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`)
861861
- Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`)
862862
- Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`)
863-
863+
- Fixed regression in logical operators raising ``ValueError`` when columns of :class:`DataFrame` are a :class:`CategoricalIndex` with unused categories (:issue:`38367`)
864864

865865
.. ---------------------------------------------------------------------------
866866

pandas/core/ops/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -309,11 +309,11 @@ def should_reindex_frame_op(
309309

310310
if fill_value is None and level is None and axis is default_axis:
311311
# TODO: any other cases we should handle here?
312-
cols = left.columns.intersection(right.columns)
313312

314313
# Intersection is always unique so we have to check the unique columns
315314
left_uniques = left.columns.unique()
316315
right_uniques = right.columns.unique()
316+
cols = left_uniques.intersection(right_uniques)
317317
if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)):
318318
# TODO: is there a shortcut available when len(cols) == 0?
319319
return True

pandas/tests/frame/test_logical_ops.py

+22-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
import pytest
66

7-
from pandas import DataFrame, Series
7+
from pandas import CategoricalIndex, DataFrame, Interval, Series, isnull
88
import pandas._testing as tm
99

1010

@@ -162,3 +162,24 @@ def test_logical_with_nas(self):
162162
result = d["a"].fillna(False, downcast=False) | d["b"]
163163
expected = Series([True, True])
164164
tm.assert_series_equal(result, expected)
165+
166+
def test_logical_ops_categorical_columns(self):
167+
# GH#38367
168+
intervals = [Interval(1, 2), Interval(3, 4)]
169+
data = DataFrame(
170+
[[1, np.nan], [2, np.nan]],
171+
columns=CategoricalIndex(
172+
intervals, categories=intervals + [Interval(5, 6)]
173+
),
174+
)
175+
mask = DataFrame(
176+
[[False, False], [False, False]], columns=data.columns, dtype=bool
177+
)
178+
result = mask | isnull(data)
179+
expected = DataFrame(
180+
[[False, True], [False, True]],
181+
columns=CategoricalIndex(
182+
intervals, categories=intervals + [Interval(5, 6)]
183+
),
184+
)
185+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)