Skip to content

Commit 9bf2e46

Browse files
jbrockmendel authored and ukarroum committed
DEPR: Index.__and__, __or__, __xor__ behaving as set ops (#37374)
1 parent 76edfa9 commit 9bf2e46

File tree

16 files changed

+79
-32
lines changed

16 files changed

+79
-32
lines changed

doc/source/user_guide/indexing.rst

+4-8
Original file line numberDiff line numberDiff line change
@@ -1594,19 +1594,16 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
15941594
Set operations on Index objects
15951595
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15961596

1597-
The two main operations are ``union (|)`` and ``intersection (&)``.
1598-
These can be directly called as instance methods or used via overloaded
1599-
operators. Difference is provided via the ``.difference()`` method.
1597+
The two main operations are ``union`` and ``intersection``.
1598+
Difference is provided via the ``.difference()`` method.
16001599

16011600
.. ipython:: python
16021601
16031602
a = pd.Index(['c', 'b', 'a'])
16041603
b = pd.Index(['c', 'e', 'd'])
1605-
a | b
1606-
a & b
16071604
a.difference(b)
16081605
1609-
Also available is the ``symmetric_difference (^)`` operation, which returns elements
1606+
Also available is the ``symmetric_difference`` operation, which returns elements
16101607
that appear in either ``idx1`` or ``idx2``, but not in both. This is
16111608
equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``,
16121609
with duplicates dropped.
@@ -1616,7 +1613,6 @@ with duplicates dropped.
16161613
idx1 = pd.Index([1, 2, 3, 4])
16171614
idx2 = pd.Index([2, 3, 4, 5])
16181615
idx1.symmetric_difference(idx2)
1619-
idx1 ^ idx2
16201616
16211617
.. note::
16221618

@@ -1631,7 +1627,7 @@ integer values are converted to float
16311627
16321628
idx1 = pd.Index([0, 1, 2])
16331629
idx2 = pd.Index([0.5, 1.5])
1634-
idx1 | idx2
1630+
idx1.union(idx2)
16351631
16361632
.. _indexing.missing:
16371633

doc/source/user_guide/missing_data.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ at the new values.
466466
ser = pd.Series(np.sort(np.random.uniform(size=100)))
467467
468468
# interpolate at new_index
469-
new_index = ser.index | pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
469+
new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]))
470470
interp_s = ser.reindex(new_index).interpolate(method="pchip")
471471
interp_s[49:51]
472472

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ Deprecations
338338
- Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
339339
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
340340
- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
341+
- :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`)
341342

342343
.. ---------------------------------------------------------------------------
343344

pandas/core/indexes/base.py

+21
Original file line numberDiff line numberDiff line change
@@ -2504,14 +2504,35 @@ def __iadd__(self, other):
25042504

25052505
@final
25062506
def __and__(self, other):
2507+
warnings.warn(
2508+
"Index.__and__ operating as a set operation is deprecated, "
2509+
"in the future this will be a logical operation matching "
2510+
"Series.__and__. Use index.intersection(other) instead",
2511+
FutureWarning,
2512+
stacklevel=2,
2513+
)
25072514
return self.intersection(other)
25082515

25092516
@final
25102517
def __or__(self, other):
2518+
warnings.warn(
2519+
"Index.__or__ operating as a set operation is deprecated, "
2520+
"in the future this will be a logical operation matching "
2521+
"Series.__or__. Use index.union(other) instead",
2522+
FutureWarning,
2523+
stacklevel=2,
2524+
)
25112525
return self.union(other)
25122526

25132527
@final
25142528
def __xor__(self, other):
2529+
warnings.warn(
2530+
"Index.__xor__ operating as a set operation is deprecated, "
2531+
"in the future this will be a logical operation matching "
2532+
"Series.__xor__. Use index.symmetric_difference(other) instead",
2533+
FutureWarning,
2534+
stacklevel=2,
2535+
)
25152536
return self.symmetric_difference(other)
25162537

25172538
@final

pandas/core/indexes/multi.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -3126,12 +3126,12 @@ def _convert_to_indexer(r) -> Int64Index:
31263126
r = r.nonzero()[0]
31273127
return Int64Index(r)
31283128

3129-
def _update_indexer(idxr, indexer=indexer):
3129+
def _update_indexer(idxr: Optional[Index], indexer: Optional[Index]) -> Index:
31303130
if indexer is None:
31313131
indexer = Index(np.arange(n))
31323132
if idxr is None:
31333133
return indexer
3134-
return indexer & idxr
3134+
return indexer.intersection(idxr)
31353135

31363136
for i, k in enumerate(seq):
31373137

@@ -3149,7 +3149,9 @@ def _update_indexer(idxr, indexer=indexer):
31493149
idxrs = _convert_to_indexer(
31503150
self._get_level_indexer(x, level=i, indexer=indexer)
31513151
)
3152-
indexers = idxrs if indexers is None else indexers | idxrs
3152+
indexers = (idxrs if indexers is None else indexers).union(
3153+
idxrs
3154+
)
31533155
except KeyError:
31543156

31553157
# ignore not founds

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ def __array_ufunc__(
725725
# it to handle *args.
726726
index = alignable[0].index
727727
for s in alignable[1:]:
728-
index |= s.index
728+
index = index.union(s.index)
729729
inputs = tuple(
730730
x.reindex(index) if issubclass(t, Series) else x
731731
for x, t in zip(inputs, types)

pandas/io/formats/excel.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -475,10 +475,10 @@ def __init__(
475475
if cols is not None:
476476

477477
# all missing, raise
478-
if not len(Index(cols) & df.columns):
478+
if not len(Index(cols).intersection(df.columns)):
479479
raise KeyError("passes columns are not ALL present dataframe")
480480

481-
if len(Index(cols) & df.columns) != len(cols):
481+
if len(Index(cols).intersection(df.columns)) != len(cols):
482482
# Deprecated in GH#17295, enforced in 1.0.0
483483
raise KeyError("Not all names specified in 'columns' are found")
484484

pandas/io/json/_json.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def __init__(
268268
if (
269269
(obj.ndim == 1)
270270
and (obj.name in set(obj.index.names))
271-
or len(obj.columns & obj.index.names)
271+
or len(obj.columns.intersection(obj.index.names))
272272
):
273273
msg = "Overlapping names between the index and columns"
274274
raise ValueError(msg)

pandas/tests/indexes/datetimes/test_setops.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,8 @@ def test_intersection_bug_1708(self):
300300
index_1 = date_range("1/1/2012", periods=4, freq="12H")
301301
index_2 = index_1 + DateOffset(hours=1)
302302

303-
result = index_1 & index_2
303+
with tm.assert_produces_warning(FutureWarning):
304+
result = index_1 & index_2
304305
assert len(result) == 0
305306

306307
@pytest.mark.parametrize("tz", tz)

pandas/tests/indexes/multi/test_setops.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,13 @@ def test_symmetric_difference(idx, sort):
105105
def test_multiindex_symmetric_difference():
106106
# GH 13490
107107
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
108-
result = idx ^ idx
108+
with tm.assert_produces_warning(FutureWarning):
109+
result = idx ^ idx
109110
assert result.names == idx.names
110111

111112
idx2 = idx.copy().rename(["A", "B"])
112-
result = idx ^ idx2
113+
with tm.assert_produces_warning(FutureWarning):
114+
result = idx ^ idx2
113115
assert result.names == [None, None]
114116

115117

pandas/tests/indexes/test_base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1008,7 +1008,8 @@ def test_symmetric_difference(self, sort):
10081008
tm.assert_index_equal(result, expected)
10091009

10101010
# __xor__ syntax
1011-
expected = index1 ^ index2
1011+
with tm.assert_produces_warning(FutureWarning):
1012+
expected = index1 ^ index2
10121013
assert tm.equalContents(result, expected)
10131014
assert result.name is None
10141015

pandas/tests/indexes/test_setops.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -93,5 +93,18 @@ def test_union_dtypes(left, right, expected):
9393
right = pandas_dtype(right)
9494
a = pd.Index([], dtype=left)
9595
b = pd.Index([], dtype=right)
96-
result = (a | b).dtype
96+
result = a.union(b).dtype
9797
assert result == expected
98+
99+
100+
def test_dunder_inplace_setops_deprecated(index):
101+
# GH#37374 these will become logical ops, not setops
102+
103+
with tm.assert_produces_warning(FutureWarning):
104+
index |= index
105+
106+
with tm.assert_produces_warning(FutureWarning):
107+
index &= index
108+
109+
with tm.assert_produces_warning(FutureWarning):
110+
index ^= index

pandas/tests/indexes/timedeltas/test_setops.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,15 @@ def test_intersection_bug_1708(self):
9797
index_1 = timedelta_range("1 day", periods=4, freq="h")
9898
index_2 = index_1 + pd.offsets.Hour(5)
9999

100-
result = index_1 & index_2
100+
with tm.assert_produces_warning(FutureWarning):
101+
result = index_1 & index_2
101102
assert len(result) == 0
102103

103104
index_1 = timedelta_range("1 day", periods=4, freq="h")
104105
index_2 = index_1 + pd.offsets.Hour(1)
105106

106-
result = index_1 & index_2
107+
with tm.assert_produces_warning(FutureWarning):
108+
result = index_1 & index_2
107109
expected = timedelta_range("1 day 01:00:00", periods=3, freq="h")
108110
tm.assert_index_equal(result, expected)
109111
assert result.freq == expected.freq

pandas/tests/resample/test_datetime_index.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1112,9 +1112,9 @@ def test_resample_anchored_multiday():
11121112
#
11131113
# See: https://github.com/pandas-dev/pandas/issues/8683
11141114

1115-
index = pd.date_range(
1116-
"2014-10-14 23:06:23.206", periods=3, freq="400L"
1117-
) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
1115+
index1 = pd.date_range("2014-10-14 23:06:23.206", periods=3, freq="400L")
1116+
index2 = pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
1117+
index = index1.union(index2)
11181118

11191119
s = Series(np.random.randn(5), index=index)
11201120

pandas/tests/series/test_logical_ops.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -269,11 +269,13 @@ def test_reversed_xor_with_index_returns_index(self):
269269
idx2 = Index([1, 0, 1, 0])
270270

271271
expected = Index.symmetric_difference(idx1, ser)
272-
result = idx1 ^ ser
272+
with tm.assert_produces_warning(FutureWarning):
273+
result = idx1 ^ ser
273274
tm.assert_index_equal(result, expected)
274275

275276
expected = Index.symmetric_difference(idx2, ser)
276-
result = idx2 ^ ser
277+
with tm.assert_produces_warning(FutureWarning):
278+
result = idx2 ^ ser
277279
tm.assert_index_equal(result, expected)
278280

279281
@pytest.mark.parametrize(
@@ -304,11 +306,13 @@ def test_reversed_logical_op_with_index_returns_series(self, op):
304306
idx2 = Index([1, 0, 1, 0])
305307

306308
expected = Series(op(idx1.values, ser.values))
307-
result = op(ser, idx1)
309+
with tm.assert_produces_warning(FutureWarning):
310+
result = op(ser, idx1)
308311
tm.assert_series_equal(result, expected)
309312

310313
expected = Series(op(idx2.values, ser.values))
311-
result = op(ser, idx2)
314+
with tm.assert_produces_warning(FutureWarning):
315+
result = op(ser, idx2)
312316
tm.assert_series_equal(result, expected)
313317

314318
@pytest.mark.parametrize(
@@ -324,7 +328,9 @@ def test_reverse_ops_with_index(self, op, expected):
324328
# multi-set Index ops are buggy, so let's avoid duplicates...
325329
ser = Series([True, False])
326330
idx = Index([False, True])
327-
result = op(ser, idx)
331+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
332+
# behaving as set ops is deprecated, will become logical ops
333+
result = op(ser, idx)
328334
tm.assert_index_equal(result, expected)
329335

330336
def test_logical_ops_label_based(self):

pandas/tests/test_strings.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,9 @@ def test_str_cat_align_mixed_inputs(self, join):
645645
u = np.array(["A", "B", "C", "D"])
646646
expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
647647
# joint index of rhs [t, u]; u will be forced have index of s
648-
rhs_idx = t.index & s.index if join == "inner" else t.index | s.index
648+
rhs_idx = (
649+
t.index.intersection(s.index) if join == "inner" else t.index.union(s.index)
650+
)
649651

650652
expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]
651653
result = s.str.cat([t, u], join=join, na_rep="-")

0 commit comments

Comments
 (0)