Skip to content

Commit 8fb347f

Browse files
authored
DEPR: Index.reindex with duplicate index (#42568)
1 parent e042219 commit 8fb347f

File tree

10 files changed

+49
-16
lines changed

10 files changed

+49
-16
lines changed

doc/source/user_guide/duplicates.rst

+1
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ duplicates present. The output can't be determined, and so pandas raises.
2828

2929
.. ipython:: python
3030
:okexcept:
31+
:okwarning:
3132
3233
s1 = pd.Series([0, 1, 2], index=["a", "b", "b"])
3334
s1.reindex(["a", "b", "c"])

doc/source/whatsnew/v1.4.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,7 @@ Deprecations
162162
- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
163163
- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
164164
- Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
165+
- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
165166
-
166167

167168
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+9
Original file line number | Diff line number | Diff line change
@@ -3915,6 +3915,15 @@ def reindex(
39153915
)
39163916
indexer, _ = self.get_indexer_non_unique(target)
39173917

3918+
if not self.is_unique:
3919+
# GH#42568
3920+
warnings.warn(
3921+
"reindexing with a non-unique Index is deprecated and "
3922+
"will raise in a future version",
3923+
FutureWarning,
3924+
stacklevel=2,
3925+
)
3926+
39183927
target = self._wrap_reindex_result(target, indexer, preserve_names)
39193928
return target, indexer
39203929

pandas/core/indexes/category.py

+8
Original file line number | Diff line number | Diff line change
@@ -426,6 +426,14 @@ def reindex(
426426
missing = np.array([], dtype=np.intp)
427427
else:
428428
indexer, missing = self.get_indexer_non_unique(target)
429+
if not self.is_unique:
430+
# GH#42568
431+
warnings.warn(
432+
"reindexing with a non-unique Index is deprecated and will "
433+
"raise in a future version",
434+
FutureWarning,
435+
stacklevel=2,
436+
)
429437

430438
if len(self) and indexer is not None:
431439
new_target = self.take(indexer)

pandas/tests/frame/indexing/test_getitem.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,8 @@ def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_col
301301
df = df_dup_cols
302302
msg = "cannot reindex on an axis with duplicate labels"
303303
with pytest.raises(ValueError, match=msg):
304-
df[df.A > 6]
304+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
305+
df[df.A > 6]
305306

306307
def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
307308
# boolean indexing

pandas/tests/frame/indexing/test_setitem.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,8 @@ def test_setitem_error_msmgs(self):
7070
)
7171
msg = "cannot reindex on an axis with duplicate labels"
7272
with pytest.raises(ValueError, match=msg):
73-
df["newcol"] = ser
73+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
74+
df["newcol"] = ser
7475

7576
# GH 4107, more descriptive error message
7677
df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"])

pandas/tests/frame/methods/test_reindex.py

+8 -4
Original file line number | Diff line number | Diff line change
@@ -674,7 +674,8 @@ def test_reindex_dups(self):
674674
# reindex fails
675675
msg = "cannot reindex on an axis with duplicate labels"
676676
with pytest.raises(ValueError, match=msg):
677-
df.reindex(index=list(range(len(df))))
677+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
678+
df.reindex(index=list(range(len(df))))
678679

679680
def test_reindex_with_duplicate_columns(self):
680681

@@ -684,9 +685,11 @@ def test_reindex_with_duplicate_columns(self):
684685
)
685686
msg = "cannot reindex on an axis with duplicate labels"
686687
with pytest.raises(ValueError, match=msg):
687-
df.reindex(columns=["bar"])
688+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
689+
df.reindex(columns=["bar"])
688690
with pytest.raises(ValueError, match=msg):
689-
df.reindex(columns=["bar", "foo"])
691+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
692+
df.reindex(columns=["bar", "foo"])
690693

691694
def test_reindex_axis_style(self):
692695
# https://github.com/pandas-dev/pandas/issues/12392
@@ -958,7 +961,8 @@ def test_reindex_with_categoricalindex(self):
958961
# passed duplicate indexers are not allowed
959962
msg = "cannot reindex on an axis with duplicate labels"
960963
with pytest.raises(ValueError, match=msg):
961-
df2.reindex(["a", "b"])
964+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
965+
df2.reindex(["a", "b"])
962966

963967
# args NotImplemented ATM
964968
msg = r"argument {} is not implemented for CategoricalIndex\.reindex"

pandas/tests/indexes/categorical/test_reindex.py

+14 -8
Original file line number | Diff line number | Diff line change
@@ -13,26 +13,32 @@
1313

1414
class TestReindex:
1515
def test_reindex_dtype(self):
16-
c = CategoricalIndex(["a", "b", "c", "a"])
17-
res, indexer = c.reindex(["a", "c"])
16+
# GH#11586
17+
ci = CategoricalIndex(["a", "b", "c", "a"])
18+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
19+
res, indexer = ci.reindex(["a", "c"])
20+
1821
tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True)
1922
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
2023

21-
c = CategoricalIndex(["a", "b", "c", "a"])
22-
res, indexer = c.reindex(Categorical(["a", "c"]))
24+
ci = CategoricalIndex(["a", "b", "c", "a"])
25+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
26+
res, indexer = ci.reindex(Categorical(["a", "c"]))
2327

2428
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
2529
tm.assert_index_equal(res, exp, exact=True)
2630
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
2731

28-
c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
29-
res, indexer = c.reindex(["a", "c"])
32+
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
33+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
34+
res, indexer = ci.reindex(["a", "c"])
3035
exp = Index(["a", "a", "c"], dtype="object")
3136
tm.assert_index_equal(res, exp, exact=True)
3237
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
3338

34-
c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
35-
res, indexer = c.reindex(Categorical(["a", "c"]))
39+
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
40+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
41+
res, indexer = ci.reindex(Categorical(["a", "c"]))
3642
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
3743
tm.assert_index_equal(res, exp, exact=True)
3844
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))

pandas/tests/indexes/multi/test_reindex.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,8 @@ def test_reindex_non_unique():
110110

111111
msg = "cannot handle a non-unique multi-index!"
112112
with pytest.raises(ValueError, match=msg):
113-
a.reindex(new_idx)
113+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
114+
a.reindex(new_idx)
114115

115116

116117
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])

pandas/tests/resample/test_datetime_index.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -694,7 +694,8 @@ def test_asfreq_non_unique():
694694

695695
msg = "cannot reindex on an axis with duplicate labels"
696696
with pytest.raises(ValueError, match=msg):
697-
ts.asfreq("B")
697+
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
698+
ts.asfreq("B")
698699

699700

700701
def test_resample_axis1():

0 commit comments

Comments
 (0)