diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 3b3b1e6c14a8f..24359809065b1 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -1581,3 +1581,23 @@ def get_op_from_name(op_name: str) -> Callable:
         op = lambda x, y: rop(y, x)
 
     return op
+
+
+# -----------------------------------------------------------------------------
+# Indexing test helpers
+
+
+def getitem(x):
+    return x
+
+
+def setitem(x):
+    return x
+
+
+def loc(x):
+    return x.loc
+
+
+def iloc(x):
+    return x.iloc
diff --git a/pandas/conftest.py b/pandas/conftest.py
index d84a72d4cc7a8..2862f7c957abc 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1446,3 +1446,19 @@ def names(request):
     A 3-tuple of names, the first two for operands, the last for a result.
     """
     return request.param
+
+
+@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc])
+def indexer_sli(request):
+    """
+    Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__
+    """
+    return request.param
+
+
+@pytest.fixture(params=[tm.setitem, tm.iloc])
+def indexer_si(request):
+    """
+    Parametrize over __setitem__, iloc.__setitem__
+    """
+    return request.param
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 2199c32dbd0ba..f67341ab176d7 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -17,23 +17,6 @@
 
 from .test_floats import gen_obj
 
-
-def getitem(x):
-    return x
-
-
-def setitem(x):
-    return x
-
-
-def loc(x):
-    return x.loc
-
-
-def iloc(x):
-    return x.iloc
-
-
 # ------------------------------------------------------------------------
 # Indexing test cases
 
@@ -72,7 +55,7 @@ def test_setitem_ndarray_1d(self):
         with pytest.raises(ValueError, match=msg):
             df[2:5] = np.arange(1, 4) * 1j
 
-    @pytest.mark.parametrize("idxr", [getitem, loc, iloc])
+    @pytest.mark.parametrize("idxr", [tm.getitem, tm.loc, tm.iloc])
     def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
         # GH 25567
         obj = gen_obj(frame_or_series, index)
@@ -95,7 +78,7 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
             with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
                 idxr[nd3]
 
-    @pytest.mark.parametrize("indexer", [setitem, loc, iloc])
+    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
     def test_setitem_ndarray_3d(self, index, frame_or_series, indexer):
         # GH 25567
         obj = gen_obj(frame_or_series, index)
@@ -297,7 +280,7 @@ def test_dups_fancy_indexing2(self):
         result = df.loc[[1, 2], ["a", "b"]]
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize("case", [getitem, loc])
+    @pytest.mark.parametrize("case", [tm.getitem, tm.loc])
     def test_duplicate_int_indexing(self, case):
         # GH 17347
         s = Series(range(3), index=[1, 1, 3])
@@ -592,7 +575,7 @@ def test_astype_assignment(self):
         expected = DataFrame({"A": [1, 2, 3, 4]})
         tm.assert_frame_equal(df, expected)
 
-    @pytest.mark.parametrize("indexer", [getitem, loc])
+    @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc])
     def test_index_type_coercion(self, indexer):
 
         # GH 11836
@@ -965,7 +948,7 @@ def test_none_coercion_mixed_dtypes(self):
 
 
 class TestDatetimelikeCoercion:
-    @pytest.mark.parametrize("indexer", [setitem, loc, iloc])
+    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
     def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
         # dispatching _can_hold_element to underling DatetimeArray
         tz = tz_naive_fixture
@@ -991,12 +974,12 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
     @pytest.mark.parametrize(
         "key", [[0, 1], slice(0, 2), np.array([True, True, False])]
     )
-    @pytest.mark.parametrize("indexer", [setitem, loc, iloc])
+    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
     def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
         # dispatching _can_hold_element to underling DatetimeArray
         tz = tz_naive_fixture
 
-        if isinstance(key, slice) and indexer is loc:
+        if isinstance(key, slice) and indexer is tm.loc:
             key = slice(0, 1)
 
         dti = date_range("2016-01-01", periods=3, tz=tz)
@@ -1017,7 +1000,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
             assert ser._values is values
 
     @pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)])
-    @pytest.mark.parametrize("indexer", [setitem, loc, iloc])
+    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
     def test_setitem_td64_scalar(self, indexer, scalar):
         # dispatching _can_hold_element to underling TimedeltaArray
         tdi = timedelta_range("1 Day", periods=3)
@@ -1033,10 +1016,10 @@ def test_setitem_td64_scalar(self, indexer, scalar):
     @pytest.mark.parametrize(
         "key", [[0, 1], slice(0, 2), np.array([True, True, False])]
     )
-    @pytest.mark.parametrize("indexer", [setitem, loc, iloc])
+    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
     def test_setitem_td64_string_values(self, indexer, key, box):
         # dispatching _can_hold_element to underling TimedeltaArray
-        if isinstance(key, slice) and indexer is loc:
+        if isinstance(key, slice) and indexer is tm.loc:
             key = slice(0, 1)
 
         tdi = timedelta_range("1 Day", periods=3)
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index 159b42621f970..dbc751dd614a1 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -271,32 +271,6 @@ def test_setitem(datetime_series, string_series):
     tm.assert_series_equal(s, expected)
 
 
-def test_setitem_dtypes():
-    # change dtypes
-    # GH 4463
-    expected = Series([np.nan, 2, 3])
-
-    s = Series([1, 2, 3])
-    s.iloc[0] = np.nan
-    tm.assert_series_equal(s, expected)
-
-    s = Series([1, 2, 3])
-    s.loc[0] = np.nan
-    tm.assert_series_equal(s, expected)
-
-    s = Series([1, 2, 3])
-    s[0] = np.nan
-    tm.assert_series_equal(s, expected)
-
-    s = Series([False])
-    s.loc[0] = np.nan
-    tm.assert_series_equal(s, Series([np.nan]))
-
-    s = Series([False, True])
-    s.loc[0] = np.nan
-    tm.assert_series_equal(s, Series([np.nan, 1.0]))
-
-
 def test_setslice(datetime_series):
     sl = datetime_series[5:20]
     assert len(sl) == len(sl.index)
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index 5f09283249fe3..d6d0723bee0e8 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -5,6 +5,7 @@
 
 from pandas import (
     DatetimeIndex,
+    Index,
     MultiIndex,
     NaT,
     Series,
@@ -238,24 +239,113 @@ def test_setitem_callable_other(self):
         tm.assert_series_equal(ser, expected)
 
 
-class TestSetitemCasting:
-    def test_setitem_nan_casts(self):
-        # these induce dtype changes
-        expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
-        ser = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
-        ser[::2] = np.nan
-        tm.assert_series_equal(ser, expected)
-
-        # gets coerced to float, right?
-        expected = Series([np.nan, 1, np.nan, 0])
-        ser = Series([True, True, False, False])
-        ser[::2] = np.nan
-        tm.assert_series_equal(ser, expected)
-
-        expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9])
-        ser = Series(np.arange(10))
-        ser[:5] = np.nan
-        tm.assert_series_equal(ser, expected)
+@pytest.mark.parametrize(
+    "obj,expected,key",
+    [
+        (
+            # these induce dtype changes
+            Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),
+            Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]),
+            slice(None, None, 2),
+        ),
+        (
+            # gets coerced to float, right?
+            Series([True, True, False, False]),
+            Series([np.nan, 1, np.nan, 0]),
+            slice(None, None, 2),
+        ),
+        (
+            # these induce dtype changes
+            Series(np.arange(10)),
+            Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]),
+            slice(None, 5),
+        ),
+        (
+            # changes dtype GH#4463
+            Series([1, 2, 3]),
+            Series([np.nan, 2, 3]),
+            0,
+        ),
+        (
+            # changes dtype GH#4463
+            Series([False]),
+            Series([np.nan]),
+            0,
+        ),
+        (
+            # changes dtype GH#4463
+            Series([False, True]),
+            Series([np.nan, 1.0]),
+            0,
+        ),
+    ],
+)
+class TestSetitemCastingEquivalents:
+    """
+    Check each of several methods that _should_ be equivalent to `obj[key] = np.nan`
+
+    We assume that
+        - obj.index is the default Index(range(len(obj)))
+        - the setitem does not expand the obj
+    """
+
+    def test_int_key(self, obj, key, expected, indexer_sli):
+        if not isinstance(key, int):
+            pytest.skip("Not relevant for non-int keys")
+
+        obj = obj.copy()
+        indexer_sli(obj)[key] = np.nan
+        tm.assert_series_equal(obj, expected)
+
+    def test_slice_key(self, obj, key, expected, indexer_si):
+        # Note: no .loc because that handles slice edges differently
+        obj = obj.copy()
+        indexer_si(obj)[key] = np.nan
+        tm.assert_series_equal(obj, expected)
+
+    def test_intlist_key(self, obj, key, expected, indexer_sli):
+        ilkey = list(range(len(obj)))[key]
+
+        obj = obj.copy()
+        indexer_sli(obj)[ilkey] = np.nan
+        tm.assert_series_equal(obj, expected)
+
+    def test_mask_key(self, obj, key, expected, indexer_sli):
+        # setitem with boolean mask
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        obj = obj.copy()
+        indexer_sli(obj)[mask] = np.nan
+        tm.assert_series_equal(obj, expected)
+
+    def test_series_where(self, obj, key, expected):
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        obj = obj.copy()
+        res = obj.where(~mask, np.nan)
+        tm.assert_series_equal(res, expected)
+
+    def test_index_where(self, obj, key, expected, request):
+        if obj.dtype == bool:
+            msg = "Index/Series casting behavior inconsistent GH#38692"
+            mark = pytest.mark.xfail(reason=msg)  # marker; pytest.xfail() raises
+            request.node.add_marker(mark)
+
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        res = Index(obj).where(~mask, np.nan)
+        tm.assert_index_equal(res, Index(expected))
+
+    @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
+    def test_index_putmask(self, obj, key, expected):
+        mask = np.zeros(obj.shape, dtype=bool)
+        mask[key] = True
+
+        res = Index(obj).putmask(mask, np.nan)
+        tm.assert_index_equal(res, Index(expected))
 
 
 class TestSetitemWithExpansion: