From 6ee5c5f5e1613c1fc0e76642e1954dec277feb81 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 24 Dec 2020 13:47:46 -0800 Subject: [PATCH 1/2] TST: implement tm.check_setitem_equivalents --- pandas/_testing.py | 75 +++++++++++++++++++ pandas/tests/indexing/test_indexing.py | 37 +++------ pandas/tests/series/indexing/test_indexing.py | 26 ------- pandas/tests/series/indexing/test_setitem.py | 40 +++++++--- 4 files changed, 116 insertions(+), 62 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 7786eeeb46797..2256156ad222f 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -3143,3 +3143,78 @@ def get_op_from_name(op_name: str) -> Callable: op = lambda x, y: rop(y, x) return op + + +# ----------------------------------------------------------------------------- +# Indexing test helpers + + +def getitem(x): + return x + + +def setitem(x): + return x + + +def loc(x): + return x.loc + + +def iloc(x): + return x.iloc + + +def check_setitem_equivalents(obj: Series, key: Union[int, slice], expected: Series): + """ + Check each of several methods that _should_ be equivalent to `obj[key] = np.nan` + + We assume that + - obj.index is the default Index(range(len(obj))) + - the setitem does not expand the obj + """ + orig = obj.copy() + + if isinstance(key, int): + for indexer in [setitem, loc, iloc]: + obj = orig.copy() + indexer(obj)[key] = np.nan + assert_series_equal(obj, expected) + + key = slice(key, key + 1) + + # setitem with slice + for indexer in [setitem, iloc]: + # Note: no .loc because that handles slice edges differently + obj = orig.copy() + indexer(obj)[key] = np.nan + assert_series_equal(obj, expected) + + # list of ints + ilkey = list(range(len(obj)))[key] + for indexer in [setitem, loc, iloc]: + obj = orig.copy() + indexer(obj)[ilkey] = np.nan + assert_series_equal(obj, expected) + + # setitem with boolean mask + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + for indexer in [setitem, loc, iloc]: + obj = orig.copy() + 
indexer(obj)[mask] = np.nan + assert_series_equal(obj, expected) + + # Series.where + obj = orig.copy() + res = obj.where(~mask, np.nan) + assert_equal(res, expected) + + # Index equivalents + if Index(orig).dtype == orig.dtype: + obj = orig.copy() + res = Index(obj).where(~mask, np.nan) + assert_index_equal(res, Index(expected)) + + # TODO: implement the same for Index(obj).putmask(mask, np.nan) + # once that behavior matches diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e8c4a834bdeb1..e59d6de1b2a4c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -17,23 +17,6 @@ from .test_floats import gen_obj - -def getitem(x): - return x - - -def setitem(x): - return x - - -def loc(x): - return x.loc - - -def iloc(x): - return x.iloc - - # ------------------------------------------------------------------------ # Indexing test cases @@ -72,7 +55,7 @@ def test_setitem_ndarray_1d(self): with pytest.raises(ValueError, match=msg): df[2:5] = np.arange(1, 4) * 1j - @pytest.mark.parametrize("idxr", [getitem, loc, iloc]) + @pytest.mark.parametrize("idxr", [tm.getitem, tm.loc, tm.iloc]) def test_getitem_ndarray_3d(self, index, frame_or_series, idxr): # GH 25567 obj = gen_obj(frame_or_series, index) @@ -95,7 +78,7 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, idxr): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idxr[nd3] - @pytest.mark.parametrize("indexer", [setitem, loc, iloc]) + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) def test_setitem_ndarray_3d(self, index, frame_or_series, indexer): # GH 25567 obj = gen_obj(frame_or_series, index) @@ -297,7 +280,7 @@ def test_dups_fancy_indexing2(self): result = df.loc[[1, 2], ["a", "b"]] tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("case", [getitem, loc]) + @pytest.mark.parametrize("case", [tm.getitem, tm.loc]) def test_duplicate_int_indexing(self, 
case): # GH 17347 s = Series(range(3), index=[1, 1, 3]) @@ -594,7 +577,7 @@ def test_astype_assignment(self): expected = DataFrame({"A": [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("indexer", [getitem, loc]) + @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) def test_index_type_coercion(self, indexer): # GH 11836 @@ -967,7 +950,7 @@ def test_none_coercion_mixed_dtypes(self): class TestDatetimelikeCoercion: - @pytest.mark.parametrize("indexer", [setitem, loc, iloc]) + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer): # dispatching _can_hold_element to underling DatetimeArray tz = tz_naive_fixture @@ -993,12 +976,12 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer): @pytest.mark.parametrize( "key", [[0, 1], slice(0, 2), np.array([True, True, False])] ) - @pytest.mark.parametrize("indexer", [setitem, loc, iloc]) + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box): # dispatching _can_hold_element to underling DatetimeArray tz = tz_naive_fixture - if isinstance(key, slice) and indexer is loc: + if isinstance(key, slice) and indexer is tm.loc: key = slice(0, 1) dti = date_range("2016-01-01", periods=3, tz=tz) @@ -1019,7 +1002,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box): assert ser._values is values @pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)]) - @pytest.mark.parametrize("indexer", [setitem, loc, iloc]) + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) def test_setitem_td64_scalar(self, indexer, scalar): # dispatching _can_hold_element to underling TimedeltaArray tdi = timedelta_range("1 Day", periods=3) @@ -1035,10 +1018,10 @@ def test_setitem_td64_scalar(self, indexer, scalar): @pytest.mark.parametrize( "key", [[0, 1], slice(0, 2), np.array([True, 
True, False])] ) - @pytest.mark.parametrize("indexer", [setitem, loc, iloc]) + @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) def test_setitem_td64_string_values(self, indexer, key, box): # dispatching _can_hold_element to underling TimedeltaArray - if isinstance(key, slice) and indexer is loc: + if isinstance(key, slice) and indexer is tm.loc: key = slice(0, 1) tdi = timedelta_range("1 Day", periods=3) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 159b42621f970..dbc751dd614a1 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -271,32 +271,6 @@ def test_setitem(datetime_series, string_series): tm.assert_series_equal(s, expected) -def test_setitem_dtypes(): - # change dtypes - # GH 4463 - expected = Series([np.nan, 2, 3]) - - s = Series([1, 2, 3]) - s.iloc[0] = np.nan - tm.assert_series_equal(s, expected) - - s = Series([1, 2, 3]) - s.loc[0] = np.nan - tm.assert_series_equal(s, expected) - - s = Series([1, 2, 3]) - s[0] = np.nan - tm.assert_series_equal(s, expected) - - s = Series([False]) - s.loc[0] = np.nan - tm.assert_series_equal(s, Series([np.nan])) - - s = Series([False, True]) - s.loc[0] = np.nan - tm.assert_series_equal(s, Series([np.nan, 1.0])) - - def test_setslice(datetime_series): sl = datetime_series[5:20] assert len(sl) == len(sl.index) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5f09283249fe3..8abcf3f7ceead 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -241,21 +241,43 @@ def test_setitem_callable_other(self): class TestSetitemCasting: def test_setitem_nan_casts(self): # these induce dtype changes - expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) + ser = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - ser[::2] = np.nan - tm.assert_series_equal(ser, expected) + 
expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) + key = slice(None, None, 2) + tm.check_setitem_equivalents(ser, key, expected) # gets coerced to float, right? - expected = Series([np.nan, 1, np.nan, 0]) ser = Series([True, True, False, False]) - ser[::2] = np.nan - tm.assert_series_equal(ser, expected) + expected = Series([np.nan, 1, np.nan, 0]) + key = slice(None, None, 2) + tm.check_setitem_equivalents(ser, key, expected) - expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) ser = Series(np.arange(10)) - ser[:5] = np.nan - tm.assert_series_equal(ser, expected) + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) + key = slice(None, 5) + tm.check_setitem_equivalents(ser, key, expected) + + def test_setitem_nan_into_int(self): + # change dtypes + # GH#4463 + ser = Series([1, 2, 3]) + expected = Series([np.nan, 2, 3]) + key = 0 + tm.check_setitem_equivalents(ser, key, expected) + + def test_setitem_nan_into_bool(self): + # change dtypes + # GH#4463 + ser = Series([False]) + expected = Series([np.nan]) + key = 0 + tm.check_setitem_equivalents(ser, key, expected) + + ser = Series([False, True]) + expected = Series([np.nan, 1.0]) + key = 0 + tm.check_setitem_equivalents(ser, key, expected) class TestSetitemWithExpansion: From 33e39f9a2dd50f12251cc6e3774a012f7314cefe Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Dec 2020 16:01:50 -0800 Subject: [PATCH 2/2] re-write as fixturized --- pandas/_testing.py | 55 ------- pandas/conftest.py | 16 ++ pandas/tests/series/indexing/test_setitem.py | 148 ++++++++++++++----- 3 files changed, 124 insertions(+), 95 deletions(-) diff --git a/pandas/_testing.py b/pandas/_testing.py index 2256156ad222f..bc153bcba1faa 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -3163,58 +3163,3 @@ def loc(x): def iloc(x): return x.iloc - - -def check_setitem_equivalents(obj: Series, key: Union[int, slice], expected: Series): - """ - Check each of several methods 
that _should_ be equivalent to `obj[key] = np.nan` - - We assume that - - obj.index is the default Index(range(len(obj))) - - the setitem does not expand the obj - """ - orig = obj.copy() - - if isinstance(key, int): - for indexer in [setitem, loc, iloc]: - obj = orig.copy() - indexer(obj)[key] = np.nan - assert_series_equal(obj, expected) - - key = slice(key, key + 1) - - # setitem with slice - for indexer in [setitem, iloc]: - # Note: no .loc because that handles slice edges differently - obj = orig.copy() - indexer(obj)[key] = np.nan - assert_series_equal(obj, expected) - - # list of ints - ilkey = list(range(len(obj)))[key] - for indexer in [setitem, loc, iloc]: - obj = orig.copy() - indexer(obj)[ilkey] = np.nan - assert_series_equal(obj, expected) - - # setitem with boolean mask - mask = np.zeros(obj.shape, dtype=bool) - mask[key] = True - for indexer in [setitem, loc, iloc]: - obj = orig.copy() - indexer(obj)[mask] = np.nan - assert_series_equal(obj, expected) - - # Series.where - obj = orig.copy() - res = obj.where(~mask, np.nan) - assert_equal(res, expected) - - # Index equivalents - if Index(orig).dtype == orig.dtype: - obj = orig.copy() - res = Index(obj).where(~mask, np.nan) - assert_index_equal(res, Index(expected)) - - # TODO: implement the same for Index(obj).putmask(mask, np.nan) - # once that behavior matches diff --git a/pandas/conftest.py b/pandas/conftest.py index d84a72d4cc7a8..2862f7c957abc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1446,3 +1446,19 @@ def names(request): A 3-tuple of names, the first two for operands, the last for a result. 
""" return request.param + + +@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc]) +def indexer_sli(request): + """ + Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__ + """ + return request.param + + +@pytest.fixture(params=[tm.setitem, tm.iloc]) +def indexer_si(request): + """ + Parametrize over __setitem__, iloc.__setitem__ + """ + return request.param diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 8abcf3f7ceead..d6d0723bee0e8 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -5,6 +5,7 @@ from pandas import ( DatetimeIndex, + Index, MultiIndex, NaT, Series, @@ -238,46 +239,113 @@ def test_setitem_callable_other(self): tm.assert_series_equal(ser, expected) -class TestSetitemCasting: - def test_setitem_nan_casts(self): - # these induce dtype changes - - ser = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) - key = slice(None, None, 2) - tm.check_setitem_equivalents(ser, key, expected) - - # gets coerced to float, right? 
- ser = Series([True, True, False, False]) - expected = Series([np.nan, 1, np.nan, 0]) - key = slice(None, None, 2) - tm.check_setitem_equivalents(ser, key, expected) - - ser = Series(np.arange(10)) - expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) - key = slice(None, 5) - tm.check_setitem_equivalents(ser, key, expected) - - def test_setitem_nan_into_int(self): - # change dtypes - # GH#4463 - ser = Series([1, 2, 3]) - expected = Series([np.nan, 2, 3]) - key = 0 - tm.check_setitem_equivalents(ser, key, expected) - - def test_setitem_nan_into_bool(self): - # change dtypes - # GH#4463 - ser = Series([False]) - expected = Series([np.nan]) - key = 0 - tm.check_setitem_equivalents(ser, key, expected) - - ser = Series([False, True]) - expected = Series([np.nan, 1.0]) - key = 0 - tm.check_setitem_equivalents(ser, key, expected) +@pytest.mark.parametrize( + "obj,expected,key", + [ + ( + # these induce dtype changes + Series([2, 3, 4, 5, 6, 7, 8, 9, 10]), + Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]), + slice(None, None, 2), + ), + ( + # gets coerced to float, right? 
+ Series([True, True, False, False]), + Series([np.nan, 1, np.nan, 0]), + slice(None, None, 2), + ), + ( + # these induce dtype changes + Series(np.arange(10)), + Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]), + slice(None, 5), + ), + ( + # changes dtype GH#4463 + Series([1, 2, 3]), + Series([np.nan, 2, 3]), + 0, + ), + ( + # changes dtype GH#4463 + Series([False]), + Series([np.nan]), + 0, + ), + ( + # changes dtype GH#4463 + Series([False, True]), + Series([np.nan, 1.0]), + 0, + ), + ], +) +class TestSetitemCastingEquivalents: + """ + Check each of several methods that _should_ be equivalent to `obj[key] = np.nan` + + We assume that + - obj.index is the default Index(range(len(obj))) + - the setitem does not expand the obj + """ + + def test_int_key(self, obj, key, expected, indexer_sli): + if not isinstance(key, int): + return + + obj = obj.copy() + indexer_sli(obj)[key] = np.nan + tm.assert_series_equal(obj, expected) + + def test_slice_key(self, obj, key, expected, indexer_si): + # Note: no .loc because that handles slice edges differently + obj = obj.copy() + indexer_si(obj)[key] = np.nan + tm.assert_series_equal(obj, expected) + + def test_intlist_key(self, obj, key, expected, indexer_sli): + ilkey = list(range(len(obj)))[key] + + obj = obj.copy() + indexer_sli(obj)[ilkey] = np.nan + tm.assert_series_equal(obj, expected) + + def test_mask_key(self, obj, key, expected, indexer_sli): + # setitem with boolean mask + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + obj = obj.copy() + indexer_sli(obj)[mask] = np.nan + tm.assert_series_equal(obj, expected) + + def test_series_where(self, obj, key, expected): + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + obj = obj.copy() + res = obj.where(~mask, np.nan) + tm.assert_series_equal(res, expected) + + def test_index_where(self, obj, key, expected, request): + if obj.dtype == bool: + msg = "Index/Series casting behavior inconsistent GH#38692" + mark = 
pytest.mark.xfail(reason=msg) + request.node.add_marker(mark) + + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + res = Index(obj).where(~mask, np.nan) + tm.assert_index_equal(res, Index(expected)) + + @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692") + def test_index_putmask(self, obj, key, expected): + mask = np.zeros(obj.shape, dtype=bool) + mask[key] = True + + res = Index(obj).putmask(mask, np.nan) + tm.assert_index_equal(res, Index(expected)) class TestSetitemWithExpansion: