From 6aff6b2b7a732ac3a16941f919f9457ce664e9c0 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Feb 2021 20:36:57 -0800 Subject: [PATCH 1/7] TST/REF: share more of SetitemCastingEquivalents --- pandas/tests/series/indexing/test_setitem.py | 155 ++++++------------- 1 file changed, 47 insertions(+), 108 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index bbf3715d8e022..947a115250974 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.compat import np_version_under1p20 - from pandas import ( DatetimeIndex, Index, @@ -317,44 +315,59 @@ def val(self, request): """ return request.param - def check_indexer(self, obj, key, expected, val, indexer): + def check_indexer(self, obj, key, expected, val, indexer, is_inplace): + orig = obj obj = obj.copy() + arr = obj._values + indexer(obj)[key] = val tm.assert_series_equal(obj, expected) - def test_int_key(self, obj, key, expected, val, indexer_sli): + self._check_inplace(is_inplace, orig, arr, obj) + + def _check_inplace(self, is_inplace, orig, arr, obj): + if is_inplace is None: + # We are not (yet) checking whether setting is inplace or not + pass + elif is_inplace: + assert obj._values is arr + else: + # otherwise original array should be unchanged + tm.assert_equal(arr, orig._values) + + def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace): if not isinstance(key, int): return - self.check_indexer(obj, key, expected, val, indexer_sli) + self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) rng = range(key, key + 1) - self.check_indexer(obj, rng, expected, val, indexer_sli) + self.check_indexer(obj, rng, expected, val, indexer_sli, is_inplace) if indexer_sli is not tm.loc: # Note: no .loc because that handles slice edges differently slc = slice(key, key + 1) - self.check_indexer(obj, slc, expected, val, 
indexer_sli) + self.check_indexer(obj, slc, expected, val, indexer_sli, is_inplace) ilkey = [key] - self.check_indexer(obj, ilkey, expected, val, indexer_sli) + self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) indkey = np.array(ilkey) - self.check_indexer(obj, indkey, expected, val, indexer_sli) + self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) - def test_slice_key(self, obj, key, expected, val, indexer_sli): + def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): if not isinstance(key, slice): return if indexer_sli is not tm.loc: # Note: no .loc because that handles slice edges differently - self.check_indexer(obj, key, expected, val, indexer_sli) + self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) ilkey = list(range(len(obj)))[key] - self.check_indexer(obj, ilkey, expected, val, indexer_sli) + self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) indkey = np.array(ilkey) - self.check_indexer(obj, indkey, expected, val, indexer_sli) + self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) def test_mask_key(self, obj, key, expected, val, indexer_sli): # setitem with boolean mask @@ -365,14 +378,19 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli): indexer_sli(obj)[mask] = val tm.assert_series_equal(obj, expected) - def test_series_where(self, obj, key, expected, val): + def test_series_where(self, obj, key, expected, val, is_inplace): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True + orig = obj obj = obj.copy() + arr = obj._values + res = obj.where(~mask, val) tm.assert_series_equal(res, expected) + self._check_inplace(is_inplace, orig, arr, obj) + def test_index_where(self, obj, key, expected, val, request): if Index(obj).dtype != obj.dtype: pytest.skip("test not applicable for this dtype") @@ -457,6 +475,13 @@ def val(self, request): """ return request.param + @pytest.fixture + def is_inplace(self): + """ + Indicate that 
we are not (yet) checking whether or not setting is inplace. + """ + return None + class TestSetitemWithExpansion: def test_setitem_empty_series(self): @@ -518,7 +543,7 @@ def test_setitem_slice_into_readonly_backing_data(): assert not array.any() -class TestSetitemCastingEquivalentsTimedelta64IntoNumeric: +class TestSetitemCastingEquivalentsTimedelta64IntoNumeric(SetitemCastingEquivalents): # timedelta64 should not be treated as integers when setting into # numeric Series @@ -526,7 +551,7 @@ class TestSetitemCastingEquivalentsTimedelta64IntoNumeric: def val(self): td = np.timedelta64(4, "ns") return td - return np.full((1,), td) + # TODO: could also try np.full((1,), td) @pytest.fixture(params=[complex, int, float]) def dtype(self, request): @@ -550,95 +575,9 @@ def expected(self, dtype): def key(self): return 0 - def check_indexer(self, obj, key, expected, val, indexer): - orig = obj - obj = obj.copy() - arr = obj._values - - indexer(obj)[key] = val - tm.assert_series_equal(obj, expected) - - tm.assert_equal(arr, orig._values) # original array is unchanged - - def test_int_key(self, obj, key, expected, val, indexer_sli): - if not isinstance(key, int): - return - - self.check_indexer(obj, key, expected, val, indexer_sli) - - rng = range(key, key + 1) - self.check_indexer(obj, rng, expected, val, indexer_sli) - - if indexer_sli is not tm.loc: - # Note: no .loc because that handles slice edges differently - slc = slice(key, key + 1) - self.check_indexer(obj, slc, expected, val, indexer_sli) - - ilkey = [key] - self.check_indexer(obj, ilkey, expected, val, indexer_sli) - - indkey = np.array(ilkey) - self.check_indexer(obj, indkey, expected, val, indexer_sli) - - def test_slice_key(self, obj, key, expected, val, indexer_sli): - if not isinstance(key, slice): - return - - if indexer_sli is not tm.loc: - # Note: no .loc because that handles slice edges differently - self.check_indexer(obj, key, expected, val, indexer_sli) - - ilkey = list(range(len(obj)))[key] - 
self.check_indexer(obj, ilkey, expected, val, indexer_sli) - - indkey = np.array(ilkey) - self.check_indexer(obj, indkey, expected, val, indexer_sli) - - def test_mask_key(self, obj, key, expected, val, indexer_sli): - # setitem with boolean mask - mask = np.zeros(obj.shape, dtype=bool) - mask[key] = True - - self.check_indexer(obj, mask, expected, val, indexer_sli) - - def test_series_where(self, obj, key, expected, val): - mask = np.zeros(obj.shape, dtype=bool) - mask[key] = True - - orig = obj - obj = obj.copy() - arr = obj._values - res = obj.where(~mask, val) - tm.assert_series_equal(res, expected) - - tm.assert_equal(arr, orig._values) # original array is unchanged - - def test_index_where(self, obj, key, expected, val, request): - if Index(obj).dtype != obj.dtype: - pytest.skip("test not applicable for this dtype") - - mask = np.zeros(obj.shape, dtype=bool) - mask[key] = True - - if obj.dtype == bool and not mask.all(): - # When mask is all True, casting behavior does not apply - msg = "Index/Series casting behavior inconsistent GH#38692" - mark = pytest.mark.xfail(reason=msg) - request.node.add_marker(mark) - - res = Index(obj).where(~mask, val) - tm.assert_index_equal(res, Index(expected)) - - @pytest.mark.xfail( - np_version_under1p20, - reason="Index/Series casting behavior inconsistent GH#38692", - ) - def test_index_putmask(self, obj, key, expected, val): - if Index(obj).dtype != obj.dtype: - pytest.skip("test not applicable for this dtype") - - mask = np.zeros(obj.shape, dtype=bool) - mask[key] = True - - res = Index(obj).putmask(mask, val) - tm.assert_index_equal(res, Index(expected)) + @pytest.fixture + def is_inplace(self): + """ + Indicate we do _not_ expect the setting to be done inplace. 
+ """ + return False From c3478ff0405a46da716078995eaaf27ed2fd246e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Feb 2021 20:38:44 -0800 Subject: [PATCH 2/7] less verbose name --- pandas/tests/series/indexing/test_setitem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 947a115250974..d96048633acfc 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -543,7 +543,7 @@ def test_setitem_slice_into_readonly_backing_data(): assert not array.any() -class TestSetitemCastingEquivalentsTimedelta64IntoNumeric(SetitemCastingEquivalents): +class TestSetitemTimedelta64IntoNumeric(SetitemCastingEquivalents): # timedelta64 should not be treated as integers when setting into # numeric Series From be98959dd4af4199f1d5f69041d02352723d3fde Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Feb 2021 20:42:18 -0800 Subject: [PATCH 3/7] remove duplicate fixture --- pandas/tests/series/indexing/test_setitem.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index d96048633acfc..bb71bb9be24f3 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -307,14 +307,6 @@ class SetitemCastingEquivalents: - the setitem does not expand the obj """ - @pytest.fixture(params=[np.nan, np.float64("NaN")]) - def val(self, request): - """ - One python float NaN, one np.float64. Only np.float64 has a `dtype` - attribute. 
- """ - return request.param - def check_indexer(self, obj, key, expected, val, indexer, is_inplace): orig = obj obj = obj.copy() From 12c1b9c2e050d6b492e10c2ee92b26390be9ef46 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 4 Feb 2021 07:17:29 -0800 Subject: [PATCH 4/7] post-merge --- pandas/tests/series/indexing/test_setitem.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index a3522b69fbde8..c19dbaa5c6045 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -307,6 +307,13 @@ class SetitemCastingEquivalents: - the setitem does not expand the obj """ + @pytest.fixture + def is_inplace(self): + """ + Indicate that we are not (yet) checking whether or not setting is inplace. + """ + return None + def check_indexer(self, obj, key, expected, val, indexer, is_inplace): orig = obj obj = obj.copy() @@ -334,9 +341,9 @@ def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace): self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) if indexer_sli is tm.loc: - self.check_indexer(obj, key, expected, val, tm.at) + self.check_indexer(obj, key, expected, val, tm.at, is_inplace) elif indexer_sli is tm.iloc: - self.check_indexer(obj, key, expected, val, tm.iat) + self.check_indexer(obj, key, expected, val, tm.iat, is_inplace) rng = range(key, key + 1) self.check_indexer(obj, rng, expected, val, indexer_sli, is_inplace) @@ -470,13 +477,6 @@ def val(self, request): """ return request.param - @pytest.fixture - def is_inplace(self): - """ - Indicate that we are not (yet) checking whether or not setting is inplace. 
- """ - return None - class TestSetitemWithExpansion: def test_setitem_empty_series(self): From dca3d54646ec9e0327e7e7ac46248de469190989 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 5 Feb 2021 15:09:56 -0800 Subject: [PATCH 5/7] BUG: Series[int].__setitem__(mask, td64_or_dt64) incorrect casting --- pandas/core/generic.py | 31 +----- pandas/core/indexes/base.py | 11 +- pandas/core/internals/blocks.py | 16 ++- pandas/tests/series/indexing/test_setitem.py | 102 ++++++++++++------- 4 files changed, 91 insertions(+), 69 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 96b35f1aaab9c..1d8295ffb0eb1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8891,32 +8891,11 @@ def _where( if isinstance(other, (np.ndarray, ExtensionArray)): if other.shape != self.shape: - - if self.ndim == 1: - - icond = cond._values - - # GH 2745 / GH 4192 - # treat like a scalar - if len(other) == 1: - other = other[0] - - # GH 3235 - # match True cond to other - elif len(cond[icond]) == len(other): - - # try to not change dtype at first - new_other = self._values - new_other = new_other.copy() - new_other[icond] = other - other = new_other - - else: - raise ValueError( - "Length of replacements must equal series length" - ) - - else: + if self.ndim != 1: + # In the ndim == 1 case we may have + # other length 1, which we treat as scalar (GH#2745, GH#4192) + # or len(other) == icond.sum(), which we treat like + # __setitem__ (GH#3235) raise ValueError( "other must be the same shape as self when an ndarray" ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f2fd5ca9c62c7..5c1fabd67bc8d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4550,11 +4550,16 @@ def putmask(self, mask, value): return self.astype(dtype).putmask(mask, value) values = self._values.copy() - if isinstance(converted, np.timedelta64) and self.dtype == object: + dtype, _ = infer_dtype_from(converted, pandas_dtype=True) + if 
dtype.kind in ["m", "M"]: # https://github.com/numpy/numpy/issues/12550 # timedelta64 will incorrectly cast to int - converted = [converted] * mask.sum() - values[mask] = converted + if not is_list_like(converted): + converted = [converted] * mask.sum() + values[mask] = converted + else: + converted = list(converted) + np.putmask(values, mask, converted) else: np.putmask(values, mask, converted) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9314666acdaad..49a591b3e1edf 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1031,7 +1031,8 @@ def putmask(self, mask, new) -> List[Block]: elif not mask.any(): return [self] - elif isinstance(new, np.timedelta64): + dtype, _ = infer_dtype_from(new) + if dtype.kind in ["m", "M"]: # using putmask with object dtype will incorrect cast to object # Having excluded self._can_hold_element, we know we cannot operate # in-place, so we are safe using `where` @@ -1317,10 +1318,15 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: blocks = block.where(orig_other, cond, errors=errors, axis=axis) return self._maybe_downcast(blocks, "infer") - elif isinstance(other, np.timedelta64): - # expressions.where will cast np.timedelta64 to int - result = self.values.copy() - result[~cond] = [other] * (~cond).sum() + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + if dtype.kind in ["m", "M"] != values.dtype.kind: + # expressions.where would cast np.timedelta64 to int + if not is_list_like(other): + other = [other] * (~cond).sum() + else: + other = list(other) + result = values.copy() + np.putmask(result, ~cond, other) else: # convert datetime to datetime64, timedelta to timedelta64 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 767b61e31698b..34f5a338fc468 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -74,18 
+74,6 @@ def test_setitem_tuple_with_datetimetz_values(self): tm.assert_series_equal(result, expected) -class TestSetitemPeriodDtype: - @pytest.mark.parametrize("na_val", [None, np.nan]) - def test_setitem_na_period_dtype_casts_to_nat(self, na_val): - ser = Series(period_range("2000-01-01", periods=10, freq="D")) - - ser[3] = na_val - assert ser[3] is NaT - - ser[3:5] = na_val - assert ser[4] is NaT - - class TestSetitemScalarIndexer: def test_setitem_negative_out_of_bounds(self): ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) @@ -259,29 +247,6 @@ def test_setitem_callable_other(self): class TestSetitemCasting: - @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) - def test_setitem_dt64_into_int_series(self, dtype): - # dont cast dt64 to int when doing this setitem - orig = Series([1, 2, 3]) - - val = np.datetime64("2021-01-18 13:25:00", "ns") - if dtype == "m8[ns]": - val = val - val - - ser = orig.copy() - ser[:-1] = val - expected = Series([val, val, 3], dtype=object) - tm.assert_series_equal(ser, expected) - assert isinstance(ser[0], type(val)) - - ser = orig.copy() - ser[:-1] = [val, val] - tm.assert_series_equal(ser, expected) - - ser = orig.copy() - ser[:-1] = np.array([val, val]) - tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize("unique", [True, False]) @pytest.mark.parametrize("val", [3, 3.0, "3"], ids=type) def test_setitem_non_bool_into_bool(self, val, indexer_sli, unique): @@ -599,3 +564,70 @@ def is_inplace(self): Indicate we do _not_ expect the setting to be done inplace. 
""" return False + + +class TestSetitemDT64IntoInt(SetitemCastingEquivalents): + # dont cast dt64 to int when doing this setitem + + @pytest.fixture(params=["M8[ns]", "m8[ns]"]) + def dtype(self, request): + return request.param + + @pytest.fixture + def scalar(self, dtype): + val = np.datetime64("2021-01-18 13:25:00", "ns") + if dtype == "m8[ns]": + val = val - val + return val + + @pytest.fixture + def expected(self, scalar): + expected = Series([scalar, scalar, 3], dtype=object) + assert isinstance(expected[0], type(scalar)) + return expected + + @pytest.fixture + def obj(self): + return Series([1, 2, 3]) + + @pytest.fixture + def key(self): + return slice(None, -1) + + @pytest.fixture(params=[None, list, np.array]) + def val(self, scalar, request): + box = request.param + if box is None: + return scalar + return box([scalar, scalar]) + + @pytest.fixture + def is_inplace(self): + return False + + +class TestSetitemNAPeriodDtype(SetitemCastingEquivalents): + # Setting compatible NA values into Series with PeriodDtype + + @pytest.fixture + def expected(self, key): + exp = Series(period_range("2000-01-01", periods=10, freq="D")) + exp._values.view("i8")[key] = NaT.value + assert exp[key] is NaT or all(x is NaT for x in exp[key]) + return exp + + @pytest.fixture + def obj(self): + return Series(period_range("2000-01-01", periods=10, freq="D")) + + @pytest.fixture(params=[3, slice(3, 5)]) + def key(self, request): + return request.param + + @pytest.fixture(params=[None, np.nan]) + def val(self, request): + return request.param + + @pytest.fixture + def is_inplace(self): + return True From 4b39caa4d8c0e81e565dddd4ebfe2cf7def0752a Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 5 Feb 2021 15:12:30 -0800 Subject: [PATCH 6/7] GH ref --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/tests/series/indexing/test_setitem.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 
17d8c79994dbe..79e9cec71d03d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -338,7 +338,7 @@ Indexing - Bug in :meth:`Series.__setitem__` raising ``ValueError`` when setting a :class:`Series` with a scalar indexer (:issue:`38303`) - Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - Bug in :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` always raising ``KeyError`` when slicing with existing strings an :class:`Index` with milliseconds (:issue:`33589`) -- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`) +- Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, :issue:`39619`) - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) - Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrect casting the datetime64 values to integers (:issue:`39266`) - Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 34f5a338fc468..3a993f544b64a 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -567,7 +567,7 @@ def is_inplace(self): class TestSetitemDT64IntoInt(SetitemCastingEquivalents): - # dont cast dt64 to int when doing this setitem + # GH#39619 dont cast dt64 to int when doing this setitem @pytest.fixture(params=["M8[ns]", "m8[ns]"]) def dtype(self, request): From b408c7d32c280e844a717639d72fe8bd8d8dd8db Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Feb 
2021 18:14:26 -0800 Subject: [PATCH 7/7] TST: split large tests --- pandas/tests/indexing/test_categorical.py | 6 +++ .../indexing/test_chaining_and_caching.py | 1 + pandas/tests/indexing/test_datetime.py | 22 +++++---- pandas/tests/indexing/test_indexing.py | 48 ++++++++++++------- 4 files changed, 52 insertions(+), 25 deletions(-) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 1b9b6452b2e33..3b6bc42544c51 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -322,6 +322,7 @@ def test_loc_listlike_dtypes(self): with pytest.raises(KeyError, match=re.escape(msg)): df.loc[["a", "x"]] + def test_loc_listlike_dtypes_duplicated_categories_and_codes(self): # duplicated categories and codes index = CategoricalIndex(["a", "b", "a"]) df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) @@ -341,9 +342,11 @@ def test_loc_listlike_dtypes(self): ) tm.assert_frame_equal(res, exp, check_index_type=True) + msg = "The following labels were missing: Index(['x'], dtype='object')" with pytest.raises(KeyError, match=re.escape(msg)): df.loc[["a", "x"]] + def test_loc_listlike_dtypes_unused_category(self): # contains unused category index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) @@ -363,6 +366,7 @@ def test_loc_listlike_dtypes(self): ) tm.assert_frame_equal(res, exp, check_index_type=True) + msg = "The following labels were missing: Index(['x'], dtype='object')" with pytest.raises(KeyError, match=re.escape(msg)): df.loc[["a", "x"]] @@ -405,6 +409,8 @@ def test_ix_categorical_index(self): expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns) tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + def test_ix_categorical_index_non_unique(self): + # non-unique df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX")) cdf = df.copy() 
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 1ac2a16660f93..25d4692e4cd1d 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -373,6 +373,7 @@ def test_setting_with_copy_bug(self): with pytest.raises(com.SettingWithCopyError, match=msg): df[["c"]][mask] = df[["b"]][mask] + def test_setting_with_copy_bug_no_warning(self): # invalid warning as we are returning a new object # GH 8730 df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 44a5e2ae6d9e9..9f58f4af0ba55 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -37,6 +37,7 @@ def test_indexing_with_datetime_tz(self): ) tm.assert_series_equal(result, expected) + def test_indexing_fast_xs(self): # indexing - fast_xs df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) result = df.iloc[5] @@ -53,6 +54,7 @@ def test_indexing_with_datetime_tz(self): expected = df.iloc[4:] tm.assert_frame_equal(result, expected) + def test_setitem_with_expansion(self): # indexing - setting an element df = DataFrame( data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), @@ -234,21 +236,23 @@ def test_loc_setitem_with_existing_dst(self): def test_getitem_millisecond_resolution(self, frame_or_series): # GH#33589 + + keys = [ + "2017-10-25T16:25:04.151", + "2017-10-25T16:25:04.252", + "2017-10-25T16:50:05.237", + "2017-10-25T16:50:05.238", + ] obj = frame_or_series( [1, 2, 3, 4], - index=[ - Timestamp("2017-10-25T16:25:04.151"), - Timestamp("2017-10-25T16:25:04.252"), - Timestamp("2017-10-25T16:50:05.237"), - Timestamp("2017-10-25T16:50:05.238"), - ], + index=[Timestamp(x) for x in keys], ) - result = obj["2017-10-25T16:25:04.252":"2017-10-25T16:50:05.237"] + result = obj[keys[1] : keys[2]] 
expected = frame_or_series( [2, 3], index=[ - Timestamp("2017-10-25T16:25:04.252"), - Timestamp("2017-10-25T16:50:05.237"), + Timestamp(keys[1]), + Timestamp(keys[2]), ], ) tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index dcd073681cecf..63313589d64f7 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -125,6 +125,7 @@ def test_inf_upcast(self): expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) + def test_inf_upcast_empty(self): # Test with np.inf in columns df = DataFrame() df.loc[0, 0] = 1 @@ -148,6 +149,9 @@ def test_setitem_dtype_upcast(self): ) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("val", [3.14, "wxyz"]) + def test_setitem_dtype_upcast2(self, val): + # GH10280 df = DataFrame( np.arange(6, dtype="int64").reshape(2, 3), @@ -155,19 +159,19 @@ def test_setitem_dtype_upcast(self): columns=["foo", "bar", "baz"], ) - for val in [3.14, "wxyz"]: - left = df.copy() - left.loc["a", "bar"] = val - right = DataFrame( - [[0, val, 2], [3, 4, 5]], - index=list("ab"), - columns=["foo", "bar", "baz"], - ) + left = df.copy() + left.loc["a", "bar"] = val + right = DataFrame( + [[0, val, 2], [3, 4, 5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) - tm.assert_frame_equal(left, right) - assert is_integer_dtype(left["foo"]) - assert is_integer_dtype(left["baz"]) + tm.assert_frame_equal(left, right) + assert is_integer_dtype(left["foo"]) + assert is_integer_dtype(left["baz"]) + def test_setitem_dtype_upcast3(self): left = DataFrame( np.arange(6, dtype="int64").reshape(2, 3) / 10.0, index=list("ab"), @@ -195,6 +199,8 @@ def test_dups_fancy_indexing(self): expected = Index(["b", "a", "a"]) tm.assert_index_equal(result, expected) + def test_dups_fancy_indexing_across_dtypes(self): + # across dtypes df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) df.head() @@ 
-208,6 +214,7 @@ def test_dups_fancy_indexing(self): tm.assert_frame_equal(df, result) + def test_dups_fancy_indexing_not_in_order(self): # GH 3561, dups not in selected order df = DataFrame( {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")}, @@ -232,6 +239,8 @@ def test_dups_fancy_indexing(self): with pytest.raises(KeyError, match="with any missing labels"): df.loc[rows] + def test_dups_fancy_indexing_only_missing_label(self): + # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) with pytest.raises( @@ -244,6 +253,8 @@ def test_dups_fancy_indexing(self): # ToDo: check_index_type can be True after GH 11497 + def test_dups_fancy_indexing_missing_label(self): + # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with pytest.raises(KeyError, match="with any missing labels"): @@ -253,6 +264,8 @@ def test_dups_fancy_indexing(self): with pytest.raises(KeyError, match="with any missing labels"): df.loc[[0, 8, 0]] + def test_dups_fancy_indexing_non_unique(self): + # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) with pytest.raises(KeyError, match="with any missing labels"): @@ -447,6 +460,7 @@ def test_multi_assign(self): df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) + def test_multi_assign_broadcasting_rhs(self): # broadcasting on the rhs is required df = DataFrame( { @@ -781,14 +795,16 @@ def test_non_reducing_slice(self, slc): tslice_ = non_reducing_slice(slc) assert isinstance(df.loc[tslice_], DataFrame) - def test_list_slice(self): + @pytest.mark.parametrize("box", [list, Series, np.array]) + def test_list_slice(self, box): # like dataframe getitem - slices = [["A"], Series(["A"]), np.array(["A"])] + subset = box(["A"]) + df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) expected = pd.IndexSlice[:, ["A"]] - for subset in slices: - result = non_reducing_slice(subset) 
- tm.assert_frame_equal(df.loc[result], df.loc[expected]) + + result = non_reducing_slice(subset) + tm.assert_frame_equal(df.loc[result], df.loc[expected]) def test_maybe_numeric_slice(self): df = DataFrame({"A": [1, 2], "B": ["c", "d"], "C": [True, False]})