diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9eaa0d0ae6876..4214ac14cba49 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -66,21 +66,6 @@ def test_getitem_dupe_cols(self): with pytest.raises(KeyError, match=re.escape(msg)): df[["baf"]] - @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) - def test_loc_iterable(self, float_frame, key_type): - idx = key_type(["A", "B", "C"]) - result = float_frame.loc[:, idx] - expected = float_frame.loc[:, ["A", "B", "C"]] - tm.assert_frame_equal(result, expected) - - def test_loc_timedelta_0seconds(self): - # GH#10583 - df = DataFrame(np.random.normal(size=(10, 4))) - df.index = pd.timedelta_range(start="0s", periods=10, freq="s") - expected = df.loc[pd.Timedelta("0s") :, :] - result = df.loc["0s":, :] - tm.assert_frame_equal(expected, result) - @pytest.mark.parametrize( "idx_type", [ @@ -125,28 +110,20 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): with pytest.raises(KeyError, match="not in index"): frame[idx] - @pytest.mark.parametrize( - "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] - ) - def test_loc_uint64(self, val, expected): - # see gh-19399 - df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) - result = df.loc[val] - - expected.name = val - tm.assert_series_equal(result, expected) - def test_getitem_callable(self, float_frame): # GH 12533 result = float_frame[lambda x: "A"] - tm.assert_series_equal(result, float_frame.loc[:, "A"]) + expected = float_frame.loc[:, "A"] + tm.assert_series_equal(result, expected) result = float_frame[lambda x: ["A", "B"]] + expected = float_frame.loc[:, ["A", "B"]] tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) df = float_frame[:3] result = df[lambda x: [True, False, True]] - tm.assert_frame_equal(result, float_frame.iloc[[0, 2], :]) + expected = float_frame.iloc[[0, 2], :] + tm.assert_frame_equal(result, expected) def test_setitem_list(self, float_frame): @@ -181,11 +158,6 @@ def test_setitem_list(self, float_frame): expected = Series(["1", "2"], df.columns, name=1) tm.assert_series_equal(result, expected) - def test_setitem_list_not_dataframe(self, float_frame): - data = np.random.randn(len(float_frame), 2) - float_frame[["A", "B"]] = data - tm.assert_almost_equal(float_frame[["A", "B"]].values, data) - def test_setitem_list_of_tuples(self, float_frame): tuples = list(zip(float_frame["A"], float_frame["B"])) float_frame["tuples"] = tuples @@ -273,14 +245,6 @@ def test_setitem_multi_index(self): df[("joe", "last")] = df[("jolie", "first")].loc[i, j] tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) - def test_setitem_callable(self): - # GH 12533 - df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - df[lambda x: "A"] = [11, 12, 13, 14] - - exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) - tm.assert_frame_equal(df, exp) - def test_setitem_other_callable(self): # GH 13299 def inc(x): @@ -518,18 +482,13 @@ def test_setitem(self, float_frame): df.loc[0] = np.nan tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) - def test_setitem_dtype(self, dtype, float_frame): - arr = np.random.randn(len(float_frame)) - - float_frame[dtype] = np.array(arr, dtype=dtype) - assert float_frame[dtype].dtype.name == dtype - def test_setitem_tuple(self, float_frame): float_frame["A", "B"] = float_frame["A"] - tm.assert_series_equal( - float_frame["A", "B"], float_frame["A"], check_names=False - ) + assert ("A", "B") in float_frame.columns + + result = float_frame["A", "B"] + expected = float_frame["A"] + tm.assert_series_equal(result, expected, check_names=False) def test_setitem_always_copy(self, float_frame): s = float_frame["A"].copy() @@ -588,25 +547,6 @@ def test_setitem_boolean(self, float_frame): np.putmask(expected.values, mask.values, df.values * 2) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize( - "mask_type", - [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], - ids=["dataframe", "array"], - ) - def test_setitem_boolean_mask(self, mask_type, float_frame): - - # Test for issue #18582 - df = float_frame.copy() - mask = mask_type(df) - - # index with boolean mask - result = df.copy() - result[mask] = np.nan - - expected = df.copy() - expected.values[np.array(mask)] = np.nan - tm.assert_frame_equal(result, expected) - def test_setitem_cast(self, float_frame): float_frame["D"] = float_frame["D"].astype("i8") assert float_frame["D"].dtype == np.int64 @@ -821,19 +761,6 @@ def test_getitem_empty_frame_with_boolean(self): df2 = df[df > 0] tm.assert_frame_equal(df, df2) - def test_slice_floats(self): - index = [52195.504153, 52196.303147, 52198.369883] - df = DataFrame(np.random.rand(3, 2), index=index) - - s1 = df.loc[52195.1:52196.5] - assert len(s1) == 2 - - s1 = df.loc[52195.1:52196.6] - assert len(s1) == 2 - - s1 = df.loc[52195.1:52198.9] - assert len(s1) == 3 - def test_getitem_fancy_slice_integers_step(self): df = DataFrame(np.random.randn(10, 5)) @@ -883,15 +810,6 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): assert (float_frame["C"] == 4).all() - def test_setitem_slice_position(self): - # GH#31469 - df = DataFrame(np.zeros((100, 1))) - df[-4:] = 1 - arr = np.zeros((100, 1)) - arr[-4:] = 1 - expected = DataFrame(arr) - tm.assert_frame_equal(df, expected) - def test_getitem_setitem_non_ix_labels(self): df = tm.makeTimeDataFrame() @@ -1000,14 +918,13 @@ def test_getitem_fancy_ints(self, float_frame): expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]] tm.assert_frame_equal(result, expected) - def test_getitem_setitem_fancy_exceptions(self, float_frame): - ix = float_frame.iloc + def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): with pytest.raises(IndexingError, match="Too many indexers"): - ix[:, :, :] + float_frame.iloc[:, :, :] with pytest.raises(IndexError, match="too many indices for array"): # GH#32257 we let numpy do validation, get their exception - ix[:, :, :] = 1 + float_frame.iloc[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self, float_frame): # boolean index misaligned labels diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index e1ce10970f07b..cb04a61b9e1cb 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -22,6 +22,18 @@ class TestDataFrameSetItem: + @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) + def test_setitem_dtype(self, dtype, float_frame): + arr = np.random.randn(len(float_frame)) + + float_frame[dtype] = np.array(arr, dtype=dtype) + assert float_frame[dtype].dtype.name == dtype + + def test_setitem_list_not_dataframe(self, float_frame): + data = np.random.randn(len(float_frame), 2) + float_frame[["A", "B"]] = data + tm.assert_almost_equal(float_frame[["A", "B"]].values, data) + def test_setitem_error_msmgs(self): # GH 7432 @@ -285,3 +297,45 @@ def test_iloc_setitem_bool_indexer(self, klass): df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemSlicing: + def test_setitem_slice_position(self): + # GH#31469 + df = DataFrame(np.zeros((100, 1))) + df[-4:] = 1 + arr = np.zeros((100, 1)) + arr[-4:] = 1 + expected = DataFrame(arr) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemCallable: + def test_setitem_callable(self): + # GH#12533 + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) + df[lambda x: "A"] = [11, 12, 13, 14] + + exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) + tm.assert_frame_equal(df, exp) + + +class TestDataFrameSetItemBooleanMask: + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], + ids=["dataframe", "array"], + ) + def test_setitem_boolean_mask(self, mask_type, float_frame): + + # Test for issue #18582 + df = float_frame.copy() + mask = mask_type(df) + + # index with boolean mask + result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = np.nan + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py index c0cd7faafb4db..47e4ae1f9f9e1 100644 --- a/pandas/tests/frame/indexing/test_sparse.py +++ b/pandas/tests/frame/indexing/test_sparse.py @@ -1,12 +1,6 @@ -import numpy as np -import pytest - -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray -from pandas.core.arrays.sparse import SparseDtype class TestSparseDataFrameIndexing: @@ -23,34 +17,3 @@ def test_getitem_sparse_column(self): result = df.loc[:, "A"] tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) - @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) - @td.skip_if_no_scipy - def test_loc_getitem_from_spmatrix(self, spmatrix_t, dtype): - import scipy.sparse - - spmatrix_t = getattr(scipy.sparse, spmatrix_t) - - # The bug is triggered by a sparse matrix with purely sparse columns. So the - # recipe below generates a rectangular matrix of dimension (5, 7) where all the - # diagonal cells are ones, meaning the last two columns are purely sparse. - rows, cols = 5, 7 - spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) - df = pd.DataFrame.sparse.from_spmatrix(spmatrix) - - # regression test for #34526 - itr_idx = range(2, rows) - result = df.loc[itr_idx].values - expected = spmatrix.toarray()[itr_idx] - tm.assert_numpy_array_equal(result, expected) - - # regression test for #34540 - result = df.loc[itr_idx].dtypes.values - expected = np.full(cols, SparseDtype(dtype, fill_value=0)) - tm.assert_numpy_array_equal(result, expected) - - def test_all_sparse(self): - df = pd.DataFrame({"A": pd.array([0, 0], dtype=pd.SparseDtype("int64"))}) - result = df.loc[[0, 1]] - tm.assert_frame_equal(result, df) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 9c2d88f1589c2..2e06d8c73d7d1 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -1,14 +1,41 @@ from datetime import datetime, timezone -import pandas as pd +import numpy as np +import pytest + +from pandas import DataFrame import pandas._testing as tm def test_at_timezone(): # https://github.com/pandas-dev/pandas/issues/33544 - result = pd.DataFrame({"foo": [datetime(2000, 1, 1)]}) + result = DataFrame({"foo": [datetime(2000, 1, 1)]}) result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) - expected = pd.DataFrame( + expected = DataFrame( {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object ) tm.assert_frame_equal(result, expected) + + +class TestAtWithDuplicates: + def test_at_with_duplicate_axes_requires_scalar_lookup(self): + # GH#33041 check that falling back to loc doesn't allow non-scalar + # args to slip in + + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + msg = "Invalid call for scalar access" + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] + + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] = 1 diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 854ca176fd2f4..6cdd73d37aec8 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -73,16 +73,6 @@ def test_loc_scalar(self): with pytest.raises(KeyError, match="^1$"): df.loc[1] - def test_getitem_scalar(self): - - cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) - - s = Series([1, 2], index=cats) - - expected = s.iloc[0] - result = s[cats[0]] - assert result == expected - def test_slicing(self): cat = Series(Categorical([1, 2, 3, 4])) reversed = cat[::-1] diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 4879f805b5a2d..fad3478499929 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,6 +1,3 @@ -from datetime import date, datetime, timedelta - -from dateutil import tz import numpy as np import pytest @@ -206,26 +203,6 @@ def test_partial_setting_with_datetimelike_dtype(self): df.loc[mask, "C"] = df.loc[mask].index tm.assert_frame_equal(df, expected) - def test_loc_setitem_datetime(self): - - # GH 9516 - dt1 = Timestamp("20130101 09:00:00") - dt2 = Timestamp("20130101 10:00:00") - - for conv in [ - lambda x: x, - lambda x: x.to_datetime64(), - lambda x: x.to_pydatetime(), - lambda x: np.datetime64(x), - ]: - - df = DataFrame() - df.loc[conv(dt1), "one"] = 100 - df.loc[conv(dt2), "one"] = 200 - - expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) - tm.assert_frame_equal(df, expected) - def test_series_partial_set_datetime(self): # GH 11497 @@ -245,7 +222,8 @@ def test_series_partial_set_datetime(self): exp = Series( [0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s" ) - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) keys = [ Timestamp("2011-01-03"), @@ -273,7 +251,8 @@ def test_series_partial_set_period(self): pd.Period("2011-01-01", freq="D"), ] exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s") - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) keys = [ pd.Period("2011-01-03", freq="D"), @@ -297,33 +276,6 @@ def test_nanosecond_getitem_setitem_with_tz(self): expected = DataFrame(-1, index=index, columns=["a"]) tm.assert_frame_equal(result, expected) - def test_loc_getitem_across_dst(self): - # GH 21846 - idx = pd.date_range( - "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" - ) - series2 = Series([0, 1, 2, 3, 4], index=idx) - - t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min") - t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min") - result = series2.loc[t_1:t_2] - expected = Series([2, 3], index=idx[2:4]) - tm.assert_series_equal(result, expected) - - result = series2[t_1] - expected = 2 - assert result == expected - - def test_loc_incremental_setitem_with_dst(self): - # GH 20724 - base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific")) - idxs = [base + timedelta(seconds=i * 900) for i in range(16)] - result = Series([0], index=[idxs[0]]) - for ts in idxs: - result.loc[ts] = 1 - expected = Series(1, index=idxs) - tm.assert_series_equal(result, expected) - def test_loc_setitem_with_existing_dst(self): # GH 18308 start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") @@ -339,39 +291,3 @@ def test_loc_setitem_with_existing_dst(self): dtype=object, ) tm.assert_frame_equal(result, expected) - - def test_loc_str_slicing(self): - ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") - ser = ix.to_series() - result = ser.loc[:"2017-12"] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) - - def test_loc_label_slicing(self): - ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") - ser = ix.to_series() - result = ser.loc[: ix[-2]] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "slice_, positions", - [ - [slice(date(2018, 1, 1), None), [0, 1, 2]], - [slice(date(2019, 1, 2), None), [2]], - [slice(date(2020, 1, 1), None), []], - [slice(None, date(2020, 1, 1)), [0, 1, 2]], - [slice(None, date(2019, 1, 1)), [0]], - ], - ) - def test_getitem_slice_date(self, slice_, positions): - # https://github.com/pandas-dev/pandas/issues/31501 - s = Series( - [0, 1, 2], - pd.DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), - ) - result = s[slice_] - expected = s.take(positions) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c1a5db992d3df..fff4c0f78f38a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,15 +1,28 @@ """ test label based indexing with loc """ -from datetime import time +from datetime import datetime, time, timedelta from io import StringIO import re +from dateutil.tz import gettz import numpy as np import pytest from pandas.compat.numpy import is_numpy_dev +import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + SparseDtype, + Timedelta, + Timestamp, + date_range, + timedelta_range, + to_datetime, +) import pandas._testing as tm from pandas.api.types import is_scalar from pandas.tests.indexing.common import Base @@ -1014,6 +1027,73 @@ def test_loc_getitem_time_object(self, frame_or_series): expected.index = expected.index._with_freq(None) tm.assert_equal(result, expected) + @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) + @td.skip_if_no_scipy + def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): + import scipy.sparse + + spmatrix_t = getattr(scipy.sparse, spmatrix_t) + + # The bug is triggered by a sparse matrix with purely sparse columns. So the + # recipe below generates a rectangular matrix of dimension (5, 7) where all the + # diagonal cells are ones, meaning the last two columns are purely sparse. + rows, cols = 5, 7 + spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) + df = DataFrame.sparse.from_spmatrix(spmatrix) + + # regression test for GH#34526 + itr_idx = range(2, rows) + result = df.loc[itr_idx].values + expected = spmatrix.toarray()[itr_idx] + tm.assert_numpy_array_equal(result, expected) + + # regression test for GH#34540 + result = df.loc[itr_idx].dtypes.values + expected = np.full(cols, SparseDtype(dtype, fill_value=0)) + tm.assert_numpy_array_equal(result, expected) + + def test_loc_getitem_listlike_all_retains_sparse(self): + df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))}) + result = df.loc[[0, 1]] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) + def test_loc_getitem_iterable(self, float_frame, key_type): + idx = key_type(["A", "B", "C"]) + result = float_frame.loc[:, idx] + expected = float_frame.loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_timedelta_0seconds(self): + # GH#10583 + df = DataFrame(np.random.normal(size=(10, 4))) + df.index = timedelta_range(start="0s", periods=10, freq="s") + expected = df.loc[Timedelta("0s") :, :] + result = df.loc["0s":, :] + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] + ) + def test_loc_getitem_uint64_scalar(self, val, expected): + # see GH#19399 + df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) + result = df.loc[val] + + expected.name = val + tm.assert_series_equal(result, expected) + + def test_loc_setitem_int_label_with_float64index(self): + # note labels are floats + ser = Series(["a", "b", "c"], index=[0, 0.5, 1]) + tmp = ser.copy() + + ser.loc[1] = "zoo" + tmp.iloc[2] = "zoo" + + tm.assert_series_equal(ser, tmp) + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -1103,6 +1183,11 @@ def test_loc_setitem_multiindex_slice(self): tm.assert_series_equal(result, expected) + def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): + times = date_range("2000-01-01", freq="10min", periods=100000) + ser = Series(range(100000), times) + ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + class TestLocSetitemWithExpansion: @pytest.mark.slow @@ -1113,6 +1198,59 @@ def test_loc_setitem_with_expansion_large_dataframe(self): expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") tm.assert_frame_equal(result, expected) + def test_loc_setitem_empty_series(self): + # GH#5226 + + # partially set with an empty object series + ser = Series(dtype=object) + ser.loc[1] = 1 + tm.assert_series_equal(ser, Series([1], index=[1])) + ser.loc[3] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) + + ser = Series(dtype=object) + ser.loc[1] = 1.0 + tm.assert_series_equal(ser, Series([1.0], index=[1])) + ser.loc[3] = 3.0 + tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) + + ser = Series(dtype=object) + ser.loc["foo"] = 1 + tm.assert_series_equal(ser, Series([1], index=["foo"])) + ser.loc["bar"] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"])) + ser.loc[3] = 4 + tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3])) + + def test_loc_setitem_incremental_with_dst(self): + # GH#20724 + base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific")) + idxs = [base + timedelta(seconds=i * 900) for i in range(16)] + result = Series([0], index=[idxs[0]]) + for ts in idxs: + result.loc[ts] = 1 + expected = Series(1, index=idxs) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_datetime_keys_cast(self): + # GH#9516 + dt1 = Timestamp("20130101 09:00:00") + dt2 = Timestamp("20130101 10:00:00") + + for conv in [ + lambda x: x, + lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), + lambda x: np.datetime64(x), + ]: + + df = DataFrame() + df.loc[conv(dt1), "one"] = 100 + df.loc[conv(dt2), "one"] = 200 + + expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self): @@ -1280,6 +1418,85 @@ def test_frame_loc_setitem_callable(self): tm.assert_frame_equal(res, exp) +class TestPartialStringSlicing: + def test_loc_getitem_partial_string_slicing_datetimeindex(self): + # GH#35509 + df = DataFrame( + {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, + index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), + ) + expected = DataFrame( + {"col1": ["a", "c"], "col2": [1, 3]}, + index=to_datetime(["2020-08-01", "2020-08-05"]), + ) + result = df.loc["2020-08"] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_periodindex(self): + pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = pi.to_series() + result = ser.loc[:"2017-12"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): + ix = timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[:"1 days"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + +class TestLabelSlicing: + def test_loc_getitem_label_slice_across_dst(self): + # GH#21846 + idx = date_range( + "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" + ) + series2 = Series([0, 1, 2, 3, 4], index=idx) + + t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min") + t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min") + result = series2.loc[t_1:t_2] + expected = Series([2, 3], index=idx[2:4]) + tm.assert_series_equal(result, expected) + + result = series2[t_1] + expected = 2 + assert result == expected + + def test_loc_getitem_label_slice_period(self): + ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = ix.to_series() + result = ser.loc[: ix[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_label_slice_timedelta64(self): + ix = timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[: ix[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_slice_floats_inexact(self): + index = [52195.504153, 52196.303147, 52198.369883] + df = DataFrame(np.random.rand(3, 2), index=index) + + s1 = df.loc[52195.1:52196.5] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52196.6] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52198.9] + assert len(s1) == 3 + + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 80b7947eb5239..01db937153b3a 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -351,31 +351,6 @@ def test_partial_set_invalid(self): tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" - def test_partial_set_empty_series(self): - - # GH5226 - - # partially set with an empty object series - s = Series(dtype=object) - s.loc[1] = 1 - tm.assert_series_equal(s, Series([1], index=[1])) - s.loc[3] = 3 - tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) - - s = Series(dtype=object) - s.loc[1] = 1.0 - tm.assert_series_equal(s, Series([1.0], index=[1])) - s.loc[3] = 3.0 - tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3])) - - s = Series(dtype=object) - s.loc["foo"] = 1 - tm.assert_series_equal(s, Series([1], index=["foo"])) - s.loc["bar"] = 3 - tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"])) - s.loc[3] = 4 - tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3])) - def test_partial_set_empty_frame(self): # partially set with an empty object @@ -504,10 +479,12 @@ def test_partial_set_empty_frame_set_series(self): # GH 5756 # setting with empty Series df = DataFrame(Series(dtype=object)) - tm.assert_frame_equal(df, DataFrame({0: Series(dtype=object)})) + expected = DataFrame({0: Series(dtype=object)}) + tm.assert_frame_equal(df, expected) df = DataFrame(Series(name="foo", dtype=object)) - tm.assert_frame_equal(df, DataFrame({"foo": Series(dtype=object)})) + expected = DataFrame({"foo": Series(dtype=object)}) + tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_copy_assignment(self): # GH 5932 @@ -565,19 +542,17 @@ def test_partial_set_empty_frame_empty_consistencies(self): ], ) def test_loc_with_list_of_strings_representing_datetimes( - self, idx, labels, expected_idx + self, idx, labels, expected_idx, frame_or_series ): # GH 11278 - s = Series(range(20), index=idx) - df = DataFrame(range(20), index=idx) + obj = frame_or_series(range(20), index=idx) expected_value = [3, 7, 11] - expected_s = Series(expected_value, expected_idx) - expected_df = DataFrame(expected_value, expected_idx) + expected = frame_or_series(expected_value, expected_idx) - tm.assert_series_equal(expected_s, s.loc[labels]) - tm.assert_series_equal(expected_s, s[labels]) - tm.assert_frame_equal(expected_df, df.loc[labels]) + tm.assert_equal(expected, obj.loc[labels]) + if frame_or_series is Series: + tm.assert_series_equal(expected, obj[labels]) @pytest.mark.parametrize( "idx,labels", @@ -651,16 +626,6 @@ def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( with pytest.raises(KeyError, match=msg): df.loc[labels] - def test_indexing_timeseries_regression(self): - # Issue 34860 - arr = date_range("1/1/2008", "1/1/2009") - result = arr.to_series()["2008"] - - rng = date_range(start="2008-01-01", end="2008-12-31") - expected = Series(rng, index=rng) - - tm.assert_series_equal(result, expected) - def test_index_name_empty(self): # GH 31368 df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) @@ -689,16 +654,3 @@ def test_slice_irregular_datetime_index_with_nan(self): expected = DataFrame(range(len(index[:3])), index=index[:3]) result = df["2012-01-01":"2012-01-04"] tm.assert_frame_equal(result, expected) - - def test_slice_datetime_index(self): - # GH35509 - df = DataFrame( - {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, - index=pd.to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), - ) - expected = DataFrame( - {"col1": ["a", "c"], "col2": [1, 3]}, - index=pd.to_datetime(["2020-08-01", "2020-08-05"]), - ) - result = df.loc["2020-08"] - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 4337f01ea33e0..72296bb222a5a 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -146,28 +146,6 @@ def test_frame_at_with_duplicate_axes(self): expected = Series([2.0, 2.0], index=["A", "A"], name=1) tm.assert_series_equal(df.iloc[1], expected) - def test_frame_at_with_duplicate_axes_requires_scalar_lookup(self): - # GH#33041 check that falling back to loc doesn't allow non-scalar - # args to slip in - - arr = np.random.randn(6).reshape(3, 2) - df = DataFrame(arr, columns=["A", "A"]) - - msg = "Invalid call for scalar access" - with pytest.raises(ValueError, match=msg): - df.at[[1, 2]] - with pytest.raises(ValueError, match=msg): - df.at[1, ["A"]] - with pytest.raises(ValueError, match=msg): - df.at[:, "A"] - - with pytest.raises(ValueError, match=msg): - df.at[[1, 2]] = 1 - with pytest.raises(ValueError, match=msg): - df.at[1, ["A"]] = 1 - with pytest.raises(ValueError, match=msg): - df.at[:, "A"] = 1 - def test_series_at_raises_type_error(self): # at should not fallback # GH 7814 diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index 7da368e4bb321..9461bb74b2a87 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -104,19 +104,3 @@ def test_roundtrip_thru_setitem(self): assert expected == result tm.assert_frame_equal(df, df_copy) - - def test_loc_str_slicing(self): - ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") - ser = ix.to_series() - result = ser.loc[:"1 days"] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) - - def test_loc_slicing(self): - ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") - ser = ix.to_series() - result = ser.loc[: ix[-2]] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index c25b8936c1b29..b2fc2e2d0619d 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -4,14 +4,23 @@ from datetime import datetime, timedelta import re +from dateutil.tz import gettz, tzutc import numpy as np import pytest +import pytz -from pandas._libs import iNaT -import pandas._libs.index as _index +from pandas._libs import iNaT, index as libindex import pandas as pd -from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + NaT, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm @@ -65,13 +74,6 @@ def test_dti_reset_index_round_trip(): assert df.reset_index()["Date"][0] == stamp -@pytest.mark.slow -def test_slice_locs_indexerror(): - times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] - s = Series(range(100000), times) - s.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] - - def test_slicing_datetimes(): # GH 7523 @@ -114,8 +116,6 @@ def test_slicing_datetimes(): def test_getitem_setitem_datetime_tz_pytz(): - from pytz import timezone as tz - N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") @@ -134,23 +134,20 @@ def test_getitem_setitem_datetime_tz_pytz(): # repeat with datetimes result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4] + result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = ts[4] tm.assert_series_equal(result, ts) result = ts.copy() # comparison dates with datetime MUST be localized! - date = tz("US/Central").localize(datetime(1990, 1, 1, 3)) + date = pytz.timezone("US/Central").localize(datetime(1990, 1, 1, 3)) result[date] = 0 result[date] = ts[4] tm.assert_series_equal(result, ts) def test_getitem_setitem_datetime_tz_dateutil(): - from dateutil.tz import tzutc - - from pandas._libs.tslibs.timezones import dateutil_gettz as gettz tz = ( lambda x: tzutc() if x == "UTC" else gettz(x) @@ -295,7 +292,6 @@ def test_getitem_setitem_datetimeindex(): def test_getitem_setitem_periodindex(): - from pandas import period_range N = 50 rng = period_range("1/1/1990", periods=N, freq="H") @@ -466,72 +462,50 @@ def test_duplicate_dates_indexing(dups): assert ts[datetime(2000, 1, 6)] == 0 -def test_range_slice(): - idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]) - - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts["1/2/2000":] - expected = ts[1:] - tm.assert_series_equal(result, expected) - - result = ts["1/2/2000":"1/3/2000"] - expected = ts[1:4] - tm.assert_series_equal(result, expected) - - def test_groupby_average_dup_values(dups): result = dups.groupby(level=0).mean() expected = dups.groupby(dups.index).mean() tm.assert_series_equal(result, expected) -def test_indexing_over_size_cutoff(): - import datetime - +def test_indexing_over_size_cutoff(monkeypatch): # #1821 - old_cutoff = _index._SIZE_CUTOFF - try: - _index._SIZE_CUTOFF = 1000 - - # create large list of non periodic datetime - dates = [] - sec = datetime.timedelta(seconds=1) - half_sec = datetime.timedelta(microseconds=500000) - d = datetime.datetime(2011, 12, 5, 20, 30) - n = 1100 - for i in range(n): - dates.append(d) - dates.append(d + sec) - dates.append(d + sec + half_sec) - dates.append(d + sec + sec + half_sec) - d += 3 * sec - - # duplicate some values in the list - duplicate_positions = np.random.randint(0, len(dates) - 1, 20) - for p in duplicate_positions: - dates[p + 1] = dates[p] - - df = DataFrame( - np.random.randn(len(dates), 4), index=dates, columns=list("ABCD") - ) - - pos = n * 3 - timestamp = df.index[pos] - assert timestamp in df.index - - # it works! - df.loc[timestamp] - assert len(df.loc[[timestamp]]) > 0 - finally: - _index._SIZE_CUTOFF = old_cutoff + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) + + # create large list of non periodic datetime + dates = [] + sec = timedelta(seconds=1) + half_sec = timedelta(microseconds=500000) + d = datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")) + + pos = n * 3 + timestamp = df.index[pos] + assert timestamp in df.index + + # it works! + df.loc[timestamp] + assert len(df.loc[[timestamp]]) > 0 def test_indexing_over_size_cutoff_period_index(monkeypatch): # GH 27136 - monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000) + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) n = 1100 idx = pd.period_range("1/1/2000", freq="T", periods=n) @@ -654,19 +628,3 @@ def test_indexing(): msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)" with pytest.raises(KeyError, match=msg): df[df.index[2]] - - -""" -test NaT support -""" - - -def test_setitem_tuple_with_datetimetz(): - # GH 20441 - arr = date_range("2017", periods=4, tz="US/Eastern") - index = [(0, 1), (0, 2), (0, 3), (0, 4)] - result = Series(arr, index=index) - expected = result.copy() - result[(0, 1)] = np.nan - expected.iloc[0] = np.nan - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 2933983a5b18b..71bcce12796f5 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -1,7 +1,7 @@ """ Series.__getitem__ test classes are organized by the type of key passed. """ -from datetime import datetime, time +from datetime import date, datetime, time import numpy as np import pytest @@ -9,7 +9,16 @@ from pandas._libs.tslibs import conversion, timezones import pandas as pd -from pandas import DataFrame, Index, Series, Timestamp, date_range, period_range +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -93,8 +102,46 @@ def test_getitem_time_object(self): result.index = result.index._with_freq(None) tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ + # Series with CategoricalIndex + + def test_getitem_scalar_categorical_index(self): + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + ser = Series([1, 2], index=cats) + + expected = ser.iloc[0] + result = ser[cats[0]] + assert result == expected + class TestSeriesGetitemSlices: + def test_getitem_partial_str_slice_with_datetimeindex(self): + # GH#34860 + arr = date_range("1/1/2008", "1/1/2009") + ser = arr.to_series() + result = ser["2008"] + + rng = date_range(start="2008-01-01", end="2008-12-31") + expected = Series(rng, index=rng) + + tm.assert_series_equal(result, expected) + + def test_getitem_slice_strings_with_datetimeindex(self): + idx = DatetimeIndex( + ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"] + ) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["1/2/2000":] + expected = ts[1:] + tm.assert_series_equal(result, expected) + + result = ts["1/2/2000":"1/3/2000"] + expected = ts[1:4] + tm.assert_series_equal(result, expected) + def test_getitem_slice_2d(self, datetime_series): # GH#30588 multi-dimensional indexing deprecated @@ -119,6 +166,26 @@ def test_getitem_median_slice_bug(self): expected = s[indexer[0]] tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "slc, positions", + [ + [slice(date(2018, 1, 1), None), [0, 1, 2]], + [slice(date(2019, 1, 2), None), [2]], + [slice(date(2020, 1, 1), None), []], + [slice(None, date(2020, 1, 1)), [0, 1, 2]], + [slice(None, date(2019, 1, 1)), [0]], + ], + ) + def test_getitem_slice_date(self, slc, positions): + # https://github.com/pandas-dev/pandas/issues/31501 + ser = Series( + [0, 1, 2], + DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), + ) + result = ser[slc] + expected = ser.take(positions) + tm.assert_series_equal(result, expected) + class TestSeriesGetitemListLike: @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index f35f1375732cb..86af29eac1bae 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -71,17 +71,6 @@ def test_getitem_setitem_slice_integers(): assert not (s[4:] == 0).any() -def test_setitem_float_labels(): - # note labels are floats - s = Series(["a", "b", "c"], index=[0, 0.5, 1]) - tmp = s.copy() - - s.loc[1] = "zoo" - tmp.iloc[2] = "zoo" - - tm.assert_series_equal(s, tmp) - - def test_slice_float_get_set(datetime_series): msg = ( "cannot do slice indexing on DatetimeIndex with these indexers " diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index b4c5ac0195d26..7e25e5200d610 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -61,6 +61,16 @@ def test_setitem_with_different_tz_casts_to_object(self): ) tm.assert_series_equal(ser, expected) + def test_setitem_tuple_with_datetimetz_values(self): + # GH#20441 + arr = date_range("2017", periods=4, tz="US/Eastern") + index = [(0, 1), (0, 2), (0, 3), (0, 4)] + result = Series(arr, index=index) + expected = result.copy() + result[(0, 1)] = np.nan + expected.iloc[0] = np.nan + tm.assert_series_equal(result, expected) + class TestSetitemPeriodDtype: @pytest.mark.parametrize("na_val", [None, np.nan])