From cd1701b6def4b61628edef8ccbb90c3f8c6a37cd Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 11:29:27 -0800 Subject: [PATCH 1/5] TST: collect/split --- .../tests/indexing/multiindex/test_getitem.py | 12 ++-- pandas/tests/indexing/multiindex/test_iloc.py | 22 +++--- .../indexing/multiindex/test_indexing_slow.py | 68 +++++++++---------- 3 files changed, 47 insertions(+), 55 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 954ef63bc2802..f07bf3464b74c 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -62,26 +62,22 @@ def test_series_getitem_duplicates_multiindex(level0_value): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("indexer", [lambda s: s[2000, 3], lambda s: s.loc[2000, 3]]) -def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer): +def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl): s = multiindex_year_month_day_dataframe_random_data["A"] expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) - result = indexer(s) + result = indexer_sl(s)[2000, 3] tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "indexer", [lambda s: s[2000, 3, 10], lambda s: s.loc[2000, 3, 10]] -) def test_series_getitem_returns_scalar( - multiindex_year_month_day_dataframe_random_data, indexer + multiindex_year_month_day_dataframe_random_data, indexer_sl ): s = multiindex_year_month_day_dataframe_random_data["A"] expected = s.iloc[49] - result = indexer(s) + result = indexer_sl(s)[2000, 3, 10] assert result == expected diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index 1a7f93596773a..db91d5ad88252 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -17,14 +17,10 @@ def simple_multiindex_dataframe(): random data by default. """ - def _simple_multiindex_dataframe(data=None): - if data is None: - data = np.random.randn(3, 3) - return DataFrame( - data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] - ) - - return _simple_multiindex_dataframe + data = np.random.randn(3, 3) + return DataFrame( + data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] + ) @pytest.mark.parametrize( @@ -45,23 +41,23 @@ def _simple_multiindex_dataframe(data=None): ], ) def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe): - arr = np.random.randn(3, 3) - df = simple_multiindex_dataframe(arr) + df = simple_multiindex_dataframe + arr = df.values result = indexer(df) expected = expected(arr) tm.assert_series_equal(result, expected) def test_iloc_returns_dataframe(simple_multiindex_dataframe): - df = simple_multiindex_dataframe() + df = simple_multiindex_dataframe result = df.iloc[[0, 1]] expected = df.xs(4, drop_level=False) tm.assert_frame_equal(result, expected) def test_iloc_returns_scalar(simple_multiindex_dataframe): - arr = np.random.randn(3, 3) - df = simple_multiindex_dataframe(arr) + df = simple_multiindex_dataframe + arr = df.values result = df.iloc[2, 2] expected = arr[2, 2] assert result == expected diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index cca31c1e81f84..a38b5f6cc449a 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -40,49 +40,49 @@ b = df.drop_duplicates(subset=cols[:-1]) -@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") -@pytest.mark.parametrize("lexsort_depth", list(range(5))) -@pytest.mark.parametrize("key", keys) -@pytest.mark.parametrize("frame", [a, b]) -def test_multiindex_get_loc(lexsort_depth, key, frame): - # GH7724, GH2646 +def validate(mi, df, key): + # check indexing into a multi-index before & past the lexsort depth - with warnings.catch_warnings(record=True): + mask = np.ones(len(df)).astype("bool") - # test indexing into a multi-index before & past the lexsort depth + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k - def validate(mi, df, key): - mask = np.ones(len(df)).astype("bool") + if not mask.any(): + assert key[: i + 1] not in mi.index + continue - # test for all partials of this key - for i, k in enumerate(key): - mask &= df.iloc[:, i] == k + assert key[: i + 1] in mi.index + right = df[mask].copy() - if not mask.any(): - assert key[: i + 1] not in mi.index - continue + if i + 1 != len(key): # partial key + return_value = right.drop(cols[: i + 1], axis=1, inplace=True) + assert return_value is None + return_value = right.set_index(cols[i + 1 : -1], inplace=True) + assert return_value is None + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) - assert key[: i + 1] in mi.index - right = df[mask].copy() + else: # full key + return_value = right.set_index(cols[:-1], inplace=True) + assert return_value is None + if len(right) == 1: # single hit + right = Series( + right["jolia"].values, name=right.index[0], index=["jolia"] + ) + tm.assert_series_equal(mi.loc[key[: i + 1]], right) + else: # multi hit + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) - if i + 1 != len(key): # partial key - return_value = right.drop(cols[: i + 1], axis=1, inplace=True) - assert return_value is None - return_value = right.set_index(cols[i + 1 : -1], inplace=True) - assert return_value is None - tm.assert_frame_equal(mi.loc[key[: i + 1]], right) - else: # full key - return_value = right.set_index(cols[:-1], inplace=True) - assert return_value is None - if len(right) == 1: # single hit - right = Series( - right["jolia"].values, name=right.index[0], index=["jolia"] - ) - tm.assert_series_equal(mi.loc[key[: i + 1]], right) - else: # multi hit - tm.assert_frame_equal(mi.loc[key[: i + 1]], right) +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +@pytest.mark.parametrize("lexsort_depth", list(range(5))) +@pytest.mark.parametrize("key", keys) +@pytest.mark.parametrize("frame", [a, b]) +def test_multiindex_get_loc(lexsort_depth, key, frame): + # GH7724, GH2646 + with warnings.catch_warnings(record=True): if lexsort_depth == 0: df = frame.copy() else: From 5a1bda014aacadc8896d61c7c959b294537a9332 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 11:43:27 -0800 Subject: [PATCH 2/5] TST/REF: split/better names --- .../indexing/interval/test_interval_new.py | 2 +- .../tests/indexing/multiindex/test_setitem.py | 253 ++++++++---------- 2 files changed, 119 insertions(+), 136 deletions(-) diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index aea2cf42751db..34dc5d604e90d 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -198,7 +198,7 @@ def test_non_unique_moar(self, indexer_sl): result = indexer_sl(ser)[[Interval(1, 3)]] tm.assert_series_equal(expected, result) - def test_missing_key_error_message( + def test_loc_getitem_missing_key_error_message( self, frame_or_series, series_with_interval_index ): # GH#27365 diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index a360a53ca7672..fe5fa4bd6d12f 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -15,114 +15,109 @@ import pandas.core.common as com +def assert_equal(a, b): + assert a == b + + class TestMultiIndexSetItem: + def check(self, target, indexers, value, compare_fn=assert_equal, expected=None): + target.loc[indexers] = value + result = target.loc[indexers] + if expected is None: + expected = value + compare_fn(result, expected) + def test_setitem_multiindex(self): - for index_fn in ("loc",): - - def assert_equal(a, b): - assert a == b - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - - # GH7190 - index = MultiIndex.from_product( - [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] - ) - t, n = 0, 2 - df = DataFrame( - np.nan, - columns=["A", "w", "l", "a", "x", "X", "d", "profit"], - index=index, - ) - check(target=df, indexers=((t, n), "X"), value=0, compare_fn=assert_equal) - - df = DataFrame( - -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index - ) - check(target=df, indexers=((t, n), "X"), value=1, compare_fn=assert_equal) - - df = DataFrame( - columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index - ) - check(target=df, indexers=((t, n), "X"), value=2, compare_fn=assert_equal) - - # gh-7218: assigning with 0-dim arrays - df = DataFrame( - -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index - ) - check( - target=df, - indexers=((t, n), "X"), - value=np.array(3), - compare_fn=assert_equal, - expected=3, - ) - - # GH5206 - df = DataFrame( - np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float - ) - df["F"] = 99 - row_selection = df["A"] % 2 == 0 - col_selection = ["B", "C"] - df.loc[row_selection, col_selection] = df["F"] - output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) - tm.assert_frame_equal(df.loc[row_selection, col_selection], output) - check( - target=df, - indexers=(row_selection, col_selection), - value=df["F"], - compare_fn=tm.assert_frame_equal, - expected=output, - ) - - # GH11372 - idx = MultiIndex.from_product( - [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] - ) - cols = MultiIndex.from_product( - [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] - ) - - df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) - - subidx = MultiIndex.from_tuples( - [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] - ) - subcols = MultiIndex.from_tuples( - [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] - ) - - vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) - check( - target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, - ) - # set all columns - vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) - check( - target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, - ) - # identity - copy = df.copy() - check( - target=df, - indexers=(df.index, df.columns), - value=df, - compare_fn=tm.assert_frame_equal, - expected=copy, - ) + # GH#7190 + cols = ["A", "w", "l", "a", "x", "X", "d", "profit"] + index = MultiIndex.from_product( + [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] + ) + t, n = 0, 2 + + df = DataFrame( + np.nan, + columns=cols, + index=index, + ) + self.check(target=df, indexers=((t, n), "X"), value=0) + + df = DataFrame(-999, columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=1) + + df = DataFrame(columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=2) + + # gh-7218: assigning with 0-dim arrays + df = DataFrame(-999, columns=cols, index=index) + self.check( + target=df, + indexers=((t, n), "X"), + value=np.array(3), + expected=3, + ) + + def test_setitem_multiindex2(self): + # GH#5206 + df = DataFrame( + np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float + ) + df["F"] = 99 + row_selection = df["A"] % 2 == 0 + col_selection = ["B", "C"] + df.loc[row_selection, col_selection] = df["F"] + output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) + tm.assert_frame_equal(df.loc[row_selection, col_selection], output) + self.check( + target=df, + indexers=(row_selection, col_selection), + value=df["F"], + compare_fn=tm.assert_frame_equal, + expected=output, + ) + + def test_setitem_multiindex3(self): + # GH#11372 + idx = MultiIndex.from_product( + [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] + ) + cols = MultiIndex.from_product( + [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] + ) + + df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + + subidx = MultiIndex.from_tuples( + [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] + ) + subcols = MultiIndex.from_tuples( + [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] + ) + + vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + self.check( + target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # set all columns + vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + self.check( + target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # identity + copy = df.copy() + self.check( + target=df, + indexers=(df.index, df.columns), + value=df, + compare_fn=tm.assert_frame_equal, + expected=copy, + ) def test_multiindex_setitem(self): @@ -148,6 +143,8 @@ def test_multiindex_setitem(self): with pytest.raises(TypeError, match=msg): df.loc["bar"] *= 2 + def test_multiindex_setitem2(self): + # from SO # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation df_orig = DataFrame.from_dict( @@ -239,17 +236,6 @@ def test_groupby_example(self): grp = df.groupby(level=index_cols[:4]) df["new_col"] = np.nan - f_index = np.arange(5) - - def f(name, df2): - return Series(np.arange(df2.shape[0]), name=df2.index.values[0]).reindex( - f_index - ) - - # FIXME: dont leave commented-out - # TODO(wesm): unused? - # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T - # we are actually operating on a copy here # but in this case, that's ok for name, df2 in grp: @@ -334,8 +320,10 @@ def test_frame_setitem_multi_column(self): cp["a"] = cp["b"].values tm.assert_frame_equal(cp["a"], cp["b"]) + def test_frame_setitem_multi_column2(self): + # --------------------------------------- - # #1803 + # GH#1803 columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]) df = DataFrame(index=[1, 3, 5], columns=columns) @@ -356,6 +344,7 @@ def test_frame_setitem_multi_column(self): assert sliced_a2.name == ("A", "2") assert sliced_b1.name == ("B", "1") + # TODO: no setitem here? def test_getitem_setitem_tuple_plus_columns( self, multiindex_year_month_day_dataframe_random_data ): @@ -367,29 +356,23 @@ def test_getitem_setitem_tuple_plus_columns( expected = df.loc[2000, 1, 6][["A", "B", "C"]] tm.assert_series_equal(result, expected) - def test_getitem_setitem_slice_integers(self): + def test_loc_getitem_setitem_slice_integers(self, frame_or_series): index = MultiIndex( levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]] ) - frame = DataFrame( + obj = DataFrame( np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"] ) - res = frame.loc[1:2] - exp = frame.reindex(frame.index[2:]) - tm.assert_frame_equal(res, exp) - - frame.loc[1:2] = 7 - assert (frame.loc[1:2] == 7).values.all() - - series = Series(np.random.randn(len(index)), index=index) + if frame_or_series is not DataFrame: + obj = obj["a"] - res = series.loc[1:2] - exp = series.reindex(series.index[2:]) - tm.assert_series_equal(res, exp) + res = obj.loc[1:2] + exp = obj.reindex(obj.index[2:]) + tm.assert_equal(res, exp) - series.loc[1:2] = 7 - assert (series.loc[1:2] == 7).values.all() + obj.loc[1:2] = 7 + assert (obj.loc[1:2] == 7).values.all() def test_setitem_change_dtype(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -419,9 +402,9 @@ def test_nonunique_assignment_1750(self): ) df = df.set_index(["A", "B"]) - ix = MultiIndex.from_tuples([(1, 1)]) + mi = MultiIndex.from_tuples([(1, 1)]) - df.loc[ix, "C"] = "_" + df.loc[mi, "C"] = "_" assert (df.xs((1, 1))["C"] == "_").all() From e151812cb60424d8edb35b8e124aecbcda7fc61c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 14:07:24 -0800 Subject: [PATCH 3/5] misplaced setitme test --- .../tests/indexing/multiindex/test_insert.py | 35 ------------------- .../tests/indexing/multiindex/test_setitem.py | 27 ++++++++++++++ 2 files changed, 27 insertions(+), 35 deletions(-) delete mode 100644 pandas/tests/indexing/multiindex/test_insert.py diff --git a/pandas/tests/indexing/multiindex/test_insert.py b/pandas/tests/indexing/multiindex/test_insert.py deleted file mode 100644 index b62f0be5a4f10..0000000000000 --- a/pandas/tests/indexing/multiindex/test_insert.py +++ /dev/null @@ -1,35 +0,0 @@ -import numpy as np - -from pandas import ( - DataFrame, - MultiIndex, - Series, -) -import pandas._testing as tm - - -class TestMultiIndexInsertion: - def test_setitem_mixed_depth(self): - arrays = [ - ["a", "top", "top", "routine1", "routine1", "routine2"], - ["", "OD", "OD", "result1", "result2", "result1"], - ["", "wx", "wy", "", "", ""], - ] - - tuples = sorted(zip(*arrays)) - index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) - - result = df.copy() - expected = df.copy() - result["b"] = [1, 2, 3, 4] - expected["b", "", ""] = [1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - def test_dataframe_insert_column_all_na(self): - # GH #1534 - mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) - df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) - s = Series({(1, 1): 1, (1, 2): 2}) - df["new"] = s - assert df["new"].isna().all() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index fe5fa4bd6d12f..9e85f9f65a3bc 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -430,6 +430,33 @@ def test_setitem_nonmonotonic(self): tm.assert_frame_equal(df, expected) +class TestSetitemWithExpansionMultiIndex: + def test_setitem_new_column_mixed_depth(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df.copy() + expected = df.copy() + result["b"] = [1, 2, 3, 4] + expected["b", "", ""] = [1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + def test_setitem_new_column_all_na(self): + # GH#1534 + mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) + s = Series({(1, 1): 1, (1, 2): 2}) + df["new"] = s + assert df["new"].isna().all() + + def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no From bad8bff0ebe4561bde0fd25f324a558d5dfbdcf0 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 14:11:22 -0800 Subject: [PATCH 4/5] misplaced formerly-ix tests --- pandas/tests/indexing/multiindex/test_ix.py | 67 -------------------- pandas/tests/indexing/multiindex/test_loc.py | 57 +++++++++++++++++ 2 files changed, 57 insertions(+), 67 deletions(-) delete mode 100644 pandas/tests/indexing/multiindex/test_ix.py diff --git a/pandas/tests/indexing/multiindex/test_ix.py b/pandas/tests/indexing/multiindex/test_ix.py deleted file mode 100644 index b8d30337dbe16..0000000000000 --- a/pandas/tests/indexing/multiindex/test_ix.py +++ /dev/null @@ -1,67 +0,0 @@ -import numpy as np -import pytest - -from pandas.errors import PerformanceWarning - -from pandas import ( - DataFrame, - MultiIndex, -) -import pandas._testing as tm - - -class TestMultiIndex: - def test_frame_setitem_loc(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - frame.loc[("bar", "two"), "B"] = 5 - assert frame.loc[("bar", "two"), "B"] == 5 - - # with integer labels - df = frame.copy() - df.columns = list(range(3)) - df.loc[("bar", "two"), 1] = 7 - assert df.loc[("bar", "two"), 1] == 7 - - def test_loc_general(self): - - # GH 2817 - data = { - "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, - } - df = DataFrame(data).set_index(keys=["col", "year"]) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with tm.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - return_value = df.sort_index(inplace=True) - assert return_value is None - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) - expected = DataFrame({"amount": [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_loc_multiindex_missing_label_raises(self): - # GH 21593 - df = DataFrame( - np.random.randn(3, 3), - columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]], - ) - - with pytest.raises(KeyError, match=r"^2$"): - df.loc[2] - - def test_series_loc_getitem_fancy( - self, multiindex_year_month_day_dataframe_random_data - ): - s = multiindex_year_month_day_dataframe_random_data["A"] - expected = s.reindex(s.index[49:51]) - result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 3e912eedb0232..07503b5b34176 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import PerformanceWarning + import pandas as pd from pandas import ( DataFrame, @@ -29,6 +31,61 @@ def frame_random_data_integer_multi_index(): class TestMultiIndexLoc: + def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + frame.loc[("bar", "two"), "B"] = 5 + assert frame.loc[("bar", "two"), "B"] == 5 + + # with integer labels + df = frame.copy() + df.columns = list(range(3)) + df.loc[("bar", "two"), 1] = 7 + assert df.loc[("bar", "two"), 1] == 7 + + def test_loc_getitem_general(self): + + # GH#2817 + data = { + "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, + } + df = DataFrame(data).set_index(keys=["col", "year"]) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + return_value = df.sort_index(inplace=True) + assert return_value is None + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) + expected = DataFrame({"amount": [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_loc_getitem_multiindex_missing_label_raises(self): + # GH#21593 + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + def test_loc_getitem_list_of_tuples_with_multiindex( + self, multiindex_year_month_day_dataframe_random_data + ): + ser = multiindex_year_month_day_dataframe_random_data["A"] + expected = ser.reindex(ser.index[49:51]) + result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) + def test_loc_getitem_series(self): # GH14730 # passing a series as a key with a MultiIndex From 0275a444959a17c285a7bc821fb904f4cc12ed3e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Feb 2021 15:46:59 -0800 Subject: [PATCH 5/5] collecting indexing tests by method --- pandas/tests/frame/indexing/test_setitem.py | 9 +++ pandas/tests/indexing/test_indexing.py | 49 ---------------- pandas/tests/indexing/test_loc.py | 62 ++++++++++++++------ pandas/tests/series/indexing/test_setitem.py | 9 +++ 4 files changed, 62 insertions(+), 67 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 657faa0f9b505..6763113036de8 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -445,6 +445,15 @@ def test_setitem_intervals(self): tm.assert_series_equal(df["C"], df["C"]) tm.assert_series_equal(df["C"], df["E"], check_names=False) + def test_setitem_categorical(self): + # GH#35369 + df = DataFrame({"h": Series(list("mn")).astype("category")}) + df.h = df.h.cat.reorder_categories(["n", "m"]) + expected = DataFrame( + {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])} + ) + tm.assert_frame_equal(df, expected) + class TestSetitemTZAwareValues: @pytest.fixture diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index efd99df9a5e4f..07c9c181584dc 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1064,52 +1064,3 @@ def test_extension_array_cross_section_converts(): result = df.iloc[0] tm.assert_series_equal(result, expected) - - -def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): - # GH 30567 - ser = Series([None] * 10) - mask = [False] * 3 + [True] * 5 + [False] * 2 - ser[mask] = range(5) - result = ser - expected = Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") - tm.assert_series_equal(result, expected) - - -def test_missing_labels_inside_loc_matched_in_error_message(): - # GH34272 - s = Series({"a": 1, "b": 2, "c": 3}) - error_message_regex = "missing_0.*missing_1.*missing_2" - with pytest.raises(KeyError, match=error_message_regex): - s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] - - -def test_many_missing_labels_inside_loc_error_message_limited(): - # GH34272 - n = 10000 - missing_labels = [f"missing_{label}" for label in range(n)] - s = Series({"a": 1, "b": 2, "c": 3}) - # regex checks labels between 4 and 9995 are replaced with ellipses - error_message_regex = "missing_4.*\\.\\.\\..*missing_9995" - with pytest.raises(KeyError, match=error_message_regex): - s.loc[["a", "c"] + missing_labels] - - -def test_long_text_missing_labels_inside_loc_error_message_limited(): - # GH34272 - s = Series({"a": 1, "b": 2, "c": 3}) - missing_labels = [f"long_missing_label_text_{i}" * 5 for i in range(3)] - # regex checks for very long labels there are new lines between each - error_message_regex = "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1" - with pytest.raises(KeyError, match=error_message_regex): - s.loc[["a", "c"] + missing_labels] - - -def test_setitem_categorical(): - # https://github.com/pandas-dev/pandas/issues/35369 - df = DataFrame({"h": Series(list("mn")).astype("category")}) - df.h = df.h.cat.reorder_categories(["n", "m"]) - expected = DataFrame( - {"h": pd.Categorical(["m", "n"]).reorder_categories(["n", "m"])} - ) - tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 55a979859a12a..c98666b38b8b8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1808,25 +1808,51 @@ def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box): with pytest.raises(KeyError, match=msg): ser2.to_frame().loc[box(ci2)] + def test_loc_getitem_many_missing_labels_inside_error_message_limited(self): + # GH#34272 + n = 10000 + missing_labels = [f"missing_{label}" for label in range(n)] + ser = Series({"a": 1, "b": 2, "c": 3}) + # regex checks labels between 4 and 9995 are replaced with ellipses + error_message_regex = "missing_4.*\\.\\.\\..*missing_9995" + with pytest.raises(KeyError, match=error_message_regex): + ser.loc[["a", "c"] + missing_labels] + + def test_loc_getitem_missing_labels_inside_matched_in_error_message(self): + # GH#34272 + ser = Series({"a": 1, "b": 2, "c": 3}) + error_message_regex = "missing_0.*missing_1.*missing_2" + with pytest.raises(KeyError, match=error_message_regex): + ser.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] + + def test_loc_getitem_long_text_missing_labels_inside_error_message_limited(self): + # GH#34272 + ser = Series({"a": 1, "b": 2, "c": 3}) + missing_labels = [f"long_missing_label_text_{i}" * 5 for i in range(3)] + # regex checks for very long labels there are new lines between each + error_message_regex = ( + "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1" + ) + with pytest.raises(KeyError, match=error_message_regex): + ser.loc[["a", "c"] + missing_labels] + + def test_loc_getitem_series_label_list_missing_values(self): + # gh-11428 + key = np.array( + ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" + ) + ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[key] -def test_series_loc_getitem_label_list_missing_values(): - # gh-11428 - key = np.array( - ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" - ) - s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) - with pytest.raises(KeyError, match="with any missing labels"): - s.loc[key] - - -def test_series_getitem_label_list_missing_integer_values(): - # GH: 25927 - s = Series( - index=np.array([9730701000001104, 10049011000001109]), - data=np.array([999000011000001104, 999000011000001104]), - ) - with pytest.raises(KeyError, match="with any missing labels"): - s.loc[np.array([9730701000001104, 10047311000001102])] + def test_loc_getitem_series_label_list_missing_integer_values(self): + # GH: 25927 + ser = Series( + index=np.array([9730701000001104, 10049011000001109]), + data=np.array([999000011000001104, 999000011000001104]), + ) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[np.array([9730701000001104, 10047311000001102])] @pytest.mark.parametrize( diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 35f6647b631aa..3a3dffb0ce9e8 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -186,6 +186,15 @@ def test_setitem_boolean_nullable_int_types(self, any_nullable_numeric_dtype): ser.loc[ser > 6] = loc_ser.loc[loc_ser > 1] tm.assert_series_equal(ser, expected) + def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(self): + # GH#30567 + ser = Series([None] * 10) + mask = [False] * 3 + [True] * 5 + [False] * 2 + ser[mask] = range(5) + result = ser + expected = Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") + tm.assert_series_equal(result, expected) + class TestSetitemViewCopySemantics: def test_setitem_invalidates_datetime_index_freq(self):