From 074e193e76df22cb806cbb6490a6abc346105a7d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 2 Nov 2022 15:02:46 +0100 Subject: [PATCH 1/2] TST: avoid chained assignment in tests outside of specific tests on chaining --- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/frame/methods/test_cov_corr.py | 7 +++--- pandas/tests/frame/methods/test_fillna.py | 18 ++++++++------ pandas/tests/frame/methods/test_rank.py | 16 ++++++------ pandas/tests/frame/methods/test_replace.py | 27 ++++++++++++--------- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/frame/test_constructors.py | 7 ++++-- pandas/tests/io/excel/test_writers.py | 10 ++++---- pandas/tests/series/methods/test_rank.py | 2 +- 10 files changed, 51 insertions(+), 42 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 510d4ab702fdd..83b9098f01452 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -347,7 +347,7 @@ def test_apply_yield_list(float_frame): def test_apply_reduce_Series(float_frame): - float_frame["A"].iloc[::2] = np.nan + float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 1c08a37c58e4e..8331bed881ce1 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1132,7 +1132,7 @@ def test_setitem_always_copy(self, float_frame): s = float_frame["A"].copy() float_frame["E"] = s - float_frame["E"][5:10] = np.nan + float_frame.iloc[5:10, float_frame.columns.get_loc("E")] = np.nan assert notna(s[5:10]).all() @pytest.mark.parametrize("consolidate", [True, False]) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 25ef49718fbe7..1a85d53a40cc2 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -107,9 +107,10 @@ class TestDataFrameCorr: @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) @td.skip_if_no_scipy def test_corr_scipy_method(self, float_frame, method): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - float_frame["A"][:10] = float_frame["A"][10:20] + idx = float_frame.index + float_frame.loc[: idx[4], "A"] = np.nan + float_frame.loc[idx[5] : idx[9], "B"] = np.nan + float_frame.loc[: idx[9], "A"] = float_frame["A"][10:20] correls = float_frame.corr(method=method) expected = float_frame["A"].corr(float_frame["C"], method=method) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 869cd32aa9ef9..d34f7162d0724 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -392,16 +392,18 @@ def test_fillna_datetime_columns(self): tm.assert_frame_equal(result, expected) def test_ffill(self, datetime_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + idx = datetime_frame.index + datetime_frame.loc[: idx[4], "A"] = np.nan + datetime_frame.loc[idx[-5] :, "A"] = np.nan tm.assert_frame_equal( datetime_frame.ffill(), datetime_frame.fillna(method="ffill") ) def test_bfill(self, datetime_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + idx = datetime_frame.index + datetime_frame.loc[: idx[4], "A"] = np.nan + datetime_frame.loc[idx[-5] :, "A"] = np.nan tm.assert_frame_equal( datetime_frame.bfill(), datetime_frame.fillna(method="bfill") @@ -467,8 +469,8 @@ def test_fillna_integer_limit(self, type): def test_fillna_inplace(self): df = DataFrame(np.random.randn(10, 4)) - df[1][:4] = np.nan - df[3][-4:] = np.nan + df.loc[:4, 1] = np.nan + df.loc[-4:, 3] = np.nan expected = df.fillna(value=0) assert expected is not df @@ -479,8 +481,8 @@ def test_fillna_inplace(self): expected = df.fillna(value={0: 0}, inplace=True) assert expected is None - df[1][:4] = np.nan - df[3][-4:] = np.nan + df.loc[:4, 1] = np.nan + df.loc[-4:, 3] = np.nan expected = df.fillna(method="ffill") assert expected is not df diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 7b2f7908673e3..1f5cb95885004 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -43,10 +43,10 @@ def test_rank(self, float_frame): import scipy.stats # noqa:F401 from scipy.stats import rankdata - float_frame["A"][::2] = np.nan - float_frame["B"][::3] = np.nan - float_frame["C"][::4] = np.nan - float_frame["D"][::5] = np.nan + float_frame.loc[::2, "A"] = np.nan + float_frame.loc[::3, "B"] = np.nan + float_frame.loc[::4, "C"] = np.nan + float_frame.loc[::5, "D"] = np.nan ranks0 = float_frame.rank() ranks1 = float_frame.rank(1) @@ -148,10 +148,10 @@ def test_rank_na_option(self, float_frame): import scipy.stats # noqa:F401 from scipy.stats import rankdata - float_frame["A"][::2] = np.nan - float_frame["B"][::3] = np.nan - float_frame["C"][::4] = np.nan - float_frame["D"][::5] = np.nan + float_frame.loc[::2, "A"] = np.nan + float_frame.loc[::3, "B"] = np.nan + float_frame.loc[::4, "C"] = np.nan + float_frame.loc[::5, "D"] = np.nan # bottom ranks0 = float_frame.rank(na_option="bottom") diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9eaba56a23e0f..01516608479c7 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -29,8 +29,9 @@ def mix_abc() -> dict[str, list[float | str]]: class TestDataFrameReplace: def test_replace_inplace(self, datetime_frame, float_string_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + idx = datetime_frame.index + datetime_frame.loc[: idx[4], "A"] = np.nan + datetime_frame.loc[idx[-5] :, "A"] = np.nan tsframe = datetime_frame.copy() return_value = tsframe.replace(np.nan, 0, inplace=True) @@ -420,16 +421,17 @@ def test_regex_replace_string_types( tm.assert_equal(result, expected) def test_replace(self, datetime_frame): - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan + idx = datetime_frame.index + datetime_frame.loc[: idx[4], "A"] = np.nan + datetime_frame.loc[idx[-5] :, "A"] = np.nan zero_filled = datetime_frame.replace(np.nan, -1e8) tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) - datetime_frame["A"][:5] = np.nan - datetime_frame["A"][-5:] = np.nan - datetime_frame["B"][:5] = -1e8 + datetime_frame.loc[: idx[4], "A"] = np.nan + datetime_frame.loc[idx[-5] :, "A"] = np.nan + datetime_frame.loc[: idx[4], "B"] = -1e8 # empty df = DataFrame(index=["a", "b"]) @@ -716,16 +718,17 @@ def test_replace_for_new_dtypes(self, datetime_frame): # dtypes tsframe = datetime_frame.copy().astype(np.float32) - tsframe["A"][:5] = np.nan - tsframe["A"][-5:] = np.nan + idx = tsframe.index + tsframe.loc[: idx[4], "A"] = np.nan + tsframe.loc[idx[-5] :, "A"] = np.nan zero_filled = tsframe.replace(np.nan, -1e8) tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) - tsframe["A"][:5] = np.nan - tsframe["A"][-5:] = np.nan - tsframe["B"][:5] = -1e8 + tsframe.loc[: idx[4], "A"] = np.nan + tsframe.loc[idx[-5] :, "A"] = np.nan + tsframe.loc[: idx[4], "B"] = -1e8 b = tsframe["B"] b[b == -1e8] = np.nan diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 1933278efb443..7487b2c70a264 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -35,7 +35,7 @@ def read_csv(self, path, **kwargs): def test_to_csv_from_csv1(self, float_frame, datetime_frame): with tm.ensure_clean("__tmp_to_csv_from_csv1__") as path: - float_frame["A"][:5] = np.nan + float_frame.iloc[:5, float_frame.columns.get_loc("A")] = np.nan float_frame.to_csv(path) float_frame.to_csv(path, columns=["A", "B"]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5a83c4997b33c..54cd39df54c57 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -718,7 +718,8 @@ def test_constructor_defaultdict(self, float_frame): from collections import defaultdict data = {} - float_frame["B"][:10] = np.nan + float_frame.loc[: float_frame.index[10], "B"] = np.nan + for k, v in float_frame.items(): dct = defaultdict(dict) dct.update(v.to_dict()) @@ -2203,7 +2204,9 @@ def test_constructor_series_copy(self, float_frame): series = float_frame._series df = DataFrame({"A": series["A"]}, copy=True) - df["A"][:] = 5 + # TODO can be replaced with `df.loc[:, "A"] = 5` after deprecation about + # inplace mutation is enforced + df.loc[df.index[0] : df.index[-1], "A"] = 5 assert not (series["A"] == 5).all() diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 307f8b7a7798f..e1fc8364a89ba 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -374,7 +374,7 @@ def test_excel_writer_context_manager(self, frame, path): def test_roundtrip(self, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -444,7 +444,7 @@ def test_ts_frame(self, tsframe, path): def test_basics_with_nan(self, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) frame.to_excel(path, "test1", header=False) @@ -508,7 +508,7 @@ def test_sheets(self, frame, tsframe, path): tsframe.index = index frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -530,7 +530,7 @@ def test_sheets(self, frame, tsframe, path): def test_colaliases(self, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) @@ -548,7 +548,7 @@ def test_colaliases(self, frame, path): def test_roundtrip_indexlabels(self, merge_cells, frame, path): frame = frame.copy() - frame["A"][:5] = np.nan + frame.iloc[:5, frame.columns.get_loc("A")] = np.nan frame.to_excel(path, "test1") frame.to_excel(path, "test1", columns=["A", "B"]) diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 3af06502a3066..3a66bf1adf25b 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -44,7 +44,7 @@ def test_rank(self, datetime_series): from scipy.stats import rankdata datetime_series[::2] = np.nan - datetime_series[:10][::3] = 4.0 + datetime_series[:10:3] = 4.0 ranks = datetime_series.rank() oranks = datetime_series.astype("O").rank() From 0780199811b430cda135e573f8381dc91d9b83c0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 3 Nov 2022 09:12:50 +0100 Subject: [PATCH 2/2] update to use .loc[index[slice], ..] pattern --- pandas/tests/frame/methods/test_cov_corr.py | 7 +++--- pandas/tests/frame/methods/test_fillna.py | 10 +++----- pandas/tests/frame/methods/test_replace.py | 27 +++++++++------------ 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 1a85d53a40cc2..445b90327ed2c 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -107,10 +107,9 @@ class TestDataFrameCorr: @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) @td.skip_if_no_scipy def test_corr_scipy_method(self, float_frame, method): - idx = float_frame.index - float_frame.loc[: idx[4], "A"] = np.nan - float_frame.loc[idx[5] : idx[9], "B"] = np.nan - float_frame.loc[: idx[9], "A"] = float_frame["A"][10:20] + float_frame.loc[float_frame.index[:5], "A"] = np.nan + float_frame.loc[float_frame.index[5:10], "B"] = np.nan + float_frame.loc[float_frame.index[:10], "A"] = float_frame["A"][10:20] correls = float_frame.corr(method=method) expected = float_frame["A"].corr(float_frame["C"], method=method) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index d34f7162d0724..94831da910150 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -392,18 +392,16 @@ def test_fillna_datetime_columns(self): tm.assert_frame_equal(result, expected) def test_ffill(self, datetime_frame): - idx = datetime_frame.index - datetime_frame.loc[: idx[4], "A"] = np.nan - datetime_frame.loc[idx[-5] :, "A"] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan tm.assert_frame_equal( datetime_frame.ffill(), datetime_frame.fillna(method="ffill") ) def test_bfill(self, datetime_frame): - idx = datetime_frame.index - datetime_frame.loc[: idx[4], "A"] = np.nan - datetime_frame.loc[idx[-5] :, "A"] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan tm.assert_frame_equal( datetime_frame.bfill(), datetime_frame.fillna(method="bfill") diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 01516608479c7..15105ceedd0d5 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -29,9 +29,8 @@ def mix_abc() -> dict[str, list[float | str]]: class TestDataFrameReplace: def test_replace_inplace(self, datetime_frame, float_string_frame): - idx = datetime_frame.index - datetime_frame.loc[: idx[4], "A"] = np.nan - datetime_frame.loc[idx[-5] :, "A"] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan tsframe = datetime_frame.copy() return_value = tsframe.replace(np.nan, 0, inplace=True) @@ -421,17 +420,16 @@ def test_regex_replace_string_types( tm.assert_equal(result, expected) def test_replace(self, datetime_frame): - idx = datetime_frame.index - datetime_frame.loc[: idx[4], "A"] = np.nan - datetime_frame.loc[idx[-5] :, "A"] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan zero_filled = datetime_frame.replace(np.nan, -1e8) tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) - datetime_frame.loc[: idx[4], "A"] = np.nan - datetime_frame.loc[idx[-5] :, "A"] = np.nan - datetime_frame.loc[: idx[4], "B"] = -1e8 + datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan + datetime_frame.loc[datetime_frame.index[:5], "B"] = -1e8 # empty df = DataFrame(index=["a", "b"]) @@ -718,17 +716,16 @@ def test_replace_for_new_dtypes(self, datetime_frame): # dtypes tsframe = datetime_frame.copy().astype(np.float32) - idx = tsframe.index - tsframe.loc[: idx[4], "A"] = np.nan - tsframe.loc[idx[-5] :, "A"] = np.nan + tsframe.loc[tsframe.index[:5], "A"] = np.nan + tsframe.loc[tsframe.index[-5:], "A"] = np.nan zero_filled = tsframe.replace(np.nan, -1e8) tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) - tsframe.loc[: idx[4], "A"] = np.nan - tsframe.loc[idx[-5] :, "A"] = np.nan - tsframe.loc[: idx[4], "B"] = -1e8 + tsframe.loc[tsframe.index[:5], "A"] = np.nan + tsframe.loc[tsframe.index[-5:], "A"] = np.nan + tsframe.loc[tsframe.index[:5], "B"] = -1e8 b = tsframe["B"] b[b == -1e8] = np.nan