diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 11b53d711fce2..5c6377349304c 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -90,7 +90,12 @@ def test_read_empty_dta_with_dtypes(self, version): "f64": np.array([0], dtype=np.float64), } ) - expected = empty_df_typed.copy() + # GH 7369, make sure can read a 0-obs dta file + with tm.ensure_clean() as path: + empty_df_typed.to_stata(path, write_index=False, version=version) + empty_reread = read_stata(path) + + expected = empty_df_typed # No uint# support. Downcast since values in range for int# expected["u8"] = expected["u8"].astype(np.int8) expected["u16"] = expected["u16"].astype(np.int16) @@ -99,12 +104,8 @@ def test_read_empty_dta_with_dtypes(self, version): expected["u64"] = expected["u64"].astype(np.int32) expected["i64"] = expected["i64"].astype(np.int32) - # GH 7369, make sure can read a 0-obs dta file - with tm.ensure_clean() as path: - empty_df_typed.to_stata(path, write_index=False, version=version) - empty_reread = read_stata(path) - tm.assert_frame_equal(expected, empty_reread) - tm.assert_series_equal(expected.dtypes, empty_reread.dtypes) + tm.assert_frame_equal(expected, empty_reread) + tm.assert_series_equal(expected.dtypes, empty_reread.dtypes) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_read_index_col_none(self, version): @@ -115,7 +116,7 @@ def test_read_index_col_none(self, version): read_df = read_stata(path) assert isinstance(read_df.index, pd.RangeIndex) - expected = df.copy() + expected = df expected["a"] = expected["a"].astype(np.int32) tm.assert_frame_equal(read_df, expected, check_index_type=True) @@ -325,7 +326,7 @@ def test_read_write_dta5(self): original.to_stata(path, convert_dates=None) written_and_read_again = self.read_dta(path) - expected = original.copy() + expected = original expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @@ -424,7 +425,7 @@ def test_read_write_dta11(self): written_and_read_again = self.read_dta(path) - expected = formatted.copy() + expected = formatted expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @@ -462,7 +463,7 @@ def test_read_write_dta12(self, version): written_and_read_again = self.read_dta(path) - expected = formatted.copy() + expected = formatted expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @@ -480,7 +481,7 @@ def test_read_write_dta13(self): original.to_stata(path) written_and_read_again = self.read_dta(path) - expected = formatted.copy() + expected = formatted expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @@ -561,7 +562,7 @@ def test_numeric_column_names(self): convert_col_name = lambda x: int(x[1]) written_and_read_again.columns = map(convert_col_name, columns) - expected = original.copy() + expected = original expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(expected, written_and_read_again) @@ -579,7 +580,7 @@ def test_nan_to_missing_value(self, version): written_and_read_again = self.read_dta(path) written_and_read_again = written_and_read_again.set_index("index") - expected = original.copy() + expected = original expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again, expected) @@ -602,7 +603,7 @@ def test_string_no_dates(self): original.to_stata(path) written_and_read_again = self.read_dta(path) - expected = original.copy() + expected = original expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @@ -619,7 +620,7 @@ def test_large_value_conversion(self): written_and_read_again = self.read_dta(path) - modified = original.copy() + modified = original modified["s1"] = Series(modified["s1"], dtype=np.int16) modified["s2"] = Series(modified["s2"], dtype=np.int32) modified["s3"] = Series(modified["s3"], dtype=np.float64) @@ -635,7 +636,7 @@ def test_dates_invalid_column(self): written_and_read_again = self.read_dta(path) - modified = original.copy() + modified = original modified.columns = ["_0"] modified.index = original.index.astype(np.int32) tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) @@ -721,8 +722,15 @@ def test_bool_uint(self, byteorder, version): {"s0": s0, "s1": s1, "s2": s2, "s3": s3, "s4": s4, "s5": s5, "s6": s6} ) original.index.name = "index" - expected = original.copy() - expected.index = original.index.astype(np.int32) + + with tm.ensure_clean() as path: + original.to_stata(path, byteorder=byteorder, version=version) + written_and_read_again = self.read_dta(path) + + written_and_read_again = written_and_read_again.set_index("index") + + expected = original + expected.index = expected.index.astype(np.int32) expected_types = ( np.int8, np.int8, @@ -735,11 +743,6 @@ def test_bool_uint(self, byteorder, version): for c, t in zip(expected.columns, expected_types): expected[c] = expected[c].astype(t) - with tm.ensure_clean() as path: - original.to_stata(path, byteorder=byteorder, version=version) - written_and_read_again = self.read_dta(path) - - written_and_read_again = written_and_read_again.set_index("index") tm.assert_frame_equal(written_and_read_again, expected) def test_variable_labels(self, datapath): @@ -1000,18 +1003,19 @@ def test_categorical_writing(self, version): "unlabeled", ], ) - expected = original.copy() + with tm.ensure_clean() as path: + original.astype("category").to_stata(path, version=version) + written_and_read_again = self.read_dta(path) - # these are all categoricals - original = pd.concat( - [original[col].astype("category") for col in original], axis=1 - ) + res = written_and_read_again.set_index("index") + + expected = original expected.index = expected.index.set_names("index").astype(np.int32) expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str) expected["unlabeled"] = expected["unlabeled"].apply(str) for col in expected: - orig = expected[col].copy() + orig = expected[col] cat = orig.astype("category")._values cat = cat.as_ordered() @@ -1022,11 +1026,6 @@ def test_categorical_writing(self, version): expected[col] = cat - with tm.ensure_clean() as path: - original.to_stata(path, version=version) - written_and_read_again = self.read_dta(path) - - res = written_and_read_again.set_index("index") tm.assert_frame_equal(res, expected) def test_categorical_warnings_and_errors(self): @@ -1037,9 +1036,7 @@ def test_categorical_warnings_and_errors(self): columns=["Too_long"], ) - original = pd.concat( - [original[col].astype("category") for col in original], axis=1 - ) + original = original.astype("category") with tm.ensure_clean() as path: msg = ( "Stata value labels for a single variable must have " @@ -1050,10 +1047,7 @@ def test_categorical_warnings_and_errors(self): original = DataFrame.from_records( [["a"], ["b"], ["c"], ["d"], [1]], columns=["Too_long"] - ) - original = pd.concat( - [original[col].astype("category") for col in original], axis=1 - ) + ).astype("category") with tm.assert_produces_warning(ValueLabelTypeMismatch): original.to_stata(path) @@ -1074,7 +1068,7 @@ def test_categorical_with_stata_missing_values(self, version): res = written_and_read_again.set_index("index") - expected = original.copy() + expected = original for col in expected: cat = expected[col]._values new_cats = cat.remove_unused_categories().categories @@ -1525,7 +1519,7 @@ def test_out_of_range_float(self): reread = read_stata(path) original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) - expected = original.copy() + expected = original expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(reread.set_index("index"), expected) @@ -1672,13 +1666,13 @@ def test_writer_117(self): version=117, ) written_and_read_again = self.read_dta(path) - # original.index is np.int32, read index is np.int64 - tm.assert_frame_equal( - written_and_read_again.set_index("index"), - original, - check_index_type=False, - ) - tm.assert_frame_equal(original, copy) + # original.index is np.int32, read index is np.int64 + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + original, + check_index_type=False, + ) + tm.assert_frame_equal(original, copy) def test_convert_strl_name_swap(self): original = DataFrame( @@ -2052,7 +2046,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten fp = path reread = read_stata(fp, index_col="index") - expected = df.copy() + expected = df expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(reread, expected) @@ -2078,7 +2072,7 @@ def test_compression_dict(method, file_ext): fp = path reread = read_stata(fp, index_col="index") - expected = df.copy() + expected = df expected.index = expected.index.astype(np.int32) tm.assert_frame_equal(reread, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 6ba2ac0104e75..ab75dd7469b73 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -134,7 +134,7 @@ def test_resample_empty_series(freq, index, resample_method): if resample_method == "ohlc": expected = DataFrame( - [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"] + [], index=ser.index[:0], columns=["open", "high", "low", "close"] ) expected.index = _asfreq_compat(ser.index, freq) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -167,7 +167,7 @@ def test_resample_nat_index_series(freq, resample_method): if resample_method == "ohlc": expected = DataFrame( - [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"] + [], index=ser.index[:0], columns=["open", "high", "low", "close"] ) tm.assert_frame_equal(result, expected, check_dtype=False) else: @@ -248,9 +248,7 @@ def test_resample_empty_dataframe(index, freq, resample_method): if resample_method == "ohlc": # TODO: no tests with len(df.columns) > 0 mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]]) - expected = DataFrame( - [], index=df.index[:0].copy(), columns=mi, dtype=np.float64 - ) + expected = DataFrame([], index=df.index[:0], columns=mi, dtype=np.float64) expected.index = _asfreq_compat(df.index, freq) elif resample_method != "size": diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 337c5ff53bd14..6b406a6e6f67a 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -397,10 +397,9 @@ def test_median_duplicate_columns(): columns=list("aaa"), index=date_range("2012-01-01", periods=20, freq="s"), ) - df2 = df.copy() - df2.columns = ["a", "b", "c"] - expected = df2.resample("5s").median() result = df.resample("5s").median() + df.columns = ["a", "b", "c"] + expected = df.resample("5s").median() expected.columns = result.columns tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index d8bc7974b4139..7174245ec16d8 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -412,7 +412,7 @@ def test_concat_bug_1719(self): ts1 = Series( np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10) ) - ts2 = ts1.copy()[::2] + ts2 = ts1[::2] # to join with union # these two are of different length! diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index c12b835cb61e1..9a481fed384d3 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -30,11 +30,11 @@ def test_concat_series(self): result = concat(pieces, keys=[0, 1, 2]) expected = ts.copy() - - ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]")) - exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))] - exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes) + exp_index = MultiIndex( + levels=[[0, 1, 2], DatetimeIndex(ts.index.to_numpy(dtype="M8[ns]"))], + codes=exp_codes, + ) expected.index = exp_index tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 5a1f47e341222..21804d32e76ca 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -153,13 +153,12 @@ def test_join_on(self, target_source, infer_string): target.join(source, on="E") # overlap - source_copy = source.copy() msg = ( "You are trying to merge on float64 and object|string columns for key " "'A'. If you wish to proceed you should use pd.concat" ) with pytest.raises(ValueError, match=msg): - target.join(source_copy, on="A") + target.join(source, on="A") def test_join_on_fails_with_different_right_index(self): df = DataFrame( diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index bc02da0d5b97b..cc05f49550e9b 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -126,9 +126,7 @@ def run_asserts(left, right, sort): "2nd", np.random.default_rng(2).integers(0, 10, len(left)).astype("float"), ) - - i = np.random.default_rng(2).permutation(len(left)) - right = left.iloc[i].copy() + right = left.sample(frac=1, random_state=np.random.default_rng(2)) left["4th"] = bind_cols(left) right["5th"] = -bind_cols(right) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index ff9f927597956..eb858e06c15dd 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -349,13 +349,12 @@ def test_melt_missing_columns_raises(self): df.melt(["a", "b", "not_here", "or_there"], ["c", "d"]) # Multiindex melt fails if column is missing from multilevel melt - multi = df.copy() - multi.columns = [list("ABCD"), list("abcd")] + df.columns = [list("ABCD"), list("abcd")] with pytest.raises(KeyError, match=msg): - multi.melt([("E", "a")], [("B", "b")]) + df.melt([("E", "a")], [("B", "b")]) # Multiindex fails if column is missing from single level melt with pytest.raises(KeyError, match=msg): - multi.melt(["A"], ["F"], col_level=0) + df.melt(["A"], ["F"], col_level=0) def test_melt_mixed_int_str_id_vars(self): # GH 29718 diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index fc1c80eb4dec6..a6e4b4f78e25a 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -430,7 +430,7 @@ def test_indexing(): result = ts["2001"] tm.assert_series_equal(result, ts.iloc[:12]) - df = DataFrame({"A": ts.copy()}) + df = DataFrame({"A": ts}) # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves # like any other key, so raises diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index bd60265582652..7a4d48fb76940 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -86,22 +86,24 @@ def test_corr(self, datetime_series, any_float_dtype): index=date_range("2020-01-01", periods=10), name="ts", ) - B = A.copy() - result = A.corr(B) - expected, _ = stats.pearsonr(A, B) + result = A.corr(A) + expected, _ = stats.pearsonr(A, A) tm.assert_almost_equal(result, expected) def test_corr_rank(self): stats = pytest.importorskip("scipy.stats") # kendall and spearman - A = Series( + B = Series( np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10), name="ts", ) - B = A.copy() - A[-5:] = A[:5].copy() + A = Series( + np.concatenate([np.arange(5, dtype=np.float64)] * 2), + index=date_range("2020-01-01", periods=10), + name="ts", + ) result = A.corr(B, method="kendall") expected = stats.kendalltau(A, B)[0] tm.assert_almost_equal(result, expected) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index d854f0b787759..0f43c1bc72c45 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -224,8 +224,7 @@ def test_interpolate_index_values(self): result = s.interpolate(method="index") - expected = s.copy() - bad = isna(expected.values) + bad = isna(s) good = ~bad expected = Series( np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad] diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index bf13ea04ca9f9..c37f57771e29d 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -177,7 +177,7 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype): arr = np.random.default_rng(2).standard_normal(10) arr = arr.astype(dtype.lower(), copy=False) - ser = Series(arr.copy(), dtype=dtype) + ser = Series(arr, dtype=dtype, copy=True) ser[1] = pd.NA result = ser.nlargest(5) diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 4d48f290e6a44..776c5633cb4b3 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -106,7 +106,7 @@ def test_rank(self, datetime_series): tm.assert_series_equal(iranks, exp) iseries = Series(np.repeat(np.nan, 100)) - exp = iseries.copy() + exp = iseries iranks = iseries.rank(pct=True) tm.assert_series_equal(iranks, exp) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 00142c4d82327..cb83bc5833fba 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -92,8 +92,7 @@ def test_sort_values(self, datetime_series, using_copy_on_write): s.sort_values(inplace=True) def test_sort_values_categorical(self): - c = Categorical(["a", "b", "b", "a"], ordered=False) - cat = Series(c.copy()) + cat = Series(Categorical(["a", "b", "b", "a"], ordered=False)) # sort in the categories order expected = Series( diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 1c17013d621c7..e292861012c8f 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -173,10 +173,10 @@ def test_to_csv_interval_index(self, using_infer_string): s.to_csv(path, header=False) result = self.read_csv(path, index_col=0) - # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) - expected = s.copy() - if using_infer_string: - expected.index = expected.index.astype("string[pyarrow_numpy]") - else: - expected.index = expected.index.astype(str) - tm.assert_series_equal(result, expected) + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = s + if using_infer_string: + expected.index = expected.index.astype("string[pyarrow_numpy]") + else: + expected.index = expected.index.astype(str) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index 3c70e839c8e20..ad11827117209 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -148,7 +148,7 @@ def test_unstack_multi_index_categorical_values(): dti = ser.index.levels[0] c = pd.Categorical(["foo"] * len(dti)) expected = DataFrame( - {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()}, + {"A": c, "B": c, "C": c, "D": c}, columns=Index(list("ABCD"), name="minor"), index=dti.rename("major"), ) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index b40e2e99dae2e..5e52e4166b902 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -241,7 +241,7 @@ def test_add_corner_cases(self, datetime_series): result = datetime_series + empty assert np.isnan(result).all() - result = empty + empty.copy() + result = empty + empty assert len(result) == 0 def test_add_float_plus_int(self, datetime_series): diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index d9c94e871bd4b..0e6d4932102d1 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -396,11 +396,11 @@ def test_logical_ops_label_based(self, using_infer_string): # vs empty empty = Series([], dtype=object) - result = a & empty.copy() + result = a & empty expected = Series([False, False, False], list("abc")) tm.assert_series_equal(result, expected) - result = a | empty.copy() + result = a | empty expected = Series([True, True, False], list("abc")) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index de0338b39d91a..3fd771c7fe31a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1788,7 +1788,6 @@ def test_scipy_compat(self, arr): arr = np.array(arr) mask = ~np.isfinite(arr) - arr = arr.copy() result = libalgos.rank_1d(arr) arr[mask] = np.inf exp = sp_stats.rankdata(arr) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 71994d186163e..838fee1db878c 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -32,7 +32,7 @@ def _frame2(): def _mixed(_frame): return DataFrame( { - "A": _frame["A"].copy(), + "A": _frame["A"], "B": _frame["B"].astype("float32"), "C": _frame["C"].astype("int64"), "D": _frame["D"].astype("int32"), @@ -44,7 +44,7 @@ def _mixed(_frame): def _mixed2(_frame2): return DataFrame( { - "A": _frame2["A"].copy(), + "A": _frame2["A"], "B": _frame2["B"].astype("float32"), "C": _frame2["C"].astype("int64"), "D": _frame2["D"].astype("int32"), @@ -78,22 +78,22 @@ def _integer2(): @pytest.fixture def _array(_frame): - return _frame["A"].values.copy() + return _frame["A"].to_numpy() @pytest.fixture def _array2(_frame2): - return _frame2["A"].values.copy() + return _frame2["A"].to_numpy() @pytest.fixture def _array_mixed(_mixed): - return _mixed["D"].values.copy() + return _mixed["D"].to_numpy() @pytest.fixture def _array_mixed2(_mixed2): - return _mixed2["D"].values.copy() + return _mixed2["D"].to_numpy() @pytest.mark.skipif(not expr.USE_NUMEXPR, reason="not using numexpr") @@ -170,7 +170,7 @@ def test_run_binary(self, request, fixture, flex, comparison_op, monkeypatch): df = request.getfixturevalue(fixture) arith = comparison_op.__name__ with option_context("compute.use_numexpr", False): - other = df.copy() + 1 + other = df + 1 with monkeypatch.context() as m: m.setattr(expr, "_MIN_ELEMENTS", 0) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 329fbac925539..132608d7df115 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -33,12 +33,10 @@ def left_right(): np.random.default_rng(2).integers(low, high, (n, 7)), columns=list("ABCDEFG") ) left["left"] = left.sum(axis=1) - - # one-2-one match - i = np.random.default_rng(2).permutation(len(left)) - right = left.iloc[i].copy() + right = left.sample( + frac=1, random_state=np.random.default_rng(2), ignore_index=True + ) right.columns = right.columns[:-1].tolist() + ["right"] - right.index = np.arange(len(right)) right["right"] *= -1 return left, right @@ -267,13 +265,12 @@ def test_int64_overflow_one_to_many_none_match(self, join_type, sort): right["right"] = np.random.default_rng(2).standard_normal(len(right)) # shuffle left & right frames - i = np.random.default_rng(5).permutation(len(left)) - left = left.iloc[i].copy() - left.index = np.arange(len(left)) - - i = np.random.default_rng(6).permutation(len(right)) - right = right.iloc[i].copy() - right.index = np.arange(len(right)) + left = left.sample( + frac=1, ignore_index=True, random_state=np.random.default_rng(5) + ) + right = right.sample( + frac=1, ignore_index=True, random_state=np.random.default_rng(6) + ) # manually compute outer merge ldict, rdict = defaultdict(list), defaultdict(list) @@ -307,13 +304,8 @@ def test_int64_overflow_one_to_many_none_match(self, join_type, sort): for rv in rval ) - def align(df): - df = df.sort_values(df.columns.tolist()) - df.index = np.arange(len(df)) - return df - out = DataFrame(vals, columns=list("ABCDEFG") + ["left", "right"]) - out = align(out) + out = out.sort_values(out.columns.to_list(), ignore_index=True) jmask = { "left": out["left"].notna(), @@ -323,19 +315,21 @@ def align(df): } mask = jmask[how] - frame = align(out[mask].copy()) + frame = out[mask].sort_values(out.columns.to_list(), ignore_index=True) assert mask.all() ^ mask.any() or how == "outer" res = merge(left, right, how=how, sort=sort) if sort: kcols = list("ABCDEFG") tm.assert_frame_equal( - res[kcols].copy(), res[kcols].sort_values(kcols, kind="mergesort") + res[kcols], res[kcols].sort_values(kcols, kind="mergesort") ) # as in GH9092 dtypes break with outer/right join # 2021-12-18: dtype does not break anymore - tm.assert_frame_equal(frame, align(res)) + tm.assert_frame_equal( + frame, res.sort_values(res.columns.to_list(), ignore_index=True) + ) @pytest.mark.parametrize( diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index c452382ec572b..7f49c5f0f86fd 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -204,11 +204,9 @@ def test_numeric_df_columns(columns): ) expected = DataFrame({"a": [1.2, 3.14, np.inf, 0.1], "b": [1.0, 2.0, 3.0, 4.0]}) + df[columns] = df[columns].apply(to_numeric) - df_copy = df.copy() - df_copy[columns] = df_copy[columns].apply(to_numeric) - - tm.assert_frame_equal(df_copy, expected) + tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 400bf10817ab8..48247cd480083 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -447,7 +447,7 @@ def get_window_bounds( ): min_periods = self.window_size if min_periods is None else 0 end = np.arange(num_values, dtype=np.int64) + 1 - start = end.copy() - self.window_size + start = end - self.window_size start[start < 0] = min_periods return start, end diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 3ceb58756bac6..6fae79ee70702 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -96,11 +96,10 @@ def test_flex_binary_frame(method, frame): tm.assert_frame_equal(res, exp) tm.assert_frame_equal(res2, exp) - frame2 = frame.copy() frame2 = DataFrame( - np.random.default_rng(2).standard_normal(frame2.shape), - index=frame2.index, - columns=frame2.columns, + np.random.default_rng(2).standard_normal(frame.shape), + index=frame.index, + columns=frame.columns, ) res3 = getattr(frame.rolling(window=10), method)(frame2) diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 5052019ddb726..5c785ed3fccb2 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -445,12 +445,12 @@ def test_cmov_window_regular_linear_range(win_types, step): # GH 8238 pytest.importorskip("scipy") vals = np.array(range(10), dtype=float) - xp = vals.copy() + rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean() + xp = vals xp[:2] = np.nan xp[-2:] = np.nan xp = Series(xp)[::step] - rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean() tm.assert_series_equal(xp, rs) @@ -648,16 +648,15 @@ def test_cmov_window_special_linear_range(win_types_special, step): } vals = np.array(range(10), dtype=float) - xp = vals.copy() - xp[:2] = np.nan - xp[-2:] = np.nan - xp = Series(xp)[::step] - rs = ( Series(vals) .rolling(5, win_type=win_types_special, center=True, step=step) .mean(**kwds[win_types_special]) ) + xp = vals + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp)[::step] tm.assert_series_equal(xp, rs)