pandas-dev · phofl · Jan 8, 2024 · Jan 5, 2024 · Jan 5, 2024 · Jan 8, 2024
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -90,7 +90,12 @@ def test_read_empty_dta_with_dtypes(self, version):
                 "f64": np.array([0], dtype=np.float64),
             }
         )
-        expected = empty_df_typed.copy()
+        # GH 7369: make sure we can read a 0-obs dta file
+        with tm.ensure_clean() as path:
+            empty_df_typed.to_stata(path, write_index=False, version=version)
+            empty_reread = read_stata(path)
+
+        expected = empty_df_typed
         # No uint# support. Downcast since values in range for int#
         expected["u8"] = expected["u8"].astype(np.int8)
         expected["u16"] = expected["u16"].astype(np.int16)
@@ -99,12 +104,8 @@ def test_read_empty_dta_with_dtypes(self, version):
         expected["u64"] = expected["u64"].astype(np.int32)
         expected["i64"] = expected["i64"].astype(np.int32)
 
-        # GH 7369, make sure can read a 0-obs dta file
-        with tm.ensure_clean() as path:
-            empty_df_typed.to_stata(path, write_index=False, version=version)
-            empty_reread = read_stata(path)
-            tm.assert_frame_equal(expected, empty_reread)
-            tm.assert_series_equal(expected.dtypes, empty_reread.dtypes)
+        tm.assert_frame_equal(expected, empty_reread)
+        tm.assert_series_equal(expected.dtypes, empty_reread.dtypes)
 
     @pytest.mark.parametrize("version", [114, 117, 118, 119, None])
     def test_read_index_col_none(self, version):
@@ -115,7 +116,7 @@ def test_read_index_col_none(self, version):
             read_df = read_stata(path)
 
         assert isinstance(read_df.index, pd.RangeIndex)
-        expected = df.copy()
+        expected = df
         expected["a"] = expected["a"].astype(np.int32)
         tm.assert_frame_equal(read_df, expected, check_index_type=True)
 
@@ -325,7 +326,7 @@ def test_read_write_dta5(self):
             original.to_stata(path, convert_dates=None)
             written_and_read_again = self.read_dta(path)
 
-        expected = original.copy()
+        expected = original
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
@@ -424,7 +425,7 @@ def test_read_write_dta11(self):
 
             written_and_read_again = self.read_dta(path)
 
-        expected = formatted.copy()
+        expected = formatted
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
@@ -462,7 +463,7 @@ def test_read_write_dta12(self, version):
 
             written_and_read_again = self.read_dta(path)
 
-        expected = formatted.copy()
+        expected = formatted
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
@@ -480,7 +481,7 @@ def test_read_write_dta13(self):
             original.to_stata(path)
             written_and_read_again = self.read_dta(path)
 
-        expected = formatted.copy()
+        expected = formatted
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
@@ -561,7 +562,7 @@ def test_numeric_column_names(self):
         convert_col_name = lambda x: int(x[1])
         written_and_read_again.columns = map(convert_col_name, columns)
 
-        expected = original.copy()
+        expected = original
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(expected, written_and_read_again)
 
@@ -579,7 +580,7 @@ def test_nan_to_missing_value(self, version):
             written_and_read_again = self.read_dta(path)
 
         written_and_read_again = written_and_read_again.set_index("index")
-        expected = original.copy()
+        expected = original
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again, expected)
 
@@ -602,7 +603,7 @@ def test_string_no_dates(self):
             original.to_stata(path)
             written_and_read_again = self.read_dta(path)
 
-        expected = original.copy()
+        expected = original
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
@@ -619,7 +620,7 @@ def test_large_value_conversion(self):
 
             written_and_read_again = self.read_dta(path)
 
-        modified = original.copy()
+        modified = original
         modified["s1"] = Series(modified["s1"], dtype=np.int16)
         modified["s2"] = Series(modified["s2"], dtype=np.int32)
         modified["s3"] = Series(modified["s3"], dtype=np.float64)
@@ -635,7 +636,7 @@ def test_dates_invalid_column(self):
 
             written_and_read_again = self.read_dta(path)
 
-        modified = original.copy()
+        modified = original
         modified.columns = ["_0"]
         modified.index = original.index.astype(np.int32)
         tm.assert_frame_equal(written_and_read_again.set_index("index"), modified)
@@ -721,8 +722,15 @@ def test_bool_uint(self, byteorder, version):
             {"s0": s0, "s1": s1, "s2": s2, "s3": s3, "s4": s4, "s5": s5, "s6": s6}
         )
         original.index.name = "index"
-        expected = original.copy()
-        expected.index = original.index.astype(np.int32)
+
+        with tm.ensure_clean() as path:
+            original.to_stata(path, byteorder=byteorder, version=version)
+            written_and_read_again = self.read_dta(path)
+
+        written_and_read_again = written_and_read_again.set_index("index")
+
+        expected = original
+        expected.index = expected.index.astype(np.int32)
         expected_types = (
             np.int8,
             np.int8,
@@ -735,11 +743,6 @@ def test_bool_uint(self, byteorder, version):
         for c, t in zip(expected.columns, expected_types):
             expected[c] = expected[c].astype(t)
 
-        with tm.ensure_clean() as path:
-            original.to_stata(path, byteorder=byteorder, version=version)
-            written_and_read_again = self.read_dta(path)
-
-        written_and_read_again = written_and_read_again.set_index("index")
         tm.assert_frame_equal(written_and_read_again, expected)
 
     def test_variable_labels(self, datapath):
@@ -1000,18 +1003,19 @@ def test_categorical_writing(self, version):
                 "unlabeled",
             ],
         )
-        expected = original.copy()
+        with tm.ensure_clean() as path:
+            original.astype("category").to_stata(path, version=version)
+            written_and_read_again = self.read_dta(path)
 
-        # these are all categoricals
-        original = pd.concat(
-            [original[col].astype("category") for col in original], axis=1
-        )
+        res = written_and_read_again.set_index("index")
+
+        expected = original
         expected.index = expected.index.set_names("index").astype(np.int32)
 
         expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str)
         expected["unlabeled"] = expected["unlabeled"].apply(str)
         for col in expected:
-            orig = expected[col].copy()
+            orig = expected[col]
 
             cat = orig.astype("category")._values
             cat = cat.as_ordered()
@@ -1022,11 +1026,6 @@ def test_categorical_writing(self, version):
 
             expected[col] = cat
 
-        with tm.ensure_clean() as path:
-            original.to_stata(path, version=version)
-            written_and_read_again = self.read_dta(path)
-
-        res = written_and_read_again.set_index("index")
         tm.assert_frame_equal(res, expected)
 
     def test_categorical_warnings_and_errors(self):
@@ -1037,9 +1036,7 @@ def test_categorical_warnings_and_errors(self):
             columns=["Too_long"],
         )
 
-        original = pd.concat(
-            [original[col].astype("category") for col in original], axis=1
-        )
+        original = original.astype("category")
         with tm.ensure_clean() as path:
             msg = (
                 "Stata value labels for a single variable must have "
@@ -1050,10 +1047,7 @@ def test_categorical_warnings_and_errors(self):
 
         original = DataFrame.from_records(
             [["a"], ["b"], ["c"], ["d"], [1]], columns=["Too_long"]
-        )
-        original = pd.concat(
-            [original[col].astype("category") for col in original], axis=1
-        )
+        ).astype("category")
 
         with tm.assert_produces_warning(ValueLabelTypeMismatch):
             original.to_stata(path)
@@ -1074,7 +1068,7 @@ def test_categorical_with_stata_missing_values(self, version):
 
         res = written_and_read_again.set_index("index")
 
-        expected = original.copy()
+        expected = original
         for col in expected:
             cat = expected[col]._values
             new_cats = cat.remove_unused_categories().categories
@@ -1525,7 +1519,7 @@ def test_out_of_range_float(self):
             reread = read_stata(path)
 
         original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64)
-        expected = original.copy()
+        expected = original
         expected.index = expected.index.astype(np.int32)
         tm.assert_frame_equal(reread.set_index("index"), expected)
 
@@ -1672,13 +1666,13 @@ def test_writer_117(self):
                 version=117,
             )
             written_and_read_again = self.read_dta(path)
-            # original.index is np.int32, read index is np.int64
-            tm.assert_frame_equal(
-                written_and_read_again.set_index("index"),
-                original,
-                check_index_type=False,
-            )
-            tm.assert_frame_equal(original, copy)
+        # original.index is np.int32, read index is np.int64
+        tm.assert_frame_equal(
+            written_and_read_again.set_index("index"),
+            original,
+            check_index_type=False,
+        )
+        tm.assert_frame_equal(original, copy)
 
     def test_convert_strl_name_swap(self):
         original = DataFrame(
@@ -2052,7 +2046,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten
             fp = path
         reread = read_stata(fp, index_col="index")
 
-    expected = df.copy()
+    expected = df
     expected.index = expected.index.astype(np.int32)
     tm.assert_frame_equal(reread, expected)
 
@@ -2078,7 +2072,7 @@ def test_compression_dict(method, file_ext):
             fp = path
         reread = read_stata(fp, index_col="index")
 
-    expected = df.copy()
+    expected = df
     expected.index = expected.index.astype(np.int32)
     tm.assert_frame_equal(reread, expected)
 

diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
@@ -134,7 +134,7 @@ def test_resample_empty_series(freq, index, resample_method):
 
     if resample_method == "ohlc":
         expected = DataFrame(
-            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
+            [], index=ser.index[:0], columns=["open", "high", "low", "close"]
         )
         expected.index = _asfreq_compat(ser.index, freq)
         tm.assert_frame_equal(result, expected, check_dtype=False)
@@ -167,7 +167,7 @@ def test_resample_nat_index_series(freq, resample_method):
 
     if resample_method == "ohlc":
         expected = DataFrame(
-            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
+            [], index=ser.index[:0], columns=["open", "high", "low", "close"]
         )
         tm.assert_frame_equal(result, expected, check_dtype=False)
     else:
@@ -248,9 +248,7 @@ def test_resample_empty_dataframe(index, freq, resample_method):
     if resample_method == "ohlc":
         # TODO: no tests with len(df.columns) > 0
         mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]])
-        expected = DataFrame(
-            [], index=df.index[:0].copy(), columns=mi, dtype=np.float64
-        )
+        expected = DataFrame([], index=df.index[:0], columns=mi, dtype=np.float64)
         expected.index = _asfreq_compat(df.index, freq)
 
     elif resample_method != "size":

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -397,10 +397,9 @@ def test_median_duplicate_columns():
         columns=list("aaa"),
         index=date_range("2012-01-01", periods=20, freq="s"),
     )
-    df2 = df.copy()
-    df2.columns = ["a", "b", "c"]
-    expected = df2.resample("5s").median()
     result = df.resample("5s").median()
+    df.columns = ["a", "b", "c"]
+    expected = df.resample("5s").median()
     expected.columns = result.columns
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
@@ -412,7 +412,7 @@ def test_concat_bug_1719(self):
         ts1 = Series(
             np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
         )
-        ts2 = ts1.copy()[::2]
+        ts2 = ts1[::2]
 
         # to join with union
         # these two are of different length!

diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py
@@ -30,11 +30,11 @@ def test_concat_series(self):
 
         result = concat(pieces, keys=[0, 1, 2])
         expected = ts.copy()
-
-        ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]"))
-
         exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))]
-        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes)
+        exp_index = MultiIndex(
+            levels=[[0, 1, 2], DatetimeIndex(ts.index.to_numpy(dtype="M8[ns]"))],
+            codes=exp_codes,
+        )
         expected.index = exp_index
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
@@ -153,13 +153,12 @@ def test_join_on(self, target_source, infer_string):
             target.join(source, on="E")
 
         # overlap
-        source_copy = source.copy()
         msg = (
             "You are trying to merge on float64 and object|string columns for key "
             "'A'. If you wish to proceed you should use pd.concat"
         )
         with pytest.raises(ValueError, match=msg):
-            target.join(source_copy, on="A")
+            target.join(source, on="A")
 
     def test_join_on_fails_with_different_right_index(self):
         df = DataFrame(

diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py
@@ -126,9 +126,7 @@ def run_asserts(left, right, sort):
             "2nd",
             np.random.default_rng(2).integers(0, 10, len(left)).astype("float"),
         )
-
-        i = np.random.default_rng(2).permutation(len(left))
-        right = left.iloc[i].copy()
+        right = left.sample(frac=1, random_state=np.random.default_rng(2))
 
         left["4th"] = bind_cols(left)
         right["5th"] = -bind_cols(right)

diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -349,13 +349,12 @@ def test_melt_missing_columns_raises(self):
             df.melt(["a", "b", "not_here", "or_there"], ["c", "d"])
 
         # Multiindex melt fails if column is missing from multilevel melt
-        multi = df.copy()
-        multi.columns = [list("ABCD"), list("abcd")]
+        df.columns = [list("ABCD"), list("abcd")]
         with pytest.raises(KeyError, match=msg):
-            multi.melt([("E", "a")], [("B", "b")])
+            df.melt([("E", "a")], [("B", "b")])
         # Multiindex fails if column is missing from single level melt
         with pytest.raises(KeyError, match=msg):
-            multi.melt(["A"], ["F"], col_level=0)
+            df.melt(["A"], ["F"], col_level=0)
 
     def test_melt_mixed_int_str_id_vars(self):
         # GH 29718

diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
@@ -430,7 +430,7 @@ def test_indexing():
     result = ts["2001"]
     tm.assert_series_equal(result, ts.iloc[:12])
 
-    df = DataFrame({"A": ts.copy()})
+    df = DataFrame({"A": ts})
 
     # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
     #  like any other key, so raises