Skip to content

Commit 256f38d

Browse files
mroeschke authored and pmhatre1 committed
TST/CLN: Remove unnecessary copies in tests (pandas-dev#56749)
1 parent d844024 commit 256f38d

25 files changed

+120 -144 lines changed

pandas/tests/io/test_stata.py

+47 -53
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,12 @@ def test_read_empty_dta_with_dtypes(self, version):
9090
"f64": np.array([0], dtype=np.float64),
9191
}
9292
)
93-
expected = empty_df_typed.copy()
93+
# GH 7369, make sure can read a 0-obs dta file
94+
with tm.ensure_clean() as path:
95+
empty_df_typed.to_stata(path, write_index=False, version=version)
96+
empty_reread = read_stata(path)
97+
98+
expected = empty_df_typed
9499
# No uint# support. Downcast since values in range for int#
95100
expected["u8"] = expected["u8"].astype(np.int8)
96101
expected["u16"] = expected["u16"].astype(np.int16)
@@ -99,12 +104,8 @@ def test_read_empty_dta_with_dtypes(self, version):
99104
expected["u64"] = expected["u64"].astype(np.int32)
100105
expected["i64"] = expected["i64"].astype(np.int32)
101106

102-
# GH 7369, make sure can read a 0-obs dta file
103-
with tm.ensure_clean() as path:
104-
empty_df_typed.to_stata(path, write_index=False, version=version)
105-
empty_reread = read_stata(path)
106-
tm.assert_frame_equal(expected, empty_reread)
107-
tm.assert_series_equal(expected.dtypes, empty_reread.dtypes)
107+
tm.assert_frame_equal(expected, empty_reread)
108+
tm.assert_series_equal(expected.dtypes, empty_reread.dtypes)
108109

109110
@pytest.mark.parametrize("version", [114, 117, 118, 119, None])
110111
def test_read_index_col_none(self, version):
@@ -115,7 +116,7 @@ def test_read_index_col_none(self, version):
115116
read_df = read_stata(path)
116117

117118
assert isinstance(read_df.index, pd.RangeIndex)
118-
expected = df.copy()
119+
expected = df
119120
expected["a"] = expected["a"].astype(np.int32)
120121
tm.assert_frame_equal(read_df, expected, check_index_type=True)
121122

@@ -325,7 +326,7 @@ def test_read_write_dta5(self):
325326
original.to_stata(path, convert_dates=None)
326327
written_and_read_again = self.read_dta(path)
327328

328-
expected = original.copy()
329+
expected = original
329330
expected.index = expected.index.astype(np.int32)
330331
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
331332

@@ -424,7 +425,7 @@ def test_read_write_dta11(self):
424425

425426
written_and_read_again = self.read_dta(path)
426427

427-
expected = formatted.copy()
428+
expected = formatted
428429
expected.index = expected.index.astype(np.int32)
429430
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
430431

@@ -462,7 +463,7 @@ def test_read_write_dta12(self, version):
462463

463464
written_and_read_again = self.read_dta(path)
464465

465-
expected = formatted.copy()
466+
expected = formatted
466467
expected.index = expected.index.astype(np.int32)
467468
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
468469

@@ -480,7 +481,7 @@ def test_read_write_dta13(self):
480481
original.to_stata(path)
481482
written_and_read_again = self.read_dta(path)
482483

483-
expected = formatted.copy()
484+
expected = formatted
484485
expected.index = expected.index.astype(np.int32)
485486
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
486487

@@ -561,7 +562,7 @@ def test_numeric_column_names(self):
561562
convert_col_name = lambda x: int(x[1])
562563
written_and_read_again.columns = map(convert_col_name, columns)
563564

564-
expected = original.copy()
565+
expected = original
565566
expected.index = expected.index.astype(np.int32)
566567
tm.assert_frame_equal(expected, written_and_read_again)
567568

@@ -579,7 +580,7 @@ def test_nan_to_missing_value(self, version):
579580
written_and_read_again = self.read_dta(path)
580581

581582
written_and_read_again = written_and_read_again.set_index("index")
582-
expected = original.copy()
583+
expected = original
583584
expected.index = expected.index.astype(np.int32)
584585
tm.assert_frame_equal(written_and_read_again, expected)
585586

@@ -602,7 +603,7 @@ def test_string_no_dates(self):
602603
original.to_stata(path)
603604
written_and_read_again = self.read_dta(path)
604605

605-
expected = original.copy()
606+
expected = original
606607
expected.index = expected.index.astype(np.int32)
607608
tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
608609

@@ -619,7 +620,7 @@ def test_large_value_conversion(self):
619620

620621
written_and_read_again = self.read_dta(path)
621622

622-
modified = original.copy()
623+
modified = original
623624
modified["s1"] = Series(modified["s1"], dtype=np.int16)
624625
modified["s2"] = Series(modified["s2"], dtype=np.int32)
625626
modified["s3"] = Series(modified["s3"], dtype=np.float64)
@@ -635,7 +636,7 @@ def test_dates_invalid_column(self):
635636

636637
written_and_read_again = self.read_dta(path)
637638

638-
modified = original.copy()
639+
modified = original
639640
modified.columns = ["_0"]
640641
modified.index = original.index.astype(np.int32)
641642
tm.assert_frame_equal(written_and_read_again.set_index("index"), modified)
@@ -721,8 +722,15 @@ def test_bool_uint(self, byteorder, version):
721722
{"s0": s0, "s1": s1, "s2": s2, "s3": s3, "s4": s4, "s5": s5, "s6": s6}
722723
)
723724
original.index.name = "index"
724-
expected = original.copy()
725-
expected.index = original.index.astype(np.int32)
725+
726+
with tm.ensure_clean() as path:
727+
original.to_stata(path, byteorder=byteorder, version=version)
728+
written_and_read_again = self.read_dta(path)
729+
730+
written_and_read_again = written_and_read_again.set_index("index")
731+
732+
expected = original
733+
expected.index = expected.index.astype(np.int32)
726734
expected_types = (
727735
np.int8,
728736
np.int8,
@@ -735,11 +743,6 @@ def test_bool_uint(self, byteorder, version):
735743
for c, t in zip(expected.columns, expected_types):
736744
expected[c] = expected[c].astype(t)
737745

738-
with tm.ensure_clean() as path:
739-
original.to_stata(path, byteorder=byteorder, version=version)
740-
written_and_read_again = self.read_dta(path)
741-
742-
written_and_read_again = written_and_read_again.set_index("index")
743746
tm.assert_frame_equal(written_and_read_again, expected)
744747

745748
def test_variable_labels(self, datapath):
@@ -1000,18 +1003,19 @@ def test_categorical_writing(self, version):
10001003
"unlabeled",
10011004
],
10021005
)
1003-
expected = original.copy()
1006+
with tm.ensure_clean() as path:
1007+
original.astype("category").to_stata(path, version=version)
1008+
written_and_read_again = self.read_dta(path)
10041009

1005-
# these are all categoricals
1006-
original = pd.concat(
1007-
[original[col].astype("category") for col in original], axis=1
1008-
)
1010+
res = written_and_read_again.set_index("index")
1011+
1012+
expected = original
10091013
expected.index = expected.index.set_names("index").astype(np.int32)
10101014

10111015
expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str)
10121016
expected["unlabeled"] = expected["unlabeled"].apply(str)
10131017
for col in expected:
1014-
orig = expected[col].copy()
1018+
orig = expected[col]
10151019

10161020
cat = orig.astype("category")._values
10171021
cat = cat.as_ordered()
@@ -1022,11 +1026,6 @@ def test_categorical_writing(self, version):
10221026

10231027
expected[col] = cat
10241028

1025-
with tm.ensure_clean() as path:
1026-
original.to_stata(path, version=version)
1027-
written_and_read_again = self.read_dta(path)
1028-
1029-
res = written_and_read_again.set_index("index")
10301029
tm.assert_frame_equal(res, expected)
10311030

10321031
def test_categorical_warnings_and_errors(self):
@@ -1037,9 +1036,7 @@ def test_categorical_warnings_and_errors(self):
10371036
columns=["Too_long"],
10381037
)
10391038

1040-
original = pd.concat(
1041-
[original[col].astype("category") for col in original], axis=1
1042-
)
1039+
original = original.astype("category")
10431040
with tm.ensure_clean() as path:
10441041
msg = (
10451042
"Stata value labels for a single variable must have "
@@ -1050,10 +1047,7 @@ def test_categorical_warnings_and_errors(self):
10501047

10511048
original = DataFrame.from_records(
10521049
[["a"], ["b"], ["c"], ["d"], [1]], columns=["Too_long"]
1053-
)
1054-
original = pd.concat(
1055-
[original[col].astype("category") for col in original], axis=1
1056-
)
1050+
).astype("category")
10571051

10581052
with tm.assert_produces_warning(ValueLabelTypeMismatch):
10591053
original.to_stata(path)
@@ -1074,7 +1068,7 @@ def test_categorical_with_stata_missing_values(self, version):
10741068

10751069
res = written_and_read_again.set_index("index")
10761070

1077-
expected = original.copy()
1071+
expected = original
10781072
for col in expected:
10791073
cat = expected[col]._values
10801074
new_cats = cat.remove_unused_categories().categories
@@ -1525,7 +1519,7 @@ def test_out_of_range_float(self):
15251519
reread = read_stata(path)
15261520

15271521
original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64)
1528-
expected = original.copy()
1522+
expected = original
15291523
expected.index = expected.index.astype(np.int32)
15301524
tm.assert_frame_equal(reread.set_index("index"), expected)
15311525

@@ -1672,13 +1666,13 @@ def test_writer_117(self):
16721666
version=117,
16731667
)
16741668
written_and_read_again = self.read_dta(path)
1675-
# original.index is np.int32, read index is np.int64
1676-
tm.assert_frame_equal(
1677-
written_and_read_again.set_index("index"),
1678-
original,
1679-
check_index_type=False,
1680-
)
1681-
tm.assert_frame_equal(original, copy)
1669+
# original.index is np.int32, read index is np.int64
1670+
tm.assert_frame_equal(
1671+
written_and_read_again.set_index("index"),
1672+
original,
1673+
check_index_type=False,
1674+
)
1675+
tm.assert_frame_equal(original, copy)
16821676

16831677
def test_convert_strl_name_swap(self):
16841678
original = DataFrame(
@@ -2052,7 +2046,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten
20522046
fp = path
20532047
reread = read_stata(fp, index_col="index")
20542048

2055-
expected = df.copy()
2049+
expected = df
20562050
expected.index = expected.index.astype(np.int32)
20572051
tm.assert_frame_equal(reread, expected)
20582052

@@ -2078,7 +2072,7 @@ def test_compression_dict(method, file_ext):
20782072
fp = path
20792073
reread = read_stata(fp, index_col="index")
20802074

2081-
expected = df.copy()
2075+
expected = df
20822076
expected.index = expected.index.astype(np.int32)
20832077
tm.assert_frame_equal(reread, expected)
20842078

pandas/tests/resample/test_base.py

+3 -5
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def test_resample_empty_series(freq, index, resample_method):
134134

135135
if resample_method == "ohlc":
136136
expected = DataFrame(
137-
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
137+
[], index=ser.index[:0], columns=["open", "high", "low", "close"]
138138
)
139139
expected.index = _asfreq_compat(ser.index, freq)
140140
tm.assert_frame_equal(result, expected, check_dtype=False)
@@ -167,7 +167,7 @@ def test_resample_nat_index_series(freq, resample_method):
167167

168168
if resample_method == "ohlc":
169169
expected = DataFrame(
170-
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
170+
[], index=ser.index[:0], columns=["open", "high", "low", "close"]
171171
)
172172
tm.assert_frame_equal(result, expected, check_dtype=False)
173173
else:
@@ -248,9 +248,7 @@ def test_resample_empty_dataframe(index, freq, resample_method):
248248
if resample_method == "ohlc":
249249
# TODO: no tests with len(df.columns) > 0
250250
mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]])
251-
expected = DataFrame(
252-
[], index=df.index[:0].copy(), columns=mi, dtype=np.float64
253-
)
251+
expected = DataFrame([], index=df.index[:0], columns=mi, dtype=np.float64)
254252
expected.index = _asfreq_compat(df.index, freq)
255253

256254
elif resample_method != "size":

pandas/tests/resample/test_resampler_grouper.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -397,10 +397,9 @@ def test_median_duplicate_columns():
397397
columns=list("aaa"),
398398
index=date_range("2012-01-01", periods=20, freq="s"),
399399
)
400-
df2 = df.copy()
401-
df2.columns = ["a", "b", "c"]
402-
expected = df2.resample("5s").median()
403400
result = df.resample("5s").median()
401+
df.columns = ["a", "b", "c"]
402+
expected = df.resample("5s").median()
404403
expected.columns = result.columns
405404
tm.assert_frame_equal(result, expected)
406405

pandas/tests/reshape/concat/test_concat.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -412,7 +412,7 @@ def test_concat_bug_1719(self):
412412
ts1 = Series(
413413
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
414414
)
415-
ts2 = ts1.copy()[::2]
415+
ts2 = ts1[::2]
416416

417417
# to join with union
418418
# these two are of different length!

pandas/tests/reshape/concat/test_series.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -30,11 +30,11 @@ def test_concat_series(self):
3030

3131
result = concat(pieces, keys=[0, 1, 2])
3232
expected = ts.copy()
33-
34-
ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]"))
35-
3633
exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))]
37-
exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes)
34+
exp_index = MultiIndex(
35+
levels=[[0, 1, 2], DatetimeIndex(ts.index.to_numpy(dtype="M8[ns]"))],
36+
codes=exp_codes,
37+
)
3838
expected.index = exp_index
3939
tm.assert_series_equal(result, expected)
4040

pandas/tests/reshape/merge/test_join.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -153,13 +153,12 @@ def test_join_on(self, target_source, infer_string):
153153
target.join(source, on="E")
154154

155155
# overlap
156-
source_copy = source.copy()
157156
msg = (
158157
"You are trying to merge on float64 and object|string columns for key "
159158
"'A'. If you wish to proceed you should use pd.concat"
160159
)
161160
with pytest.raises(ValueError, match=msg):
162-
target.join(source_copy, on="A")
161+
target.join(source, on="A")
163162

164163
def test_join_on_fails_with_different_right_index(self):
165164
df = DataFrame(

pandas/tests/reshape/merge/test_multi.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -126,9 +126,7 @@ def run_asserts(left, right, sort):
126126
"2nd",
127127
np.random.default_rng(2).integers(0, 10, len(left)).astype("float"),
128128
)
129-
130-
i = np.random.default_rng(2).permutation(len(left))
131-
right = left.iloc[i].copy()
129+
right = left.sample(frac=1, random_state=np.random.default_rng(2))
132130

133131
left["4th"] = bind_cols(left)
134132
right["5th"] = -bind_cols(right)

pandas/tests/reshape/test_melt.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -349,13 +349,12 @@ def test_melt_missing_columns_raises(self):
349349
df.melt(["a", "b", "not_here", "or_there"], ["c", "d"])
350350

351351
# Multiindex melt fails if column is missing from multilevel melt
352-
multi = df.copy()
353-
multi.columns = [list("ABCD"), list("abcd")]
352+
df.columns = [list("ABCD"), list("abcd")]
354353
with pytest.raises(KeyError, match=msg):
355-
multi.melt([("E", "a")], [("B", "b")])
354+
df.melt([("E", "a")], [("B", "b")])
356355
# Multiindex fails if column is missing from single level melt
357356
with pytest.raises(KeyError, match=msg):
358-
multi.melt(["A"], ["F"], col_level=0)
357+
df.melt(["A"], ["F"], col_level=0)
359358

360359
def test_melt_mixed_int_str_id_vars(self):
361360
# GH 29718

pandas/tests/series/indexing/test_datetime.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -430,7 +430,7 @@ def test_indexing():
430430
result = ts["2001"]
431431
tm.assert_series_equal(result, ts.iloc[:12])
432432

433-
df = DataFrame({"A": ts.copy()})
433+
df = DataFrame({"A": ts})
434434

435435
# GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
436436
# like any other key, so raises

0 commit comments

Comments
 (0)