Skip to content

Commit 0287cde

Browse files
authored
TST: Remove unnecessary read_csv usage during testing (#55643)
* Remove unnecessary read_csv usage
* Remove csvs
* Evaluate splits
* Typo
1 parent 73e085e commit 0287cde

18 files changed

+2087
-503
lines changed

pandas/tests/frame/test_stack_unstack.py

+15-16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from datetime import datetime
2-
from io import StringIO
32
import itertools
43
import re
54

@@ -1771,21 +1770,21 @@ def test_stack_duplicate_index(self, idx, columns, exp_idx, future_stack):
17711770
"ignore:The previous implementation of stack is deprecated"
17721771
)
17731772
def test_unstack_odd_failure(self, future_stack):
1774-
data = """day,time,smoker,sum,len
1775-
Fri,Dinner,No,8.25,3.
1776-
Fri,Dinner,Yes,27.03,9
1777-
Fri,Lunch,No,3.0,1
1778-
Fri,Lunch,Yes,13.68,6
1779-
Sat,Dinner,No,139.63,45
1780-
Sat,Dinner,Yes,120.77,42
1781-
Sun,Dinner,No,180.57,57
1782-
Sun,Dinner,Yes,66.82,19
1783-
Thu,Dinner,No,3.0,1
1784-
Thu,Lunch,No,117.32,44
1785-
Thu,Lunch,Yes,51.51,17"""
1786-
1787-
df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"])
1788-
1773+
mi = MultiIndex.from_arrays(
1774+
[
1775+
["Fri"] * 4 + ["Sat"] * 2 + ["Sun"] * 2 + ["Thu"] * 3,
1776+
["Dinner"] * 2 + ["Lunch"] * 2 + ["Dinner"] * 5 + ["Lunch"] * 2,
1777+
["No", "Yes"] * 4 + ["No", "No", "Yes"],
1778+
],
1779+
names=["day", "time", "smoker"],
1780+
)
1781+
df = DataFrame(
1782+
{
1783+
"sum": np.arange(11, dtype="float64"),
1784+
"len": np.arange(11, dtype="float64"),
1785+
},
1786+
index=mi,
1787+
)
17891788
# it works, #2100
17901789
result = df.unstack(2)
17911790

pandas/tests/groupby/test_apply.py

+65-25
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
date,
33
datetime,
44
)
5-
from io import StringIO
65

76
import numpy as np
87
import pytest
@@ -38,39 +37,80 @@ def store(group):
3837
tm.assert_frame_equal(groups[0], expected_value)
3938

4039

41-
def test_apply_issues():
40+
def test_apply_index_date():
4241
# GH 5788
43-
44-
s = """2011.05.16,00:00,1.40893
45-
2011.05.16,01:00,1.40760
46-
2011.05.16,02:00,1.40750
47-
2011.05.16,03:00,1.40649
48-
2011.05.17,02:00,1.40893
49-
2011.05.17,03:00,1.40760
50-
2011.05.17,04:00,1.40750
51-
2011.05.17,05:00,1.40649
52-
2011.05.18,02:00,1.40893
53-
2011.05.18,03:00,1.40760
54-
2011.05.18,04:00,1.40750
55-
2011.05.18,05:00,1.40649"""
56-
57-
df = pd.read_csv(
58-
StringIO(s),
59-
header=None,
60-
names=["date", "time", "value"],
61-
parse_dates=[["date", "time"]],
42+
ts = [
43+
"2011-05-16 00:00",
44+
"2011-05-16 01:00",
45+
"2011-05-16 02:00",
46+
"2011-05-16 03:00",
47+
"2011-05-17 02:00",
48+
"2011-05-17 03:00",
49+
"2011-05-17 04:00",
50+
"2011-05-17 05:00",
51+
"2011-05-18 02:00",
52+
"2011-05-18 03:00",
53+
"2011-05-18 04:00",
54+
"2011-05-18 05:00",
55+
]
56+
df = DataFrame(
57+
{
58+
"value": [
59+
1.40893,
60+
1.40760,
61+
1.40750,
62+
1.40649,
63+
1.40893,
64+
1.40760,
65+
1.40750,
66+
1.40649,
67+
1.40893,
68+
1.40760,
69+
1.40750,
70+
1.40649,
71+
],
72+
},
73+
index=Index(pd.to_datetime(ts), name="date_time"),
6274
)
63-
df = df.set_index("date_time")
64-
6575
expected = df.groupby(df.index.date).idxmax()
6676
result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
6777
tm.assert_frame_equal(result, expected)
6878

79+
80+
def test_apply_index_date_object():
6981
# GH 5789
7082
# don't auto coerce dates
71-
df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"])
83+
ts = [
84+
"2011-05-16 00:00",
85+
"2011-05-16 01:00",
86+
"2011-05-16 02:00",
87+
"2011-05-16 03:00",
88+
"2011-05-17 02:00",
89+
"2011-05-17 03:00",
90+
"2011-05-17 04:00",
91+
"2011-05-17 05:00",
92+
"2011-05-18 02:00",
93+
"2011-05-18 03:00",
94+
"2011-05-18 04:00",
95+
"2011-05-18 05:00",
96+
]
97+
df = DataFrame([row.split() for row in ts], columns=["date", "time"])
98+
df["value"] = [
99+
1.40893,
100+
1.40760,
101+
1.40750,
102+
1.40649,
103+
1.40893,
104+
1.40760,
105+
1.40750,
106+
1.40649,
107+
1.40893,
108+
1.40760,
109+
1.40750,
110+
1.40649,
111+
]
72112
exp_idx = Index(
73-
["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date"
113+
["2011-05-16", "2011-05-17", "2011-05-18"], dtype=object, name="date"
74114
)
75115
expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
76116
msg = "DataFrameGroupBy.apply operated on the grouping columns"

pandas/tests/groupby/test_reductions.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import builtins
22
import datetime as dt
3-
from io import StringIO
43
from string import ascii_lowercase
54

65
import numpy as np
@@ -589,13 +588,18 @@ def test_min_empty_string_dtype(func):
589588

590589

591590
def test_max_nan_bug():
592-
raw = """,Date,app,File
593-
-04-23,2013-04-23 00:00:00,,log080001.log
594-
-05-06,2013-05-06 00:00:00,,log.log
595-
-05-07,2013-05-07 00:00:00,OE,xlsx"""
596-
597-
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
598-
df = pd.read_csv(StringIO(raw), parse_dates=[0])
591+
df = DataFrame(
592+
{
593+
"Unnamed: 0": ["-04-23", "-05-06", "-05-07"],
594+
"Date": [
595+
"2013-04-23 00:00:00",
596+
"2013-05-06 00:00:00",
597+
"2013-05-07 00:00:00",
598+
],
599+
"app": Series([np.nan, np.nan, "OE"]),
600+
"File": ["log080001.log", "log.log", "xlsx"],
601+
}
602+
)
599603
gb = df.groupby("Date")
600604
r = gb[["File"]].max()
601605
e = gb["File"].max().to_frame()

pandas/tests/groupby/test_timegrouper.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
datetime,
66
timedelta,
77
)
8-
from io import StringIO
98

109
import numpy as np
1110
import pytest
@@ -607,14 +606,26 @@ def test_frame_datetime64_handling_groupby(self):
607606

608607
def test_groupby_multi_timezone(self):
609608
# combining multiple / different timezones yields UTC
609+
df = DataFrame(
610+
{
611+
"value": range(5),
612+
"date": [
613+
"2000-01-28 16:47:00",
614+
"2000-01-29 16:48:00",
615+
"2000-01-30 16:49:00",
616+
"2000-01-31 16:50:00",
617+
"2000-01-01 16:50:00",
618+
],
619+
"tz": [
620+
"America/Chicago",
621+
"America/Chicago",
622+
"America/Los_Angeles",
623+
"America/Chicago",
624+
"America/New_York",
625+
],
626+
}
627+
)
610628

611-
data = """0,2000-01-28 16:47:00,America/Chicago
612-
1,2000-01-29 16:48:00,America/Chicago
613-
2,2000-01-30 16:49:00,America/Los_Angeles
614-
3,2000-01-31 16:50:00,America/Chicago
615-
4,2000-01-01 16:50:00,America/New_York"""
616-
617-
df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"])
618629
result = df.groupby("tz", group_keys=False).date.apply(
619630
lambda x: pd.to_datetime(x).dt.tz_localize(x.name)
620631
)

pandas/tests/groupby/transform/test_transform.py

+22-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
""" test with the .transform """
2-
from io import StringIO
3-
42
import numpy as np
53
import pytest
64

@@ -337,22 +335,28 @@ def test_transform_datetime_to_numeric():
337335

338336
def test_transform_casting():
339337
# 13046
340-
data = """
341-
idx A ID3 DATETIME
342-
0 B-028 b76cd912ff "2014-10-08 13:43:27"
343-
1 B-054 4a57ed0b02 "2014-10-08 14:26:19"
344-
2 B-076 1a682034f8 "2014-10-08 14:29:01"
345-
3 B-023 b76cd912ff "2014-10-08 18:39:34"
346-
4 B-023 f88g8d7sds "2014-10-08 18:40:18"
347-
5 B-033 b76cd912ff "2014-10-08 18:44:30"
348-
6 B-032 b76cd912ff "2014-10-08 18:46:00"
349-
7 B-037 b76cd912ff "2014-10-08 18:52:15"
350-
8 B-046 db959faf02 "2014-10-08 18:59:59"
351-
9 B-053 b76cd912ff "2014-10-08 19:17:48"
352-
10 B-065 b76cd912ff "2014-10-08 19:21:38"
353-
"""
354-
df = pd.read_csv(
355-
StringIO(data), sep=r"\s+", index_col=[0], parse_dates=["DATETIME"]
338+
times = [
339+
"13:43:27",
340+
"14:26:19",
341+
"14:29:01",
342+
"18:39:34",
343+
"18:40:18",
344+
"18:44:30",
345+
"18:46:00",
346+
"18:52:15",
347+
"18:59:59",
348+
"19:17:48",
349+
"19:21:38",
350+
]
351+
df = DataFrame(
352+
{
353+
"A": [f"B-{i}" for i in range(11)],
354+
"ID3": np.take(
355+
["a", "b", "c", "d", "e"], [0, 1, 2, 1, 3, 1, 1, 1, 4, 1, 1]
356+
),
357+
"DATETIME": pd.to_datetime([f"2014-10-08 {time}" for time in times]),
358+
},
359+
index=pd.RangeIndex(11, name="idx"),
356360
)
357361

358362
result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff())

pandas/tests/indexes/test_base.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from collections import defaultdict
22
from datetime import datetime
3-
from io import StringIO
43
import math
54
import operator
65
import re
@@ -1174,13 +1173,21 @@ def test_groupby(self):
11741173
def test_equals_op_multiindex(self, mi, expected):
11751174
# GH9785
11761175
# test comparisons of multiindex
1177-
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
1176+
df = DataFrame(
1177+
[3, 6],
1178+
columns=["c"],
1179+
index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]),
1180+
)
11781181

11791182
result = df.index == mi
11801183
tm.assert_numpy_array_equal(result, expected)
11811184

11821185
def test_equals_op_multiindex_identify(self):
1183-
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
1186+
df = DataFrame(
1187+
[3, 6],
1188+
columns=["c"],
1189+
index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]),
1190+
)
11841191

11851192
result = df.index == df.index
11861193
expected = np.array([True, True])
@@ -1194,7 +1201,11 @@ def test_equals_op_multiindex_identify(self):
11941201
],
11951202
)
11961203
def test_equals_op_mismatched_multiindex_raises(self, index):
1197-
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
1204+
df = DataFrame(
1205+
[3, 6],
1206+
columns=["c"],
1207+
index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]),
1208+
)
11981209

11991210
with pytest.raises(ValueError, match="Lengths must match"):
12001211
df.index == index

pandas/tests/indexing/multiindex/test_loc.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -698,10 +698,19 @@ def test_loc_mi_with_level1_named_0():
698698
tm.assert_series_equal(result, expected)
699699

700700

701-
def test_getitem_str_slice(datapath):
701+
def test_getitem_str_slice():
702702
# GH#15928
703-
path = datapath("reshape", "merge", "data", "quotes2.csv")
704-
df = pd.read_csv(path, parse_dates=["time"])
703+
df = DataFrame(
704+
[
705+
["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
706+
["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
707+
["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
708+
["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
709+
["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
710+
["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
711+
],
712+
columns="time,ticker,bid,ask".split(","),
713+
)
705714
df2 = df.set_index(["ticker", "time"]).sort_index()
706715

707716
res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0)

0 commit comments

Comments (0)