Skip to content

TST: Remove unnecessary read_csv usage during testing #55643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 7 commits on Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from datetime import datetime
from io import StringIO
import itertools
import re

Expand Down Expand Up @@ -1771,21 +1770,21 @@ def test_stack_duplicate_index(self, idx, columns, exp_idx, future_stack):
"ignore:The previous implementation of stack is deprecated"
)
def test_unstack_odd_failure(self, future_stack):
data = """day,time,smoker,sum,len
Fri,Dinner,No,8.25,3.
Fri,Dinner,Yes,27.03,9
Fri,Lunch,No,3.0,1
Fri,Lunch,Yes,13.68,6
Sat,Dinner,No,139.63,45
Sat,Dinner,Yes,120.77,42
Sun,Dinner,No,180.57,57
Sun,Dinner,Yes,66.82,19
Thu,Dinner,No,3.0,1
Thu,Lunch,No,117.32,44
Thu,Lunch,Yes,51.51,17"""

df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"])

mi = MultiIndex.from_arrays(
[
["Fri"] * 4 + ["Sat"] * 2 + ["Sun"] * 2 + ["Thu"] * 3,
["Dinner"] * 2 + ["Lunch"] * 2 + ["Dinner"] * 5 + ["Lunch"] * 2,
["No", "Yes"] * 4 + ["No", "No", "Yes"],
],
names=["day", "time", "smoker"],
)
df = DataFrame(
{
"sum": np.arange(11, dtype="float64"),
"len": np.arange(11, dtype="float64"),
},
index=mi,
)
# it works, #2100
result = df.unstack(2)

Expand Down
90 changes: 65 additions & 25 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
date,
datetime,
)
from io import StringIO

import numpy as np
import pytest
Expand Down Expand Up @@ -38,39 +37,80 @@ def store(group):
tm.assert_frame_equal(groups[0], expected_value)


def test_apply_issues():
def test_apply_index_date():
# GH 5788

s = """2011.05.16,00:00,1.40893
2011.05.16,01:00,1.40760
2011.05.16,02:00,1.40750
2011.05.16,03:00,1.40649
2011.05.17,02:00,1.40893
2011.05.17,03:00,1.40760
2011.05.17,04:00,1.40750
2011.05.17,05:00,1.40649
2011.05.18,02:00,1.40893
2011.05.18,03:00,1.40760
2011.05.18,04:00,1.40750
2011.05.18,05:00,1.40649"""

df = pd.read_csv(
StringIO(s),
header=None,
names=["date", "time", "value"],
parse_dates=[["date", "time"]],
ts = [
"2011-05-16 00:00",
"2011-05-16 01:00",
"2011-05-16 02:00",
"2011-05-16 03:00",
"2011-05-17 02:00",
"2011-05-17 03:00",
"2011-05-17 04:00",
"2011-05-17 05:00",
"2011-05-18 02:00",
"2011-05-18 03:00",
"2011-05-18 04:00",
"2011-05-18 05:00",
]
df = DataFrame(
{
"value": [
1.40893,
1.40760,
1.40750,
1.40649,
1.40893,
1.40760,
1.40750,
1.40649,
1.40893,
1.40760,
1.40750,
1.40649,
],
},
index=Index(pd.to_datetime(ts), name="date_time"),
)
df = df.set_index("date_time")

expected = df.groupby(df.index.date).idxmax()
result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
tm.assert_frame_equal(result, expected)


def test_apply_index_date_object():
# GH 5789
# don't auto coerce dates
df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"])
ts = [
"2011-05-16 00:00",
"2011-05-16 01:00",
"2011-05-16 02:00",
"2011-05-16 03:00",
"2011-05-17 02:00",
"2011-05-17 03:00",
"2011-05-17 04:00",
"2011-05-17 05:00",
"2011-05-18 02:00",
"2011-05-18 03:00",
"2011-05-18 04:00",
"2011-05-18 05:00",
]
df = DataFrame([row.split() for row in ts], columns=["date", "time"])
df["value"] = [
1.40893,
1.40760,
1.40750,
1.40649,
1.40893,
1.40760,
1.40750,
1.40649,
1.40893,
1.40760,
1.40750,
1.40649,
]
exp_idx = Index(
["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date"
["2011-05-16", "2011-05-17", "2011-05-18"], dtype=object, name="date"
)
expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
msg = "DataFrameGroupBy.apply operated on the grouping columns"
Expand Down
20 changes: 12 additions & 8 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import builtins
import datetime as dt
from io import StringIO
from string import ascii_lowercase

import numpy as np
Expand Down Expand Up @@ -589,13 +588,18 @@ def test_min_empty_string_dtype(func):


def test_max_nan_bug():
raw = """,Date,app,File
-04-23,2013-04-23 00:00:00,,log080001.log
-05-06,2013-05-06 00:00:00,,log.log
-05-07,2013-05-07 00:00:00,OE,xlsx"""

with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
df = pd.read_csv(StringIO(raw), parse_dates=[0])
df = DataFrame(
{
"Unnamed: 0": ["-04-23", "-05-06", "-05-07"],
"Date": [
"2013-04-23 00:00:00",
"2013-05-06 00:00:00",
"2013-05-07 00:00:00",
],
"app": Series([np.nan, np.nan, "OE"]),
"File": ["log080001.log", "log.log", "xlsx"],
}
)
gb = df.groupby("Date")
r = gb[["File"]].max()
e = gb["File"].max().to_frame()
Expand Down
27 changes: 19 additions & 8 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
datetime,
timedelta,
)
from io import StringIO

import numpy as np
import pytest
Expand Down Expand Up @@ -607,14 +606,26 @@ def test_frame_datetime64_handling_groupby(self):

def test_groupby_multi_timezone(self):
# combining multiple / different timezones yields UTC
df = DataFrame(
{
"value": range(5),
"date": [
"2000-01-28 16:47:00",
"2000-01-29 16:48:00",
"2000-01-30 16:49:00",
"2000-01-31 16:50:00",
"2000-01-01 16:50:00",
],
"tz": [
"America/Chicago",
"America/Chicago",
"America/Los_Angeles",
"America/Chicago",
"America/New_York",
],
}
)

data = """0,2000-01-28 16:47:00,America/Chicago
1,2000-01-29 16:48:00,America/Chicago
2,2000-01-30 16:49:00,America/Los_Angeles
3,2000-01-31 16:50:00,America/Chicago
4,2000-01-01 16:50:00,America/New_York"""

df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"])
result = df.groupby("tz", group_keys=False).date.apply(
lambda x: pd.to_datetime(x).dt.tz_localize(x.name)
)
Expand Down
40 changes: 22 additions & 18 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
""" test with the .transform """
from io import StringIO

import numpy as np
import pytest

Expand Down Expand Up @@ -337,22 +335,28 @@ def test_transform_datetime_to_numeric():

def test_transform_casting():
# 13046
data = """
idx A ID3 DATETIME
0 B-028 b76cd912ff "2014-10-08 13:43:27"
1 B-054 4a57ed0b02 "2014-10-08 14:26:19"
2 B-076 1a682034f8 "2014-10-08 14:29:01"
3 B-023 b76cd912ff "2014-10-08 18:39:34"
4 B-023 f88g8d7sds "2014-10-08 18:40:18"
5 B-033 b76cd912ff "2014-10-08 18:44:30"
6 B-032 b76cd912ff "2014-10-08 18:46:00"
7 B-037 b76cd912ff "2014-10-08 18:52:15"
8 B-046 db959faf02 "2014-10-08 18:59:59"
9 B-053 b76cd912ff "2014-10-08 19:17:48"
10 B-065 b76cd912ff "2014-10-08 19:21:38"
"""
df = pd.read_csv(
StringIO(data), sep=r"\s+", index_col=[0], parse_dates=["DATETIME"]
times = [
"13:43:27",
"14:26:19",
"14:29:01",
"18:39:34",
"18:40:18",
"18:44:30",
"18:46:00",
"18:52:15",
"18:59:59",
"19:17:48",
"19:21:38",
]
df = DataFrame(
{
"A": [f"B-{i}" for i in range(11)],
"ID3": np.take(
["a", "b", "c", "d", "e"], [0, 1, 2, 1, 3, 1, 1, 1, 4, 1, 1]
),
"DATETIME": pd.to_datetime([f"2014-10-08 {time}" for time in times]),
},
index=pd.RangeIndex(11, name="idx"),
)

result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff())
Expand Down
19 changes: 15 additions & 4 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from collections import defaultdict
from datetime import datetime
from io import StringIO
import math
import operator
import re
Expand Down Expand Up @@ -1174,13 +1173,21 @@ def test_groupby(self):
def test_equals_op_multiindex(self, mi, expected):
# GH9785
# test comparisons of multiindex
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
df = DataFrame(
[3, 6],
columns=["c"],
index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]),
)

result = df.index == mi
tm.assert_numpy_array_equal(result, expected)

def test_equals_op_multiindex_identify(self):
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
df = DataFrame(
[3, 6],
columns=["c"],
index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]),
)

result = df.index == df.index
expected = np.array([True, True])
Expand All @@ -1194,7 +1201,11 @@ def test_equals_op_multiindex_identify(self):
],
)
def test_equals_op_mismatched_multiindex_raises(self, index):
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
df = DataFrame(
[3, 6],
columns=["c"],
index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]),
)

with pytest.raises(ValueError, match="Lengths must match"):
df.index == index
Expand Down
15 changes: 12 additions & 3 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,10 +698,19 @@ def test_loc_mi_with_level1_named_0():
tm.assert_series_equal(result, expected)


def test_getitem_str_slice(datapath):
def test_getitem_str_slice():
# GH#15928
path = datapath("reshape", "merge", "data", "quotes2.csv")
df = pd.read_csv(path, parse_dates=["time"])
df = DataFrame(
[
["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
],
columns="time,ticker,bid,ask".split(","),
)
df2 = df.set_index(["ticker", "time"]).sort_index()

res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0)
Expand Down
Loading