Skip to content

Json parametrize more2 #33163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 31, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,66 @@ def empty_frame():
return DataFrame()


@pytest.fixture
def int_frame():
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this duplicate an existing fixture?

Fixture for DataFrame of ints with index of unique strings

Columns are ['A', 'B', 'C', 'D']

A B C D
vpBeWjM651 1 0 1 0
5JyxmrP1En -1 0 0 0
qEDaoD49U2 -1 1 0 0
m66TkTfsFe 0 0 0 0
EHPaNzEUFm -1 0 -1 0
fpRJCevQhi 2 0 0 0
OlQvnmfi3Q 0 0 -2 0
... .. .. .. ..
uB1FPlz4uP 0 0 0 1
EcSe6yNzCU 0 0 -1 0
L50VudaiI8 -1 1 -2 0
y3bpw4nwIp 0 -1 0 0
H0RdLLwrCT 1 1 0 0
rY82K0vMwm 0 0 0 0
1OPIUjnkjk 2 0 0 0

[30 rows x 4 columns]
"""
df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
# force these all to int64 to avoid platform testing issues
return DataFrame({c: s for c, s in df.items()}, dtype=np.int64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this could be done more directly as:

return DataFrame(tm.getSeriesData()).astype("int64")

I realize this was just moved as-is so could maybe wait for a follow-up but since the diff is relatively small I don't think it'd hurt to simplify while we're here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree - nice catch!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed this had to do with the uniqueness mentioned in the docstring.



@pytest.fixture
def datetime_frame():
"""
Fixture for DataFrame of floats with DatetimeIndex

Columns are ['A', 'B', 'C', 'D']

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this duplicate some existing fixtures?

A B C D
2000-01-03 -1.122153 0.468535 0.122226 1.693711
2000-01-04 0.189378 0.486100 0.007864 -1.216052
2000-01-05 0.041401 -0.835752 -0.035279 -0.414357
2000-01-06 0.430050 0.894352 0.090719 0.036939
2000-01-07 -0.620982 -0.668211 -0.706153 1.466335
2000-01-10 -0.752633 0.328434 -0.815325 0.699674
2000-01-11 -2.236969 0.615737 -0.829076 -1.196106
... ... ... ... ...
2000-02-03 1.642618 -0.579288 0.046005 1.385249
2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
2000-02-07 -2.656149 -0.601387 1.410148 0.444150
2000-02-08 -1.201881 -1.289040 0.772992 -1.445300
2000-02-09 1.377373 0.398619 1.008453 -0.928207
2000-02-10 0.473194 -0.636677 0.984058 0.511519
2000-02-11 -0.965556 0.408313 -1.312844 -0.381948

[30 rows x 4 columns]
"""
return DataFrame(tm.getTimeSeriesData())


@pytest.fixture
def float_frame():
"""
Expand Down
60 changes: 0 additions & 60 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,66 +79,6 @@ def bool_frame_with_na():
return df


@pytest.fixture
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']

                A  B  C  D
    vpBeWjM651  1  0  1  0
    5JyxmrP1En -1  0  0  0
    qEDaoD49U2 -1  1  0  0
    m66TkTfsFe  0  0  0  0
    EHPaNzEUFm -1  0 -1  0
    fpRJCevQhi  2  0  0  0
    OlQvnmfi3Q  0  0 -2  0
    ...        .. .. .. ..
    uB1FPlz4uP  0  0  0  1
    EcSe6yNzCU  0  0 -1  0
    L50VudaiI8 -1  1 -2  0
    y3bpw4nwIp  0 -1  0  0
    H0RdLLwrCT  1  1  0  0
    rY82K0vMwm  0  0  0  0
    1OPIUjnkjk  2  0  0  0

    [30 rows x 4 columns]
    """
    # Cast the whole frame in one step to an explicit "int64" so the dtype is
    # the same on every platform; this avoids going through the
    # platform-dependent default ``int`` and building a second DataFrame.
    return DataFrame(tm.getSeriesData()).astype("int64")


@pytest.fixture
def datetime_frame():
    """
    DataFrame of floats indexed by a DatetimeIndex.

    The columns are ['A', 'B', 'C', 'D'], e.g.:

                       A         B         C         D
    2000-01-03 -1.122153  0.468535  0.122226  1.693711
    2000-01-04  0.189378  0.486100  0.007864 -1.216052
    2000-01-05  0.041401 -0.835752 -0.035279 -0.414357
    2000-01-06  0.430050  0.894352  0.090719  0.036939
    2000-01-07 -0.620982 -0.668211 -0.706153  1.466335
    2000-01-10 -0.752633  0.328434 -0.815325  0.699674
    2000-01-11 -2.236969  0.615737 -0.829076 -1.196106
    ...              ...       ...       ...       ...
    2000-02-03  1.642618 -0.579288  0.046005  1.385249
    2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
    2000-02-07 -2.656149 -0.601387  1.410148  0.444150
    2000-02-08 -1.201881 -1.289040  0.772992 -1.445300
    2000-02-09  1.377373  0.398619  1.008453 -0.928207
    2000-02-10  0.473194 -0.636677  0.984058  0.511519
    2000-02-11 -0.965556  0.408313 -1.312844 -0.381948

    [30 rows x 4 columns]
    """
    # Fetch the time-series test data first, then wrap it in a frame.
    time_series_data = tm.getTimeSeriesData()
    return DataFrame(time_series_data)


@pytest.fixture
def float_string_frame():
"""
Expand Down
53 changes: 20 additions & 33 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,15 @@
import pandas._testing as tm

_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()

_frame = DataFrame(_seriesd)
_intframe = DataFrame({k: v.astype(np.int64) for k, v in _seriesd.items()})

_tsframe = DataFrame(_tsd)
_cat_frame = _frame.copy()
cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
_cat_frame.index = pd.CategoricalIndex(cat, name="E")
_cat_frame["E"] = list(reversed(cat))
_cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")

_mixed_frame = _frame.copy()


def assert_json_roundtrip_equal(result, expected, orient):
if orient == "records" or orient == "values":
Expand All @@ -43,17 +38,10 @@ def assert_json_roundtrip_equal(result, expected, orient):
class TestPandasContainer:
@pytest.fixture(autouse=True)
def setup(self):
self.intframe = _intframe.copy()
self.tsframe = _tsframe.copy()
self.mixed_frame = _mixed_frame.copy()
self.categorical = _cat_frame.copy()

yield

del self.intframe
del self.tsframe
del self.mixed_frame

def test_frame_double_encoded_labels(self, orient):
df = DataFrame(
[["a", "b"], ["c", "d"]],
Expand Down Expand Up @@ -137,12 +125,12 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype, float_frame)
@pytest.mark.parametrize("dtype", [False, np.int64])
@pytest.mark.parametrize("convert_axes", [True, False])
@pytest.mark.parametrize("numpy", [True, False])
def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype):
data = self.intframe.to_json(orient=orient)
def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype, int_frame):
data = int_frame.to_json(orient=orient)
result = pd.read_json(
data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype
)
expected = self.intframe.copy()
expected = int_frame
if (
numpy
and (is_platform_32bit() or is_platform_windows())
Expand Down Expand Up @@ -236,13 +224,13 @@ def test_roundtrip_empty(self, orient, convert_axes, numpy, empty_frame):

@pytest.mark.parametrize("convert_axes", [True, False])
@pytest.mark.parametrize("numpy", [True, False])
def test_roundtrip_timestamp(self, orient, convert_axes, numpy):
def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame):
# TODO: improve coverage with date_format parameter
data = self.tsframe.to_json(orient=orient)
data = datetime_frame.to_json(orient=orient)
result = pd.read_json(
data, orient=orient, convert_axes=convert_axes, numpy=numpy
)
expected = self.tsframe.copy()
expected = datetime_frame.copy()

if not convert_axes: # one off for ts handling
# DTI gets converted to epoch values
Expand Down Expand Up @@ -730,34 +718,33 @@ def test_reconstruction_index(self):
result = read_json(df.to_json())
tm.assert_frame_equal(result, df)

def test_path(self, float_frame):
def test_path(self, float_frame, int_frame, datetime_frame):
with tm.ensure_clean("test.json") as path:
for df in [
float_frame,
self.intframe,
self.tsframe,
self.mixed_frame,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just removed mixed_frame here as this is its only use in the module; it doesn't seem important to re-create, and in its current state it was really the same thing as float_frame anyway.

int_frame,
datetime_frame,
]:
df.to_json(path)
read_json(path)

def test_axis_dates(self, datetime_series):
def test_axis_dates(self, datetime_series, datetime_frame):

# frame
json = self.tsframe.to_json()
json = datetime_frame.to_json()
result = read_json(json)
tm.assert_frame_equal(result, self.tsframe)
tm.assert_frame_equal(result, datetime_frame)

# series
json = datetime_series.to_json()
result = read_json(json, typ="series")
tm.assert_series_equal(result, datetime_series, check_names=False)
assert result.name is None

def test_convert_dates(self, datetime_series):
def test_convert_dates(self, datetime_series, datetime_frame):

# frame
df = self.tsframe.copy()
df = datetime_frame
df["date"] = Timestamp("20130101")

json = df.to_json()
Expand Down Expand Up @@ -837,8 +824,8 @@ def test_convert_dates_infer(self, infer_word):
("20130101 20:43:42.123456789", "ns"),
],
)
def test_date_format_frame(self, date, date_unit):
df = self.tsframe.copy()
def test_date_format_frame(self, date, date_unit, datetime_frame):
df = datetime_frame

df["date"] = Timestamp(date)
df.iloc[1, df.columns.get_loc("date")] = pd.NaT
Expand All @@ -853,8 +840,8 @@ def test_date_format_frame(self, date, date_unit):
expected["date"] = expected["date"].dt.tz_localize("UTC")
tm.assert_frame_equal(result, expected)

def test_date_format_frame_raises(self):
df = self.tsframe.copy()
def test_date_format_frame_raises(self, datetime_frame):
df = datetime_frame
msg = "Invalid value 'foo' for option 'date_unit'"
with pytest.raises(ValueError, match=msg):
df.to_json(date_format="iso", date_unit="foo")
Expand Down Expand Up @@ -890,8 +877,8 @@ def test_date_format_series_raises(self, datetime_series):
ts.to_json(date_format="iso", date_unit="foo")

@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_date_unit(self, unit):
df = self.tsframe.copy()
def test_date_unit(self, unit, datetime_frame):
df = datetime_frame
df["date"] = Timestamp("20130101 20:43:42")
dl = df.columns.get_loc("date")
df.iloc[1, dl] = Timestamp("19710101 20:43:42")
Expand Down