Skip to content

TST: parametrize test_info #37887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Nov 25, 2020
120 changes: 70 additions & 50 deletions pandas/tests/io/formats/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
Series,
date_range,
option_context,
reset_option,
set_option,
)
import pandas._testing as tm

Expand Down Expand Up @@ -65,9 +63,7 @@ def test_info_empty():
assert result == expected


def test_info_categorical_column():

# make sure it works
def test_info_categorical_column_just_works():
n = 2500
df = DataFrame({"int64": np.random.randint(100, size=n)})
df["category"] = Series(
Expand All @@ -82,18 +78,41 @@ def test_info_categorical_column():
df2.info(buf=buf)


def test_info(float_frame, datetime_frame):
def test_info_frame_float_frame_just_works(float_frame):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"just_works" -> "smoke_test" maybe?

The existing test follows a pattern I really dislike, where we have two unrelated fixtures for what should be separate tests (which you've now separated, and I like that). But it would be nice to find a way to parametrize over float_frame/datetime_frame. I think `indirect` parametrization might be related, but I never fully got the hang of it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I managed to do so using the approach described here: https://stackoverflow.com/a/64246323
For that, however, I needed to explicitly import float_frame. Without the import it would not work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now the pre-commit check complains about non-standard imports because I explicitly import the fixture functions.
Please suggest how to resolve this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed that as @jreback suggested in #37887 (comment).

io = StringIO()
float_frame.info(buf=io)


def test_info_datetime_just_works(datetime_frame):
io = StringIO()
datetime_frame.info(buf=io)

frame = DataFrame(np.random.randn(5, 3))

frame.info()
frame.info(verbose=False)
@pytest.mark.parametrize(
"num_columns, max_info_columns, verbose",
[
(10, 100, True),
(10, 11, True),
(10, 10, True),
(10, 9, False),
(10, 1, False),
],
)
def test_info_default_verbose_selection(num_columns, max_info_columns, verbose):
frame = DataFrame(np.random.randn(5, num_columns))
with option_context("display.max_info_columns", max_info_columns):
io_default = StringIO()
frame.info(buf=io_default)
result = io_default.getvalue()

io_explicit = StringIO()
frame.info(buf=io_explicit, verbose=verbose)
expected = io_explicit.getvalue()

assert result == expected


def test_info_verbose():
def test_info_verbose_check_header_separator_body():
buf = StringIO()
size = 1001
start = 5
Expand Down Expand Up @@ -202,33 +221,29 @@ def test_info_wide():

io = StringIO()
df.info(buf=io, max_cols=101)
rs = io.getvalue()
assert len(rs.splitlines()) > 100
xp = rs
result = io.getvalue()
assert len(result.splitlines()) > 100

set_option("display.max_info_columns", 101)
io = StringIO()
df.info(buf=io)
assert rs == xp
reset_option("display.max_info_columns")
expected = result
with option_context("display.max_info_columns", 101):
io = StringIO()
df.info(buf=io)
result = io.getvalue()
assert result == expected


def test_info_duplicate_columns():
def test_info_duplicate_columns_just_works():
io = StringIO()

# it works!
frame = DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"])
frame.info(buf=io)


def test_info_duplicate_columns_shows_correct_dtypes():
# GH11761
io = StringIO()

frame = DataFrame([[1, 2.0]], columns=["a", "a"])
frame.info(buf=io)
io.seek(0)
lines = io.readlines()
lines = io.getvalue().splitlines(True)
assert " 0 a 1 non-null int64 \n" == lines[5]
assert " 1 a 1 non-null float64\n" == lines[6]

Expand Down Expand Up @@ -272,7 +287,6 @@ def test_info_max_cols():
assert len(res.strip().split("\n")) == len_

for len_, verbose in [(12, None), (5, False), (12, True)]:

# max_cols not exceeded
with option_context("max_info_columns", 5):
buf = StringIO()
Expand Down Expand Up @@ -417,31 +431,36 @@ def test_usage_via_getsizeof():
assert abs(diff) < 100


def test_info_memory_usage_qualified():

buf = StringIO()
df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
df.info(buf=buf)
assert "+" not in buf.getvalue()

buf = StringIO()
df = DataFrame(1, columns=list("ab"), index=list("ABC"))
df.info(buf=buf)
assert "+" in buf.getvalue()

buf = StringIO()
df = DataFrame(
1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)])
)
df.info(buf=buf)
assert "+" not in buf.getvalue()

@pytest.mark.parametrize(
"frame, plus",
[
(DataFrame(1, columns=list("ab"), index=[1, 2, 3]), False),
(DataFrame(1, columns=list("ab"), index=list("ABC")), True),
(
DataFrame(
1,
columns=list("ab"),
index=MultiIndex.from_product([range(3), range(3)]),
),
False,
),
(
DataFrame(
1,
columns=list("ab"),
index=MultiIndex.from_product([range(3), ["foo", "bar"]]),
),
True,
),
],
)
def test_info_memory_usage_qualified(frame, plus):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like separating these, but I'm trying to push back against parametrization that creates DataFrame (and similar) objects at test collection time. It increases the memory footprint, which is breaking the Windows builds.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I remember your suggestion to reduce the memory footprint.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted back.

buf = StringIO()
df = DataFrame(
1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]])
)
df.info(buf=buf)
assert "+" in buf.getvalue()
frame.info(buf=buf)
if plus:
assert "+" in buf.getvalue()
else:
assert "+" not in buf.getvalue()


def test_info_memory_usage_bug_on_multiindex():
Expand All @@ -454,7 +473,8 @@ def memory_usage(f):
N = 100
M = len(uppercase)
index = MultiIndex.from_product(
[list(uppercase), date_range("20160101", periods=N)], names=["id", "date"]
[list(uppercase), date_range("20160101", periods=N)],
names=["id", "date"],
)
df = DataFrame({"value": np.random.randn(N * M)}, index=index)

Expand Down