Skip to content

TST: parametrize test_info #37887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Nov 25, 2020
115 changes: 53 additions & 62 deletions pandas/tests/io/formats/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,13 @@
Series,
date_range,
option_context,
reset_option,
set_option,
)
import pandas._testing as tm


@pytest.fixture
def datetime_frame():
"""
Fixture for DataFrame of floats with DatetimeIndex

Columns are ['A', 'B', 'C', 'D']

A B C D
2000-01-03 -1.122153 0.468535 0.122226 1.693711
2000-01-04 0.189378 0.486100 0.007864 -1.216052
2000-01-05 0.041401 -0.835752 -0.035279 -0.414357
2000-01-06 0.430050 0.894352 0.090719 0.036939
2000-01-07 -0.620982 -0.668211 -0.706153 1.466335
2000-01-10 -0.752633 0.328434 -0.815325 0.699674
2000-01-11 -2.236969 0.615737 -0.829076 -1.196106
... ... ... ... ...
2000-02-03 1.642618 -0.579288 0.046005 1.385249
2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
2000-02-07 -2.656149 -0.601387 1.410148 0.444150
2000-02-08 -1.201881 -1.289040 0.772992 -1.445300
2000-02-09 1.377373 0.398619 1.008453 -0.928207
2000-02-10 0.473194 -0.636677 0.984058 0.511519
2000-02-11 -0.965556 0.408313 -1.312844 -0.381948

[30 rows x 4 columns]
"""
return DataFrame(tm.getTimeSeriesData())
def duplicate_columns_frame():
    """
    Dataframe with duplicate column names.

    Returns
    -------
    DataFrame
        1500 rows by 4 columns of values drawn with ``np.random.randn``,
        labelled ``["a", "a", "b", "b"]`` so that every label occurs twice.
    """
    return DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"])


def test_info_empty():
Expand All @@ -65,9 +39,7 @@ def test_info_empty():
assert result == expected


def test_info_categorical_column():

# make sure it works
def test_info_categorical_column_smoke_test():
n = 2500
df = DataFrame({"int64": np.random.randint(100, size=n)})
df["category"] = Series(
Expand All @@ -82,18 +54,48 @@ def test_info_categorical_column():
df2.info(buf=buf)


def test_info(float_frame, datetime_frame):
io = StringIO()
float_frame.info(buf=io)
datetime_frame.info(buf=io)
@pytest.mark.parametrize(
    "fixture_func_name",
    [
        "int_frame",
        "float_frame",
        "datetime_frame",
        "duplicate_columns_frame",
    ],
)
def test_info_smoke_test(fixture_func_name, request):
    """
    Smoke test: ``info`` must write more than ten lines for each frame fixture.

    Parameters
    ----------
    fixture_func_name : str
        Name of the fixture that supplies the frame under test.
    request : pytest fixture request
        Used to look the fixture up by name.
    """
    # Fixtures are resolved by name so a single body covers every frame.
    frame = request.getfixturevalue(fixture_func_name)
    buf = StringIO()
    frame.info(buf=buf)
    result = buf.getvalue().splitlines()
    assert len(result) > 10

frame = DataFrame(np.random.randn(5, 3))

frame.info()
frame.info(verbose=False)
@pytest.mark.parametrize(
    "num_columns, max_info_columns, verbose",
    [
        (10, 100, True),
        (10, 11, True),
        (10, 10, True),
        (10, 9, False),
        (10, 1, False),
    ],
)
def test_info_default_verbose_selection(num_columns, max_info_columns, verbose):
    """
    ``info()`` without ``verbose`` must match ``info(verbose=...)``.

    Parameters
    ----------
    num_columns : int
        Number of columns in the frame under test.
    max_info_columns : int
        Value set for the ``display.max_info_columns`` option during the test.
    verbose : bool
        The explicit ``verbose`` value whose output the default call is
        expected to reproduce for this option/column combination.
    """
    frame = DataFrame(np.random.randn(5, num_columns))
    # Both calls run under the same option value so only ``verbose`` differs.
    with option_context("display.max_info_columns", max_info_columns):
        io_default = StringIO()
        frame.info(buf=io_default)
        result = io_default.getvalue()

        io_explicit = StringIO()
        frame.info(buf=io_explicit, verbose=verbose)
        expected = io_explicit.getvalue()

    assert result == expected


def test_info_verbose_check_header_separator_body():
buf = StringIO()
size = 1001
start = 5
Expand Down Expand Up @@ -202,33 +204,23 @@ def test_info_wide():

io = StringIO()
df.info(buf=io, max_cols=101)
rs = io.getvalue()
assert len(rs.splitlines()) > 100
xp = rs

set_option("display.max_info_columns", 101)
io = StringIO()
df.info(buf=io)
assert rs == xp
reset_option("display.max_info_columns")

result = io.getvalue()
assert len(result.splitlines()) > 100

def test_info_duplicate_columns():
io = StringIO()

# it works!
frame = DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"])
frame.info(buf=io)
expected = result
with option_context("display.max_info_columns", 101):
io = StringIO()
df.info(buf=io)
result = io.getvalue()
assert result == expected


def test_info_duplicate_columns_shows_correct_dtypes():
    """
    Columns sharing a label must each be listed with their own dtype.

    The frame has two columns named ``"a"``, one holding an int and one a
    float; the two table rows written by ``info`` are compared verbatim.
    """
    # GH11761
    buf = StringIO()

    frame = DataFrame([[1, 2.0]], columns=["a", "a"])
    frame.info(buf=buf)
    # keepends=True so each expected row can include its trailing newline.
    lines = buf.getvalue().splitlines(True)
    # The expected rows keep the table's column padding, including the
    # trailing spaces after the shorter dtype name.
    assert " 0   a       1 non-null      int64  \n" == lines[5]
    assert " 1   a       1 non-null      float64\n" == lines[6]

Expand Down Expand Up @@ -272,7 +264,6 @@ def test_info_max_cols():
assert len(res.strip().split("\n")) == len_

for len_, verbose in [(12, None), (5, False), (12, True)]:

# max_cols not exceeded
with option_context("max_info_columns", 5):
buf = StringIO()
Expand Down Expand Up @@ -418,7 +409,6 @@ def test_usage_via_getsizeof():


def test_info_memory_usage_qualified():

buf = StringIO()
df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
df.info(buf=buf)
Expand Down Expand Up @@ -454,7 +444,8 @@ def memory_usage(f):
N = 100
M = len(uppercase)
index = MultiIndex.from_product(
[list(uppercase), date_range("20160101", periods=N)], names=["id", "date"]
[list(uppercase), date_range("20160101", periods=N)],
names=["id", "date"],
)
df = DataFrame({"value": np.random.randn(N * M)}, index=index)

Expand Down