-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TST: parametrize test_info #37887
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TST: parametrize test_info #37887
Changes from 2 commits
41db5fb
4baed2c
1c27dc0
f91d17d
1ffba54
db0fa5e
429387d
baf5c3d
a8056ab
84263e3
173c0ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,6 @@ | |
Series, | ||
date_range, | ||
option_context, | ||
reset_option, | ||
set_option, | ||
) | ||
import pandas._testing as tm | ||
|
||
|
@@ -65,9 +63,7 @@ def test_info_empty(): | |
assert result == expected | ||
|
||
|
||
def test_info_categorical_column(): | ||
|
||
# make sure it works | ||
def test_info_categorical_column_just_works(): | ||
n = 2500 | ||
df = DataFrame({"int64": np.random.randint(100, size=n)}) | ||
df["category"] = Series( | ||
|
@@ -82,18 +78,41 @@ def test_info_categorical_column(): | |
df2.info(buf=buf) | ||
|
||
|
||
def test_info(float_frame, datetime_frame): | ||
def test_info_frame_float_frame_just_works(float_frame): | ||
io = StringIO() | ||
float_frame.info(buf=io) | ||
|
||
|
||
def test_info_datetime_just_works(datetime_frame): | ||
io = StringIO() | ||
datetime_frame.info(buf=io) | ||
|
||
frame = DataFrame(np.random.randn(5, 3)) | ||
|
||
frame.info() | ||
frame.info(verbose=False) | ||
@pytest.mark.parametrize( | ||
"num_columns, max_info_columns, verbose", | ||
[ | ||
(10, 100, True), | ||
(10, 11, True), | ||
(10, 10, True), | ||
(10, 9, False), | ||
(10, 1, False), | ||
], | ||
) | ||
def test_info_default_verbose_selection(num_columns, max_info_columns, verbose): | ||
frame = DataFrame(np.random.randn(5, num_columns)) | ||
with option_context("display.max_info_columns", max_info_columns): | ||
io_default = StringIO() | ||
frame.info(buf=io_default) | ||
result = io_default.getvalue() | ||
|
||
io_explicit = StringIO() | ||
frame.info(buf=io_explicit, verbose=verbose) | ||
expected = io_explicit.getvalue() | ||
|
||
assert result == expected | ||
|
||
|
||
def test_info_verbose(): | ||
def test_info_verbose_check_header_separator_body(): | ||
buf = StringIO() | ||
size = 1001 | ||
start = 5 | ||
|
@@ -202,33 +221,29 @@ def test_info_wide(): | |
|
||
io = StringIO() | ||
df.info(buf=io, max_cols=101) | ||
rs = io.getvalue() | ||
assert len(rs.splitlines()) > 100 | ||
xp = rs | ||
result = io.getvalue() | ||
assert len(result.splitlines()) > 100 | ||
|
||
set_option("display.max_info_columns", 101) | ||
io = StringIO() | ||
df.info(buf=io) | ||
assert rs == xp | ||
reset_option("display.max_info_columns") | ||
expected = result | ||
with option_context("display.max_info_columns", 101): | ||
io = StringIO() | ||
df.info(buf=io) | ||
result = io.getvalue() | ||
assert result == expected | ||
|
||
|
||
def test_info_duplicate_columns(): | ||
def test_info_duplicate_columns_just_works(): | ||
io = StringIO() | ||
|
||
# it works! | ||
frame = DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"]) | ||
frame.info(buf=io) | ||
|
||
|
||
def test_info_duplicate_columns_shows_correct_dtypes(): | ||
# GH11761 | ||
io = StringIO() | ||
|
||
frame = DataFrame([[1, 2.0]], columns=["a", "a"]) | ||
frame.info(buf=io) | ||
io.seek(0) | ||
lines = io.readlines() | ||
lines = io.getvalue().splitlines(True) | ||
assert " 0 a 1 non-null int64 \n" == lines[5] | ||
assert " 1 a 1 non-null float64\n" == lines[6] | ||
|
||
|
@@ -272,7 +287,6 @@ def test_info_max_cols(): | |
assert len(res.strip().split("\n")) == len_ | ||
|
||
for len_, verbose in [(12, None), (5, False), (12, True)]: | ||
|
||
# max_cols not exceeded | ||
with option_context("max_info_columns", 5): | ||
buf = StringIO() | ||
|
@@ -417,31 +431,36 @@ def test_usage_via_getsizeof(): | |
assert abs(diff) < 100 | ||
|
||
|
||
def test_info_memory_usage_qualified(): | ||
|
||
buf = StringIO() | ||
df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) | ||
df.info(buf=buf) | ||
assert "+" not in buf.getvalue() | ||
|
||
buf = StringIO() | ||
df = DataFrame(1, columns=list("ab"), index=list("ABC")) | ||
df.info(buf=buf) | ||
assert "+" in buf.getvalue() | ||
|
||
buf = StringIO() | ||
df = DataFrame( | ||
1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]) | ||
) | ||
df.info(buf=buf) | ||
assert "+" not in buf.getvalue() | ||
|
||
@pytest.mark.parametrize( | ||
"frame, plus", | ||
[ | ||
(DataFrame(1, columns=list("ab"), index=[1, 2, 3]), False), | ||
(DataFrame(1, columns=list("ab"), index=list("ABC")), True), | ||
( | ||
DataFrame( | ||
1, | ||
columns=list("ab"), | ||
index=MultiIndex.from_product([range(3), range(3)]), | ||
), | ||
False, | ||
), | ||
( | ||
DataFrame( | ||
1, | ||
columns=list("ab"), | ||
index=MultiIndex.from_product([range(3), ["foo", "bar"]]), | ||
), | ||
True, | ||
), | ||
], | ||
) | ||
def test_info_memory_usage_qualified(frame, plus): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Separating these I like, but I'm trying to push back against parametrization that creates DataFrame etc. objects at test collection time. It increases the memory footprint, which is breaking the Windows builds. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right, I remember your suggestion to reduce footprint. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reverted back. |
||
buf = StringIO() | ||
df = DataFrame( | ||
1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]]) | ||
) | ||
df.info(buf=buf) | ||
assert "+" in buf.getvalue() | ||
frame.info(buf=buf) | ||
if plus: | ||
assert "+" in buf.getvalue() | ||
else: | ||
assert "+" not in buf.getvalue() | ||
|
||
|
||
def test_info_memory_usage_bug_on_multiindex(): | ||
|
@@ -454,7 +473,8 @@ def memory_usage(f): | |
N = 100 | ||
M = len(uppercase) | ||
index = MultiIndex.from_product( | ||
[list(uppercase), date_range("20160101", periods=N)], names=["id", "date"] | ||
[list(uppercase), date_range("20160101", periods=N)], | ||
names=["id", "date"], | ||
) | ||
df = DataFrame({"value": np.random.randn(N * M)}, index=index) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"just_works" -> "smoke_test" maybe?
The existing test is a pattern I really dislike, where we have two unrelated fixtures for what should be separate tests (that you've separated, which I like). But it would be nice to find a way to parametrize over float_frame/datetime_frame. I think `indirect` might be related, but I never fully got the hang of that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I managed to do so using the approach described here: https://stackoverflow.com/a/64246323
For that, however, I needed to explicitly import `float_frame`. Without the import it would not work.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now pre-commit check complains about the non-standard imports because I explicitly import fixture functions.
Please suggest
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed that as @jreback suggested #37887 (comment)