Skip to content

Commit 362a224

Browse files
committed
add tests from DataFrame.info
1 parent df39323 commit 362a224

File tree

4 files changed

+114
-21
lines changed

4 files changed

+114
-21
lines changed

pandas/core/frame.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -2313,13 +2313,11 @@ def to_html(
23132313
1 column_2 1000000 non-null object
23142314
2 column_3 1000000 non-null object
23152315
dtypes: object(3)
2316-
memory usage: 188.8 MB
2317-
""",
2318-
see_also_sub = """
2316+
memory usage: 188.8 MB""",
2317+
see_also_sub="""
23192318
DataFrame.describe: Generate descriptive statistics of DataFrame
23202319
columns.
2321-
DataFrame.memory_usage: Memory usage of DataFrame columns.
2322-
"""
2320+
DataFrame.memory_usage: Memory usage of DataFrame columns.""",
23232321
)
23242322
@Appender(NDFrame.info.__doc__)
23252323
def info(

pandas/core/generic.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1804,7 +1804,7 @@ def info(
18041804
cols = self.columns
18051805
dtypes = self.dtypes
18061806
else:
1807-
cols = pd.Series([self.name])
1807+
cols = pd.Index([self.name])
18081808
dtypes = pd.Series([self.dtypes])
18091809

18101810
col_count = len(cols)
@@ -1900,7 +1900,8 @@ def _verbose_repr():
19001900
)
19011901

19021902
def _non_verbose_repr():
1903-
lines.append(cols._summary(name="Columns"))
1903+
if self._typ == "dataframe":
1904+
lines.append(cols._summary(name="Columns"))
19041905

19051906
def _sizeof_fmt(num, size_qualifier):
19061907
# returns size in human readable format

pandas/core/series.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -4198,17 +4198,13 @@ def replace(
41984198
--- -------------- -----
41994199
0 5 non-null object
42004200
dtypes: object(1)
4201-
memory usage: 80.0+ bytes
4202-
""",
4203-
see_also_sub="""
4204-
Series.describe: Generate descriptive statistics of Series
4205-
Series.memory_usage: Memory usage of Series.
4206-
"""
4201+
memory usage: 80.0+ bytes""",
4202+
see_also_sub="""
4203+
Series.describe: Generate descriptive statistics of Series.
4204+
Series.memory_usage: Memory usage of Series.""",
42074205
)
42084206
@Appender(NDFrame.info.__doc__)
4209-
def info(
4210-
self, verbose=None, buf=None, memory_usage=None, null_counts=None
4211-
) -> None:
4207+
def info(self, verbose=None, buf=None, memory_usage=None, null_counts=None) -> None:
42124208
return super().info(verbose, buf, None, memory_usage, null_counts)
42134209

42144210
@Appender(generic._shared_docs["shift"] % _shared_doc_kwargs)

pandas/tests/series/test_repr.py

+103-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
from datetime import datetime, timedelta
2+
from io import StringIO
23

34
import numpy as np
45
import pytest
56

7+
from pandas.compat import PYPY
8+
69
import pandas as pd
710
from pandas import (
811
Categorical,
@@ -488,22 +491,117 @@ def test_categorical_series_repr_timedelta_ordered(self):
488491

489492
assert repr(s) == exp
490493

491-
def test_info(self, capsys):
494+
@pytest.mark.parametrize("verbose", [True, False])
495+
def test_info(self, verbose, capsys):
492496
index = MultiIndex(
493497
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
494498
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
495499
names=["first", "second"],
496500
)
497501
s = Series(range(len(index)), index=index, name="sth")
498-
s.info()
502+
s.info(verbose=verbose)
499503
expected = """<class 'pandas.core.series.Series'>
500504
MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
501-
Series name: sth
505+
"""
506+
if verbose:
507+
expected += """Series name: sth
502508
# Non-Null Count Dtype
503509
--- -------------- -----
504510
0 10 non-null int64
505-
dtypes: int64(1)
506-
memory usage: 505.0+ bytes
511+
"""
512+
expected += f"""dtypes: int64(1)
513+
memory usage: {s.memory_usage()}.0+ bytes
507514
"""
508515
result = capsys.readouterr().out
509516
assert result == expected
517+
518+
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy(self):
    """Deep memory usage must exceed the shallow figure (skipped on PyPy)."""
    # Series built with a string label in its index.
    labelled = pd.Series({"a": [1]}, index=["foo"])
    deep_bytes = labelled.memory_usage(index=True, deep=True)
    shallow_bytes = labelled.memory_usage(index=True)
    assert deep_bytes > shallow_bytes

    # Series built from a dict whose single value is a list of strings.
    boxed = pd.Series({"a": ["a"]})
    deep_bytes = boxed.memory_usage(deep=True)
    shallow_bytes = boxed.memory_usage()
    assert deep_bytes > shallow_bytes
527+
528+
@pytest.mark.skipif(
    not PYPY, reason="only relevant on PyPy, where deep=True doesn't change result"
)
def test_info_memory_usage_deep_pypy(self):
    """On PyPy, deep and shallow memory usage must report the same figure.

    The skip reason is worded for the interpreters on which the test is
    actually skipped (everything that is not PyPy), so the skip report
    explains itself.
    """
    s_with_object_index = pd.Series({"a": [1]}, index=["foo"])
    assert s_with_object_index.memory_usage(
        index=True, deep=True
    ) == s_with_object_index.memory_usage(index=True)

    s_object = pd.Series({"a": ["a"]})
    assert s_object.memory_usage(deep=True) == s_object.memory_usage()
537+
538+
def test_info_memory_usage_qualified(self):
539+
540+
buf = StringIO()
541+
s = pd.Series(1, index=[1, 2, 3])
542+
s.info(buf=buf)
543+
assert "+" not in buf.getvalue()
544+
545+
buf = StringIO()
546+
s = pd.Series(1, index=list("ABC"))
547+
s.info(buf=buf)
548+
assert "+" in buf.getvalue()
549+
550+
buf = StringIO()
551+
s = Series(1, index=pd.MultiIndex.from_product([range(3), range(3)]),)
552+
s.info(buf=buf)
553+
assert "+" not in buf.getvalue()
554+
555+
buf = StringIO()
556+
s = Series(1, index=pd.MultiIndex.from_product([range(3), ["foo", "bar"]]),)
557+
s.info(buf=buf)
558+
assert "+" in buf.getvalue()
559+
560+
def test_info_memory_usage_bug_on_multiindex(self):
561+
# GH 14308
562+
# memory usage introspection should not materialize .values
563+
564+
from string import ascii_uppercase as uppercase
565+
566+
def memory_usage(f):
567+
return f.memory_usage(deep=True)
568+
569+
N = 100
570+
M = len(uppercase)
571+
index = pd.MultiIndex.from_product(
572+
[list(uppercase), pd.date_range("20160101", periods=N)],
573+
names=["id", "date"],
574+
)
575+
s = Series(np.random.randn(N * M), index=index)
576+
577+
unstacked = s.unstack("id")
578+
assert s.values.nbytes == unstacked.values.nbytes
579+
assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
580+
581+
# high upper bound
582+
assert (
583+
unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True) < 2000
584+
)
585+
586+
def test_info_categorical(self):
587+
# GH14298
588+
idx = pd.CategoricalIndex(["a", "b"])
589+
s = pd.Series(np.zeros((2)), index=idx)
590+
591+
buf = StringIO()
592+
s.info(buf=buf)
593+
594+
def test_info_categorical_column(self):
595+
596+
# make sure it works
597+
n = 2500
598+
s = Series(
599+
np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
600+
).astype("category")
601+
s.isna()
602+
buf = StringIO()
603+
s.info(buf=buf)
604+
605+
s2 = s[s == "d"]
606+
buf = StringIO()
607+
s2.info(buf=buf)

0 commit comments

Comments
 (0)