Skip to content

CLN, TYP: Factor out part of info #34092

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 11, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions pandas/io/formats/info.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,65 @@
import sys
from typing import IO, Optional, Union
from typing import IO, TYPE_CHECKING, Optional, Tuple, Union

from pandas._config import get_option

from pandas._typing import FrameOrSeries
from pandas._typing import Dtype, FrameOrSeries

from pandas.io.formats import format as fmt
from pandas.io.formats.printing import pprint_thing

if TYPE_CHECKING:
from pandas.core.indexes.api import Index # noqa: F401
from pandas.core.series import Series # noqa: F401

def _put_str(s, space):

def _put_str(s: Union[str, Dtype], space: int) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a docstring?

"""
Make string of specified length, padding to the right if necessary.

Parameters
----------
s : Union[str, Dtype]
String to be formatted.
space : int
Length to force string to be of.

Returns
-------
str
String coerced to given length.

Examples
--------
>>> pd.io.formats.info._put_str("panda", 6)
'panda '
>>> pd.io.formats.info._put_str("panda", 4)
'pand'
"""
return str(s)[:space].ljust(space)


def _get_ids_and_dtypes(data: FrameOrSeries) -> Tuple["Index", "Series"]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this useful?

Copy link
Member Author

@MarcoGorelli MarcoGorelli May 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback if I rebase #31796 onto this, the diff is a little easier to read, as all that needs to change for this part is an `if isinstance(data, Series)` check inside this helper function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, please add a docstring.

"""
Get DataFrame's columns and dtypes.

Parameters
----------
data : DataFrame
Object that `info` was called on.

Returns
-------
ids : Index
DataFrame's columns.
dtypes : Series
Dtype of each of the DataFrame's columns.
"""
ids = data.columns
dtypes = data.dtypes
return ids, dtypes


def info(
data: FrameOrSeries,
verbose: Optional[bool] = None,
Expand Down Expand Up @@ -80,9 +127,8 @@ def info(
lines.append(str(type(data)))
lines.append(data.index._summary())

cols = data.columns
col_count = len(cols)
dtypes = data.dtypes
ids, dtypes = _get_ids_and_dtypes(data)
col_count = len(ids)

if col_count == 0:
lines.append(f"Empty {type(data).__name__}")
Expand All @@ -108,7 +154,7 @@ def _verbose_repr():
column_head = "Column"
col_space = 2

max_col = max(len(pprint_thing(k)) for k in cols)
max_col = max(len(pprint_thing(k)) for k in ids)
len_column = len(pprint_thing(column_head))
space = max(max_col, len_column) + col_space

Expand Down Expand Up @@ -151,7 +197,7 @@ def _verbose_repr():
+ _put_str("-" * len_dtype, space_dtype)
)

for i, col in enumerate(cols):
for i, col in enumerate(ids):
dtype = dtypes[i]
col = pprint_thing(col)

Expand All @@ -168,7 +214,7 @@ def _verbose_repr():
)

def _non_verbose_repr():
lines.append(cols._summary(name="Columns"))
lines.append(ids._summary(name="Columns"))

def _sizeof_fmt(num, size_qualifier):
# returns size in human readable format
Expand All @@ -190,8 +236,8 @@ def _sizeof_fmt(num, size_qualifier):

# groupby dtype.name to collect e.g. Categorical columns
counts = dtypes.value_counts().groupby(lambda x: x.name).sum()
dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
lines.append(f"dtypes: {', '.join(dtypes)}")
collected_dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
lines.append(f"dtypes: {', '.join(collected_dtypes)}")

if memory_usage is None:
memory_usage = get_option("display.memory_usage")
Expand Down