From 57d2e4e6431a23e9be1663b3317ef4ffd225d32f Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 9 May 2020 12:26:46 +0100 Subject: [PATCH 1/4] factor out part of info --- pandas/io/formats/info.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index d2d5fdc7ab8a2..5783435058acd 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -1,5 +1,5 @@ import sys -from typing import IO, Optional, Union +from typing import IO, TYPE_CHECKING, Optional, Tuple, Union from pandas._config import get_option @@ -8,11 +8,21 @@ from pandas.io.formats import format as fmt from pandas.io.formats.printing import pprint_thing +if TYPE_CHECKING: + from pandas.core.indexes.api import Index # noqa: F401 + from pandas.core.series import Series # noqa: F401 + def _put_str(s, space): return str(s)[:space].ljust(space) +def _get_ids_and_dtypes(data: FrameOrSeries) -> Tuple["Index", "Series"]: + ids = data.columns + dtypes = data.dtypes + return ids, dtypes + + def info( data: FrameOrSeries, verbose: Optional[bool] = None, @@ -80,9 +90,8 @@ def info( lines.append(str(type(data))) lines.append(data.index._summary()) - cols = data.columns - col_count = len(cols) - dtypes = data.dtypes + ids, dtypes = _get_ids_and_dtypes(data) + col_count = len(ids) if col_count == 0: lines.append(f"Empty {type(data).__name__}") @@ -108,7 +117,7 @@ def _verbose_repr(): column_head = "Column" col_space = 2 - max_col = max(len(pprint_thing(k)) for k in cols) + max_col = max(len(pprint_thing(k)) for k in ids) len_column = len(pprint_thing(column_head)) space = max(max_col, len_column) + col_space @@ -151,7 +160,7 @@ def _verbose_repr(): + _put_str("-" * len_dtype, space_dtype) ) - for i, col in enumerate(cols): + for i, col in enumerate(ids): dtype = dtypes[i] col = pprint_thing(col) @@ -168,7 +177,7 @@ def _verbose_repr(): ) def _non_verbose_repr(): - lines.append(cols._summary(name="Columns")) + lines.append(ids._summary(name="Columns")) def _sizeof_fmt(num, size_qualifier): # returns size in human readable format From 27408c606774dd1c80da2b146c2bb04c185bee0a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 9 May 2020 12:27:43 +0100 Subject: [PATCH 2/4] type _put_str --- pandas/io/formats/info.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 5783435058acd..75f69bafb51d1 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -3,7 +3,7 @@ from pandas._config import get_option -from pandas._typing import FrameOrSeries +from pandas._typing import Dtype, FrameOrSeries from pandas.io.formats import format as fmt from pandas.io.formats.printing import pprint_thing @@ -13,7 +13,7 @@ from pandas.core.series import Series # noqa: F401 -def _put_str(s, space): +def _put_str(s: Union[str, Dtype], space: int) -> str: return str(s)[:space].ljust(space) From dfd1e61a4bbf8650685ece800225e11b3a466217 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 10 May 2020 10:43:18 +0100 Subject: [PATCH 3/4] rename dtype to avoid typing error --- pandas/io/formats/info.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 75f69bafb51d1..f6d0f090546c1 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -199,8 +199,8 @@ def _sizeof_fmt(num, size_qualifier): # groupby dtype.name to collect e.g. Categorical columns counts = dtypes.value_counts().groupby(lambda x: x.name).sum() - dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] - lines.append(f"dtypes: {', '.join(dtypes)}") + collected_dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] + lines.append(f"dtypes: {', '.join(collected_dtypes)}") if memory_usage is None: memory_usage = get_option("display.memory_usage") From 7bceaf398368ec4857087bc3fb1f6b342d304d38 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 11 May 2020 19:35:53 +0100 Subject: [PATCH 4/4] add docstrings to helper functions --- pandas/io/formats/info.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index f6d0f090546c1..b1dcafa7a7a8f 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -14,10 +14,47 @@ def _put_str(s: Union[str, Dtype], space: int) -> str: + """ + Make string of specified length, padding to the right if necessary. + + Parameters + ---------- + s : Union[str, Dtype] + String to be formatted. + space : int + Length to force string to be of. + + Returns + ------- + str + String coerced to given length. + + Examples + -------- + >>> pd.io.formats.info._put_str("panda", 6) + 'panda ' + >>> pd.io.formats.info._put_str("panda", 4) + 'pand' + """ return str(s)[:space].ljust(space) def _get_ids_and_dtypes(data: FrameOrSeries) -> Tuple["Index", "Series"]: + """ + Get DataFrame's columns and dtypes. + + Parameters + ---------- + data : DataFrame + Object that `info` was called on. + + Returns + ------- + ids : Index + DataFrame's columns. + dtypes : Series + Dtype of each of the DataFrame's columns. + """ ids = data.columns dtypes = data.dtypes return ids, dtypes