Skip to content

Commit 9929fca

Browse files
authored
CLN, TYP: Factor out part of info (pandas-dev#34092)
1 parent f05eb7f commit 9929fca

File tree

1 file changed

+57
-11
lines changed

1 file changed

+57
-11
lines changed

pandas/io/formats/info.py

+57-11
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,65 @@
11
import sys
2-
from typing import IO, Optional, Union
2+
from typing import IO, TYPE_CHECKING, Optional, Tuple, Union
33

44
from pandas._config import get_option
55

6-
from pandas._typing import FrameOrSeries
6+
from pandas._typing import Dtype, FrameOrSeries
77

88
from pandas.io.formats import format as fmt
99
from pandas.io.formats.printing import pprint_thing
1010

11+
if TYPE_CHECKING:
12+
from pandas.core.indexes.api import Index # noqa: F401
13+
from pandas.core.series import Series # noqa: F401
1114

12-
def _put_str(s, space):
15+
16+
def _put_str(s: Union[str, Dtype], space: int) -> str:
17+
"""
18+
Make string of specified length, padding to the right if necessary.
19+
20+
Parameters
21+
----------
22+
s : Union[str, Dtype]
23+
String to be formatted.
24+
space : int
25+
Length to force string to be of.
26+
27+
Returns
28+
-------
29+
str
30+
String coerced to given length.
31+
32+
Examples
33+
--------
34+
>>> pd.io.formats.info._put_str("panda", 6)
35+
'panda '
36+
>>> pd.io.formats.info._put_str("panda", 4)
37+
'pand'
38+
"""
1339
return str(s)[:space].ljust(space)
1440

1541

42+
def _get_ids_and_dtypes(data: FrameOrSeries) -> Tuple["Index", "Series"]:
43+
"""
44+
Get DataFrame's columns and dtypes.
45+
46+
Parameters
47+
----------
48+
data : DataFrame
49+
Object that `info` was called on.
50+
51+
Returns
52+
-------
53+
ids : Index
54+
DataFrame's columns.
55+
dtypes : Series
56+
Dtype of each of the DataFrame's columns.
57+
"""
58+
ids = data.columns
59+
dtypes = data.dtypes
60+
return ids, dtypes
61+
62+
1663
def info(
1764
data: FrameOrSeries,
1865
verbose: Optional[bool] = None,
@@ -80,9 +127,8 @@ def info(
80127
lines.append(str(type(data)))
81128
lines.append(data.index._summary())
82129

83-
cols = data.columns
84-
col_count = len(cols)
85-
dtypes = data.dtypes
130+
ids, dtypes = _get_ids_and_dtypes(data)
131+
col_count = len(ids)
86132

87133
if col_count == 0:
88134
lines.append(f"Empty {type(data).__name__}")
@@ -108,7 +154,7 @@ def _verbose_repr():
108154
column_head = "Column"
109155
col_space = 2
110156

111-
max_col = max(len(pprint_thing(k)) for k in cols)
157+
max_col = max(len(pprint_thing(k)) for k in ids)
112158
len_column = len(pprint_thing(column_head))
113159
space = max(max_col, len_column) + col_space
114160

@@ -151,7 +197,7 @@ def _verbose_repr():
151197
+ _put_str("-" * len_dtype, space_dtype)
152198
)
153199

154-
for i, col in enumerate(cols):
200+
for i, col in enumerate(ids):
155201
dtype = dtypes[i]
156202
col = pprint_thing(col)
157203

@@ -168,7 +214,7 @@ def _verbose_repr():
168214
)
169215

170216
def _non_verbose_repr():
171-
lines.append(cols._summary(name="Columns"))
217+
lines.append(ids._summary(name="Columns"))
172218

173219
def _sizeof_fmt(num, size_qualifier):
174220
# returns size in human readable format
@@ -190,8 +236,8 @@ def _sizeof_fmt(num, size_qualifier):
190236

191237
# groupby dtype.name to collect e.g. Categorical columns
192238
counts = dtypes.value_counts().groupby(lambda x: x.name).sum()
193-
dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
194-
lines.append(f"dtypes: {', '.join(dtypes)}")
239+
collected_dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
240+
lines.append(f"dtypes: {', '.join(collected_dtypes)}")
195241

196242
if memory_usage is None:
197243
memory_usage = get_option("display.memory_usage")

0 commit comments

Comments
 (0)