Skip to content

add some type annotations io/formats/format.py #27418

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 69 additions & 59 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from functools import partial
from io import StringIO
from shutil import get_terminal_size
from typing import List, Optional, Tuple, Union
from unicodedata import east_asian_width

import numpy as np
Expand All @@ -32,6 +33,8 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCCategorical,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocker but generally have been moving away from using the ABC classes in annotations as they don't actually represent anything statically and just get replaced with Any.

Does importing the actual object impact things negatively here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed ABCs

ABCDataFrame,
ABCIndexClass,
ABCMultiIndex,
ABCSeries,
Expand Down Expand Up @@ -129,14 +132,21 @@


class CategoricalFormatter:
def __init__(self, categorical, buf=None, length=True, na_rep="NaN", footer=True):
def __init__(
self,
categorical: ABCCategorical,
buf: Optional[StringIO] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor but I think should use typing.TextIO here instead

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably, but it's not yet failing mypy, so doing it 'gradually'. will change on next commit.

length: bool = True,
na_rep: str = "NaN",
footer: bool = True,
):
self.categorical = categorical
self.buf = buf if buf is not None else StringIO("")
self.na_rep = na_rep
self.length = length
self.footer = footer

def _get_footer(self):
def _get_footer(self) -> str:
footer = ""

if self.length:
Expand All @@ -153,15 +163,15 @@ def _get_footer(self):

return str(footer)

def _get_formatted_values(self):
def _get_formatted_values(self) -> List[str]:
return format_array(
self.categorical._internal_get_values(),
None,
float_format=None,
na_rep=self.na_rep,
)

def to_string(self):
def to_string(self) -> str:
categorical = self.categorical

if len(categorical) == 0:
Expand All @@ -172,7 +182,7 @@ def to_string(self):

fmt_values = self._get_formatted_values()

result = ["{i}".format(i=i) for i in fmt_values]
result = ["{i}".format(i=i) for i in fmt_values] # type: Union[str, List[str]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it be cleaner if this were just List[str] and then on 187 use a different variable name?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

refactored.

result = [i.strip() for i in result]
result = ", ".join(result)
result = ["[" + result + "]"]
Expand All @@ -187,18 +197,18 @@ def to_string(self):
class SeriesFormatter:
def __init__(
self,
series,
buf=None,
length=True,
header=True,
index=True,
na_rep="NaN",
name=False,
float_format=None,
dtype=True,
max_rows=None,
min_rows=None,
):
series: ABCSeries,
buf: Optional[StringIO] = None,
length: bool = True,
header: bool = True,
index: bool = True,
na_rep: str = "NaN",
name: bool = False,
float_format: Optional[str] = None,
dtype: bool = True,
max_rows: Optional[int] = None,
min_rows: Optional[int] = None,
) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can remove this for init - pseudo-standard internally not to add (though was required by earlier mypy versions)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

self.series = series
self.buf = buf if buf is not None else StringIO()
self.name = name
Expand All @@ -214,19 +224,19 @@ def __init__(
self.float_format = float_format
self.dtype = dtype
self.adj = _get_adjustment()
self.truncate_v = None # type: Optional[int]

self._chk_truncate()

def _chk_truncate(self):
def _chk_truncate(self) -> None:
from pandas.core.reshape.concat import concat

min_rows = self.min_rows
max_rows = self.max_rows
# truncation determined by max_rows, actual truncated number of rows
# used below by min_rows
truncate_v = max_rows and (len(self.series) > max_rows)
series = self.series
if truncate_v:
if max_rows and (len(series) > max_rows):
if min_rows:
# if min_rows is set (not None or 0), set max_rows to minimum
# of both
Expand All @@ -237,13 +247,11 @@ def _chk_truncate(self):
else:
row_num = max_rows // 2
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
self.tr_row_num = row_num
else:
self.tr_row_num = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just not required anymore?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no. see OP.

self.truncate_v = row_num

self.tr_series = series
self.truncate_v = truncate_v

def _get_footer(self):
def _get_footer(self) -> str:
name = self.series.name
footer = ""

Expand Down Expand Up @@ -281,7 +289,7 @@ def _get_footer(self):

return str(footer)

def _get_formatted_index(self):
def _get_formatted_index(self) -> Tuple[List[str], bool]:
index = self.tr_series.index
is_multi = isinstance(index, ABCMultiIndex)

Expand All @@ -293,13 +301,13 @@ def _get_formatted_index(self):
fmt_index = index.format(name=True)
return fmt_index, have_header

def _get_formatted_values(self):
def _get_formatted_values(self) -> List[str]:
values_to_format = self.tr_series._formatting_values()
return format_array(
values_to_format, None, float_format=self.float_format, na_rep=self.na_rep
)

def to_string(self):
def to_string(self) -> str:
series = self.tr_series
footer = self._get_footer()

Expand All @@ -313,7 +321,7 @@ def to_string(self):

if self.truncate_v:
n_header_rows = 0
row_num = self.tr_row_num
row_num = self.truncate_v
width = self.adj.len(fmt_values[row_num - 1])
if width > 3:
dot_str = "..."
Expand Down Expand Up @@ -498,9 +506,15 @@ def __init__(
else:
self.columns = frame.columns

self.truncate_h = None # type: Optional[int]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this gets assigned to an int subsequently and None isn't valid I think should just type as int.

There's some syntax trickery at play here due to py 3.5 support as in py36 forward you could declare this without initializing I think like:

self.truncate_h: int
self.truncate_v: int

But can't do that in 3.5

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None is valid, so that we get the same behaviour as False when it was bool

None is when the DataFrame is not truncated. the assigned int is the truncated row/column number.

self.truncate_v = None # type: Optional[int]
self._chk_truncate()
self.adj = _get_adjustment()

@property
def is_truncated(self) -> bool:
return bool(self.truncate_h or self.truncate_v)

def _chk_truncate(self):
"""
Checks whether the frame should be truncated. If so, slices
Expand Down Expand Up @@ -545,39 +559,30 @@ def _chk_truncate(self):
max_cols_adj = self.max_cols_adj
max_rows_adj = self.max_rows_adj

truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj)
truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj)

frame = self.frame
if truncate_h:
if max_cols_adj == 0:
col_num = len(frame.columns)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not a valid path?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

truncate_h is defined on L548. if max_cols_adj==0, truncate_h is False?

elif max_cols_adj == 1:
if max_cols_adj and (len(self.columns) > max_cols_adj):
if max_cols_adj == 1:
frame = frame.iloc[:, :max_cols]
col_num = max_cols
else:
col_num = max_cols_adj // 2
frame = concat(
(frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1
)
self.tr_col_num = col_num
if truncate_v:
self.truncate_h = col_num

if max_rows_adj and (len(frame) > max_rows_adj):
if max_rows_adj == 1:
row_num = max_rows
frame = frame.iloc[:max_rows, :]
else:
row_num = max_rows_adj // 2
frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :]))
self.tr_row_num = row_num
else:
self.tr_row_num = None
self.truncate_v = row_num

self.tr_frame = frame
self.truncate_h = truncate_h
self.truncate_v = truncate_v
self.is_truncated = self.truncate_h or self.truncate_v

def _to_str_columns(self):
def _to_str_columns(self) -> List[List[str]]:
"""
Render a DataFrame to a list of columns (as lists of strings).
"""
Expand Down Expand Up @@ -640,11 +645,11 @@ def _to_str_columns(self):
truncate_v = self.truncate_v

if truncate_h:
col_num = self.tr_col_num
strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index)))
col_num = truncate_h
strcols.insert(col_num + 1, [" ..."] * (len(str_index)))
if truncate_v:
n_header_rows = len(str_index) - len(frame)
row_num = self.tr_row_num
row_num = truncate_v
for ix, col in enumerate(strcols):
# infer from above row
cwidth = self.adj.len(strcols[ix][row_num])
Expand All @@ -667,7 +672,7 @@ def _to_str_columns(self):
strcols[ix].insert(row_num + n_header_rows, dot_str)
return strcols

def to_string(self):
def to_string(self) -> None:
"""
Render a DataFrame to a console-friendly tabular output.
"""
Expand Down Expand Up @@ -803,7 +808,7 @@ def to_latex(
else:
raise TypeError("buf is not a file name and it has no write " "method")

def _format_col(self, i):
def _format_col(self, i: int) -> List[str]:
frame = self.tr_frame
formatter = self._get_formatter(i)
values_to_format = frame.iloc[:, i]._formatting_values()
Expand All @@ -816,7 +821,12 @@ def _format_col(self, i):
decimal=self.decimal,
)

def to_html(self, classes=None, notebook=False, border=None):
def to_html(
self,
classes: Optional[Union[str, List, Tuple]] = None,
notebook: bool = False,
border: Optional[int] = None,
) -> None:
"""
Render a DataFrame to a html table.

Expand Down Expand Up @@ -845,7 +855,7 @@ def to_html(self, classes=None, notebook=False, border=None):
else:
raise TypeError("buf is not a file name and it has no write " " method")

def _get_formatted_column_labels(self, frame):
def _get_formatted_column_labels(self, frame: ABCDataFrame) -> List[List[str]]:
from pandas.core.index import _sparsify

columns = frame.columns
Expand Down Expand Up @@ -887,22 +897,22 @@ def space_format(x, y):
return str_columns

@property
def has_index_names(self):
def has_index_names(self) -> bool:
return _has_names(self.frame.index)

@property
def has_column_names(self):
def has_column_names(self) -> bool:
return _has_names(self.frame.columns)

@property
def show_row_idx_names(self):
def show_row_idx_names(self) -> bool:
return all((self.has_index_names, self.index, self.show_index_names))

@property
def show_col_idx_names(self):
def show_col_idx_names(self) -> bool:
return all((self.has_column_names, self.show_index_names, self.header))

def _get_formatted_index(self, frame):
def _get_formatted_index(self, frame: ABCDataFrame) -> List[str]:
# Note: this is only used by to_string() and to_latex(), not by
# to_html().
index = frame.index
Expand Down Expand Up @@ -941,8 +951,8 @@ def _get_formatted_index(self, frame):
else:
return adjoined

def _get_column_name_list(self):
names = []
def _get_column_name_list(self) -> List[str]:
names = [] # type: List[str]
columns = self.frame.columns
if isinstance(columns, ABCMultiIndex):
names.extend("" if name is None else name for name in columns.names)
Expand Down
Loading