Skip to content

add some type annotations io/formats/format.py #27418

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 56 additions & 38 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from functools import partial
from io import StringIO
from shutil import get_terminal_size
from typing import TYPE_CHECKING, List, Optional, TextIO, Tuple, Union, cast
from unicodedata import east_asian_width

import numpy as np
Expand Down Expand Up @@ -47,6 +48,9 @@
from pandas.io.common import _expand_user, _stringify_path
from pandas.io.formats.printing import adjoin, justify, pprint_thing

if TYPE_CHECKING:
from pandas import Series, DataFrame, Categorical

common_docstring = """
Parameters
----------
Expand Down Expand Up @@ -129,14 +133,21 @@


class CategoricalFormatter:
def __init__(self, categorical, buf=None, length=True, na_rep="NaN", footer=True):
def __init__(
self,
categorical: "Categorical",
buf: Optional[TextIO] = None,
length: bool = True,
na_rep: str = "NaN",
footer: bool = True,
):
self.categorical = categorical
self.buf = buf if buf is not None else StringIO("")
self.na_rep = na_rep
self.length = length
self.footer = footer

def _get_footer(self):
def _get_footer(self) -> str:
footer = ""

if self.length:
Expand All @@ -153,15 +164,15 @@ def _get_footer(self):

return str(footer)

def _get_formatted_values(self):
def _get_formatted_values(self) -> List[str]:
return format_array(
self.categorical._internal_get_values(),
None,
float_format=None,
na_rep=self.na_rep,
)

def to_string(self):
def to_string(self) -> str:
categorical = self.categorical

if len(categorical) == 0:
Expand All @@ -172,10 +183,10 @@ def to_string(self):

fmt_values = self._get_formatted_values()

result = ["{i}".format(i=i) for i in fmt_values]
result = [i.strip() for i in result]
result = ", ".join(result)
result = ["[" + result + "]"]
fmt_values = ["{i}".format(i=i) for i in fmt_values]
fmt_values = [i.strip() for i in fmt_values]
values = ", ".join(fmt_values)
result = ["[" + values + "]"]
if self.footer:
footer = self._get_footer()
if footer:
Expand All @@ -187,17 +198,17 @@ def to_string(self):
class SeriesFormatter:
def __init__(
self,
series,
buf=None,
length=True,
header=True,
index=True,
na_rep="NaN",
name=False,
float_format=None,
dtype=True,
max_rows=None,
min_rows=None,
series: "Series",
buf: Optional[TextIO] = None,
length: bool = True,
header: bool = True,
index: bool = True,
na_rep: str = "NaN",
name: bool = False,
float_format: Optional[str] = None,
dtype: bool = True,
max_rows: Optional[int] = None,
min_rows: Optional[int] = None,
):
self.series = series
self.buf = buf if buf is not None else StringIO()
Expand All @@ -217,7 +228,7 @@ def __init__(

self._chk_truncate()

def _chk_truncate(self):
def _chk_truncate(self) -> None:
from pandas.core.reshape.concat import concat

min_rows = self.min_rows
Expand All @@ -227,6 +238,7 @@ def _chk_truncate(self):
truncate_v = max_rows and (len(self.series) > max_rows)
series = self.series
if truncate_v:
max_rows = cast(int, max_rows)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this required? Generally we try to avoid cast and I'm surprised MyPy can't narrow the type down here from None.

If it's fully required does making line 238 say max_rows is not None and ... get around that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's fully required does making line 238 say max_rows is not None and ... get around that?

No. That gives `pandas\io\formats\format.py:249: error: Unsupported operand types for // ("None" and "int")`, and there might be some risk in implementing this differently anyway.

Generally we try to avoid cast

Well, I would have agreed with you when I started this, but after working on this PR I think that adding type annotations should be just that, and any refactoring or code cleanup should be deferred to a follow-on PR.

I'm surprised MyPy can't narrow the type down here from None.

Agreed, hence the numerous iterations originally to try to avoid cast.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, OK, yeah, that seems like a bug / gap in mypy's inference then. We can always come back and clean this up later.

if min_rows:
# if min_rows is set (not None or 0), set max_rows to minimum
# of both
Expand All @@ -237,13 +249,13 @@ def _chk_truncate(self):
else:
row_num = max_rows // 2
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
self.tr_row_num = row_num
self.tr_row_num = row_num # type: Optional[int]
else:
self.tr_row_num = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just not required anymore?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. See the OP (the pull request description).

self.tr_series = series
self.truncate_v = truncate_v

def _get_footer(self):
def _get_footer(self) -> str:
name = self.series.name
footer = ""

Expand Down Expand Up @@ -281,7 +293,7 @@ def _get_footer(self):

return str(footer)

def _get_formatted_index(self):
def _get_formatted_index(self) -> Tuple[List[str], bool]:
index = self.tr_series.index
is_multi = isinstance(index, ABCMultiIndex)

Expand All @@ -293,13 +305,13 @@ def _get_formatted_index(self):
fmt_index = index.format(name=True)
return fmt_index, have_header

def _get_formatted_values(self):
def _get_formatted_values(self) -> List[str]:
values_to_format = self.tr_series._formatting_values()
return format_array(
values_to_format, None, float_format=self.float_format, na_rep=self.na_rep
)

def to_string(self):
def to_string(self) -> str:
series = self.tr_series
footer = self._get_footer()

Expand All @@ -314,6 +326,7 @@ def to_string(self):
if self.truncate_v:
n_header_rows = 0
row_num = self.tr_row_num
row_num = cast(int, row_num)
width = self.adj.len(fmt_values[row_num - 1])
if width > 3:
dot_str = "..."
Expand Down Expand Up @@ -501,7 +514,7 @@ def __init__(
self._chk_truncate()
self.adj = _get_adjustment()

def _chk_truncate(self):
def _chk_truncate(self) -> None:
"""
Checks whether the frame should be truncated. If so, slices
the frame up.
Expand Down Expand Up @@ -577,7 +590,7 @@ def _chk_truncate(self):
self.truncate_v = truncate_v
self.is_truncated = self.truncate_h or self.truncate_v

def _to_str_columns(self):
def _to_str_columns(self) -> List[List[str]]:
"""
Render a DataFrame to a list of columns (as lists of strings).
"""
Expand Down Expand Up @@ -667,7 +680,7 @@ def _to_str_columns(self):
strcols[ix].insert(row_num + n_header_rows, dot_str)
return strcols

def to_string(self):
def to_string(self) -> None:
"""
Render a DataFrame to a console-friendly tabular output.
"""
Expand Down Expand Up @@ -803,7 +816,7 @@ def to_latex(
else:
raise TypeError("buf is not a file name and it has no write " "method")

def _format_col(self, i):
def _format_col(self, i: int) -> List[str]:
frame = self.tr_frame
formatter = self._get_formatter(i)
values_to_format = frame.iloc[:, i]._formatting_values()
Expand All @@ -816,7 +829,12 @@ def _format_col(self, i):
decimal=self.decimal,
)

def to_html(self, classes=None, notebook=False, border=None):
def to_html(
self,
classes: Optional[Union[str, List, Tuple]] = None,
notebook: bool = False,
border: Optional[int] = None,
) -> None:
"""
Render a DataFrame to a html table.

Expand Down Expand Up @@ -845,7 +863,7 @@ def to_html(self, classes=None, notebook=False, border=None):
else:
raise TypeError("buf is not a file name and it has no write " " method")

def _get_formatted_column_labels(self, frame):
def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]:
from pandas.core.index import _sparsify

columns = frame.columns
Expand Down Expand Up @@ -887,22 +905,22 @@ def space_format(x, y):
return str_columns

@property
def has_index_names(self):
def has_index_names(self) -> bool:
return _has_names(self.frame.index)

@property
def has_column_names(self):
def has_column_names(self) -> bool:
return _has_names(self.frame.columns)

@property
def show_row_idx_names(self):
def show_row_idx_names(self) -> bool:
return all((self.has_index_names, self.index, self.show_index_names))

@property
def show_col_idx_names(self):
def show_col_idx_names(self) -> bool:
return all((self.has_column_names, self.show_index_names, self.header))

def _get_formatted_index(self, frame):
def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
# Note: this is only used by to_string() and to_latex(), not by
# to_html().
index = frame.index
Expand Down Expand Up @@ -941,8 +959,8 @@ def _get_formatted_index(self, frame):
else:
return adjoined

def _get_column_name_list(self):
names = []
def _get_column_name_list(self) -> List[str]:
names = [] # type: List[str]
columns = self.frame.columns
if isinstance(columns, ABCMultiIndex):
names.extend("" if name is None else name for name in columns.names)
Expand Down
18 changes: 9 additions & 9 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

from collections import OrderedDict
from textwrap import dedent
from typing import Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from pandas._config import get_option

from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex
from pandas.core.dtypes.generic import ABCMultiIndex

from pandas import option_context

Expand Down Expand Up @@ -37,7 +37,7 @@ def __init__(
self,
formatter: DataFrameFormatter,
classes: Optional[Union[str, List, Tuple]] = None,
border: Optional[bool] = None,
border: Optional[int] = None,
) -> None:
self.fmt = formatter
self.classes = classes
Expand Down Expand Up @@ -79,7 +79,7 @@ def row_levels(self) -> int:
# not showing (row) index
return 0

def _get_columns_formatted_values(self) -> ABCIndex:
def _get_columns_formatted_values(self) -> Iterable[Any]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can just make this `Iterable` - if you don't subscript a generic, its type parameters default to `Any` anyway.

return self.columns

@property
Expand All @@ -90,12 +90,12 @@ def is_truncated(self) -> bool:
def ncols(self) -> int:
return len(self.fmt.tr_frame.columns)

def write(self, s: str, indent: int = 0) -> None:
def write(self, s: Any, indent: int = 0) -> None:
rs = pprint_thing(s)
self.elements.append(" " * indent + rs)

def write_th(
self, s: str, header: bool = False, indent: int = 0, tags: Optional[str] = None
self, s: Any, header: bool = False, indent: int = 0, tags: Optional[str] = None
) -> None:
"""
Method for writing a formatted <th> cell.
Expand Down Expand Up @@ -125,11 +125,11 @@ def write_th(

self._write_cell(s, kind="th", indent=indent, tags=tags)

def write_td(self, s: str, indent: int = 0, tags: Optional[str] = None) -> None:
def write_td(self, s: Any, indent: int = 0, tags: Optional[str] = None) -> None:
self._write_cell(s, kind="td", indent=indent, tags=tags)

def _write_cell(
self, s: str, kind: str = "td", indent: int = 0, tags: Optional[str] = None
self, s: Any, kind: str = "td", indent: int = 0, tags: Optional[str] = None
) -> None:
if tags is not None:
start_tag = "<{kind} {tags}>".format(kind=kind, tags=tags)
Expand Down Expand Up @@ -162,7 +162,7 @@ def _write_cell(

def write_tr(
self,
line: List[str],
line: Iterable[Any],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same thing here: a bare `Iterable` is enough, since an unsubscripted generic is equivalent to `Iterable[Any]`.

indent: int = 0,
indent_delta: int = 0,
header: bool = False,
Expand Down