-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
add some type annotations io/formats/format.py #27418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
c0291fa
5df8ffa
b73d522
ce71739
32d3b29
3cff805
2c37a0b
a8cf8e1
db4bfe7
0ac539b
ff47366
480c808
7dc2af0
035ec58
a58f1cd
66f6069
dd3c46d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
from functools import partial | ||
from io import StringIO | ||
from shutil import get_terminal_size | ||
from typing import TYPE_CHECKING, List, Optional, TextIO, Tuple, Union, cast | ||
from unicodedata import east_asian_width | ||
|
||
import numpy as np | ||
|
@@ -47,6 +48,9 @@ | |
from pandas.io.common import _expand_user, _stringify_path | ||
from pandas.io.formats.printing import adjoin, justify, pprint_thing | ||
|
||
if TYPE_CHECKING: | ||
from pandas import Series, DataFrame, Categorical | ||
|
||
common_docstring = """ | ||
Parameters | ||
---------- | ||
|
@@ -129,14 +133,21 @@ | |
|
||
|
||
class CategoricalFormatter: | ||
def __init__(self, categorical, buf=None, length=True, na_rep="NaN", footer=True): | ||
def __init__( | ||
self, | ||
categorical: "Categorical", | ||
buf: Optional[TextIO] = None, | ||
length: bool = True, | ||
na_rep: str = "NaN", | ||
footer: bool = True, | ||
): | ||
self.categorical = categorical | ||
self.buf = buf if buf is not None else StringIO("") | ||
self.na_rep = na_rep | ||
self.length = length | ||
self.footer = footer | ||
|
||
def _get_footer(self): | ||
def _get_footer(self) -> str: | ||
footer = "" | ||
|
||
if self.length: | ||
|
@@ -153,15 +164,15 @@ def _get_footer(self): | |
|
||
return str(footer) | ||
|
||
def _get_formatted_values(self): | ||
def _get_formatted_values(self) -> List[str]: | ||
return format_array( | ||
self.categorical._internal_get_values(), | ||
None, | ||
float_format=None, | ||
na_rep=self.na_rep, | ||
) | ||
|
||
def to_string(self): | ||
def to_string(self) -> str: | ||
categorical = self.categorical | ||
|
||
if len(categorical) == 0: | ||
|
@@ -172,10 +183,10 @@ def to_string(self): | |
|
||
fmt_values = self._get_formatted_values() | ||
|
||
result = ["{i}".format(i=i) for i in fmt_values] | ||
result = [i.strip() for i in result] | ||
result = ", ".join(result) | ||
result = ["[" + result + "]"] | ||
fmt_values = ["{i}".format(i=i) for i in fmt_values] | ||
fmt_values = [i.strip() for i in fmt_values] | ||
values = ", ".join(fmt_values) | ||
result = ["[" + values + "]"] | ||
if self.footer: | ||
footer = self._get_footer() | ||
if footer: | ||
|
@@ -187,17 +198,17 @@ def to_string(self): | |
class SeriesFormatter: | ||
def __init__( | ||
self, | ||
series, | ||
buf=None, | ||
length=True, | ||
header=True, | ||
index=True, | ||
na_rep="NaN", | ||
name=False, | ||
float_format=None, | ||
dtype=True, | ||
max_rows=None, | ||
min_rows=None, | ||
series: "Series", | ||
buf: Optional[TextIO] = None, | ||
length: bool = True, | ||
header: bool = True, | ||
index: bool = True, | ||
na_rep: str = "NaN", | ||
name: bool = False, | ||
float_format: Optional[str] = None, | ||
dtype: bool = True, | ||
max_rows: Optional[int] = None, | ||
min_rows: Optional[int] = None, | ||
): | ||
self.series = series | ||
self.buf = buf if buf is not None else StringIO() | ||
|
@@ -217,7 +228,7 @@ def __init__( | |
|
||
self._chk_truncate() | ||
|
||
def _chk_truncate(self): | ||
def _chk_truncate(self) -> None: | ||
from pandas.core.reshape.concat import concat | ||
|
||
min_rows = self.min_rows | ||
|
@@ -227,6 +238,7 @@ def _chk_truncate(self): | |
truncate_v = max_rows and (len(self.series) > max_rows) | ||
series = self.series | ||
if truncate_v: | ||
max_rows = cast(int, max_rows) | ||
if min_rows: | ||
# if min_rows is set (not None or 0), set max_rows to minimum | ||
# of both | ||
|
@@ -237,13 +249,13 @@ def _chk_truncate(self): | |
else: | ||
row_num = max_rows // 2 | ||
series = concat((series.iloc[:row_num], series.iloc[-row_num:])) | ||
self.tr_row_num = row_num | ||
self.tr_row_num = row_num # type: Optional[int] | ||
else: | ||
self.tr_row_num = None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just not required anymore? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, see the OP. |
||
self.tr_series = series | ||
self.truncate_v = truncate_v | ||
|
||
def _get_footer(self): | ||
def _get_footer(self) -> str: | ||
name = self.series.name | ||
footer = "" | ||
|
||
|
@@ -281,7 +293,7 @@ def _get_footer(self): | |
|
||
return str(footer) | ||
|
||
def _get_formatted_index(self): | ||
def _get_formatted_index(self) -> Tuple[List[str], bool]: | ||
index = self.tr_series.index | ||
is_multi = isinstance(index, ABCMultiIndex) | ||
|
||
|
@@ -293,13 +305,13 @@ def _get_formatted_index(self): | |
fmt_index = index.format(name=True) | ||
return fmt_index, have_header | ||
|
||
def _get_formatted_values(self): | ||
def _get_formatted_values(self) -> List[str]: | ||
values_to_format = self.tr_series._formatting_values() | ||
return format_array( | ||
values_to_format, None, float_format=self.float_format, na_rep=self.na_rep | ||
) | ||
|
||
def to_string(self): | ||
def to_string(self) -> str: | ||
series = self.tr_series | ||
footer = self._get_footer() | ||
|
||
|
@@ -314,6 +326,7 @@ def to_string(self): | |
if self.truncate_v: | ||
n_header_rows = 0 | ||
row_num = self.tr_row_num | ||
row_num = cast(int, row_num) | ||
width = self.adj.len(fmt_values[row_num - 1]) | ||
if width > 3: | ||
dot_str = "..." | ||
|
@@ -501,7 +514,7 @@ def __init__( | |
self._chk_truncate() | ||
self.adj = _get_adjustment() | ||
|
||
def _chk_truncate(self): | ||
def _chk_truncate(self) -> None: | ||
""" | ||
Checks whether the frame should be truncated. If so, slices | ||
the frame up. | ||
|
@@ -577,7 +590,7 @@ def _chk_truncate(self): | |
self.truncate_v = truncate_v | ||
self.is_truncated = self.truncate_h or self.truncate_v | ||
|
||
def _to_str_columns(self): | ||
def _to_str_columns(self) -> List[List[str]]: | ||
""" | ||
Render a DataFrame to a list of columns (as lists of strings). | ||
""" | ||
|
@@ -667,7 +680,7 @@ def _to_str_columns(self): | |
strcols[ix].insert(row_num + n_header_rows, dot_str) | ||
return strcols | ||
|
||
def to_string(self): | ||
def to_string(self) -> None: | ||
""" | ||
Render a DataFrame to a console-friendly tabular output. | ||
""" | ||
|
@@ -803,7 +816,7 @@ def to_latex( | |
else: | ||
raise TypeError("buf is not a file name and it has no write " "method") | ||
|
||
def _format_col(self, i): | ||
def _format_col(self, i: int) -> List[str]: | ||
frame = self.tr_frame | ||
formatter = self._get_formatter(i) | ||
values_to_format = frame.iloc[:, i]._formatting_values() | ||
|
@@ -816,7 +829,12 @@ def _format_col(self, i): | |
decimal=self.decimal, | ||
) | ||
|
||
def to_html(self, classes=None, notebook=False, border=None): | ||
def to_html( | ||
self, | ||
classes: Optional[Union[str, List, Tuple]] = None, | ||
notebook: bool = False, | ||
border: Optional[int] = None, | ||
) -> None: | ||
""" | ||
Render a DataFrame to a html table. | ||
|
||
|
@@ -845,7 +863,7 @@ def to_html(self, classes=None, notebook=False, border=None): | |
else: | ||
raise TypeError("buf is not a file name and it has no write " " method") | ||
|
||
def _get_formatted_column_labels(self, frame): | ||
def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: | ||
from pandas.core.index import _sparsify | ||
|
||
columns = frame.columns | ||
|
@@ -887,22 +905,22 @@ def space_format(x, y): | |
return str_columns | ||
|
||
@property | ||
def has_index_names(self): | ||
def has_index_names(self) -> bool: | ||
return _has_names(self.frame.index) | ||
|
||
@property | ||
def has_column_names(self): | ||
def has_column_names(self) -> bool: | ||
return _has_names(self.frame.columns) | ||
|
||
@property | ||
def show_row_idx_names(self): | ||
def show_row_idx_names(self) -> bool: | ||
return all((self.has_index_names, self.index, self.show_index_names)) | ||
|
||
@property | ||
def show_col_idx_names(self): | ||
def show_col_idx_names(self) -> bool: | ||
return all((self.has_column_names, self.show_index_names, self.header)) | ||
|
||
def _get_formatted_index(self, frame): | ||
def _get_formatted_index(self, frame: "DataFrame") -> List[str]: | ||
# Note: this is only used by to_string() and to_latex(), not by | ||
# to_html(). | ||
index = frame.index | ||
|
@@ -941,8 +959,8 @@ def _get_formatted_index(self, frame): | |
else: | ||
return adjoined | ||
|
||
def _get_column_name_list(self): | ||
names = [] | ||
def _get_column_name_list(self) -> List[str]: | ||
names = [] # type: List[str] | ||
columns = self.frame.columns | ||
if isinstance(columns, ABCMultiIndex): | ||
names.extend("" if name is None else name for name in columns.names) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,11 +4,11 @@ | |
|
||
from collections import OrderedDict | ||
from textwrap import dedent | ||
from typing import Dict, List, Optional, Tuple, Union | ||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||
|
||
from pandas._config import get_option | ||
|
||
from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex | ||
from pandas.core.dtypes.generic import ABCMultiIndex | ||
|
||
from pandas import option_context | ||
|
||
|
@@ -37,7 +37,7 @@ def __init__( | |
self, | ||
formatter: DataFrameFormatter, | ||
classes: Optional[Union[str, List, Tuple]] = None, | ||
border: Optional[bool] = None, | ||
border: Optional[int] = None, | ||
) -> None: | ||
self.fmt = formatter | ||
self.classes = classes | ||
|
@@ -79,7 +79,7 @@ def row_levels(self) -> int: | |
# not showing (row) index | ||
return 0 | ||
|
||
def _get_columns_formatted_values(self) -> ABCIndex: | ||
def _get_columns_formatted_values(self) -> Iterable[Any]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can just make this |
||
return self.columns | ||
|
||
@property | ||
|
@@ -90,12 +90,12 @@ def is_truncated(self) -> bool: | |
def ncols(self) -> int: | ||
return len(self.fmt.tr_frame.columns) | ||
|
||
def write(self, s: str, indent: int = 0) -> None: | ||
def write(self, s: Any, indent: int = 0) -> None: | ||
rs = pprint_thing(s) | ||
self.elements.append(" " * indent + rs) | ||
|
||
def write_th( | ||
self, s: str, header: bool = False, indent: int = 0, tags: Optional[str] = None | ||
self, s: Any, header: bool = False, indent: int = 0, tags: Optional[str] = None | ||
) -> None: | ||
""" | ||
Method for writting a formatted <th> cell. | ||
|
@@ -125,11 +125,11 @@ def write_th( | |
|
||
self._write_cell(s, kind="th", indent=indent, tags=tags) | ||
|
||
def write_td(self, s: str, indent: int = 0, tags: Optional[str] = None) -> None: | ||
def write_td(self, s: Any, indent: int = 0, tags: Optional[str] = None) -> None: | ||
self._write_cell(s, kind="td", indent=indent, tags=tags) | ||
|
||
def _write_cell( | ||
self, s: str, kind: str = "td", indent: int = 0, tags: Optional[str] = None | ||
self, s: Any, kind: str = "td", indent: int = 0, tags: Optional[str] = None | ||
) -> None: | ||
if tags is not None: | ||
start_tag = "<{kind} {tags}>".format(kind=kind, tags=tags) | ||
|
@@ -162,7 +162,7 @@ def _write_cell( | |
|
||
def write_tr( | ||
self, | ||
line: List[str], | ||
line: Iterable[Any], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same thing here for `Iterable`. |
||
indent: int = 0, | ||
indent_delta: int = 0, | ||
header: bool = False, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this required? Generally we try to avoid `cast`, and I'm surprised mypy can't narrow the type down here from None.
If it's fully required, does making line 238 say
`max_rows is not None and ...`
get around that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. It gives
pandas\io\formats\format.py:249: error: Unsupported operand types for // ("None" and "int")
and there might be risk in implementing this differently anyway?
Well, I would have agreed with you when I started this, but after this PR, I think that adding type annotations should be just that, and any refactoring or code cleanup should be deferred to a follow-on PR.
Agreed. Hence the numerous iterations to try to avoid `cast` originally.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, OK, yeah, that seems like a bug / gap in mypy's inference then. We can always come back to clean this up.