Skip to content

add some type annotations io/formats/format.py #27418

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 75 additions & 62 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from functools import partial
from io import StringIO
from shutil import get_terminal_size
from typing import List, Optional, TextIO, Tuple, Union
from unicodedata import east_asian_width

import numpy as np
Expand All @@ -32,6 +33,8 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCCategorical,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocker, but we have generally been moving away from using the ABC classes in annotations, as they don't actually represent anything statically and just get replaced with Any.

Does importing the actual object impact things negatively here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed ABCs

ABCDataFrame,
ABCIndexClass,
ABCMultiIndex,
ABCSeries,
Expand Down Expand Up @@ -129,14 +132,21 @@


class CategoricalFormatter:
def __init__(self, categorical, buf=None, length=True, na_rep="NaN", footer=True):
def __init__(
self,
categorical: ABCCategorical,
buf: Optional[TextIO] = None,
length: bool = True,
na_rep: str = "NaN",
footer: bool = True,
):
self.categorical = categorical
self.buf = buf if buf is not None else StringIO("")
self.na_rep = na_rep
self.length = length
self.footer = footer

def _get_footer(self):
def _get_footer(self) -> str:
footer = ""

if self.length:
Expand All @@ -153,15 +163,15 @@ def _get_footer(self):

return str(footer)

def _get_formatted_values(self):
def _get_formatted_values(self) -> List[str]:
return format_array(
self.categorical._internal_get_values(),
None,
float_format=None,
na_rep=self.na_rep,
)

def to_string(self):
def to_string(self) -> str:
categorical = self.categorical

if len(categorical) == 0:
Expand All @@ -170,34 +180,30 @@ def to_string(self):
else:
return ""

fmt_values = self._get_formatted_values()

result = ["{i}".format(i=i) for i in fmt_values]
result = [i.strip() for i in result]
result = ", ".join(result)
result = ["[" + result + "]"]
values = [i.strip() for i in self._get_formatted_values()]
lines = ["[" + ", ".join(values) + "]"]
if self.footer:
footer = self._get_footer()
if footer:
result.append(footer)
lines.append(footer)

return str("\n".join(result))
return str("\n".join(lines))


class SeriesFormatter:
def __init__(
self,
series,
buf=None,
length=True,
header=True,
index=True,
na_rep="NaN",
name=False,
float_format=None,
dtype=True,
max_rows=None,
min_rows=None,
series: ABCSeries,
buf: Optional[TextIO] = None,
length: bool = True,
header: bool = True,
index: bool = True,
na_rep: str = "NaN",
name: bool = False,
float_format: Optional[str] = None,
dtype: bool = True,
max_rows: Optional[int] = None,
min_rows: Optional[int] = None,
):
self.series = series
self.buf = buf if buf is not None else StringIO()
Expand All @@ -217,16 +223,19 @@ def __init__(

self._chk_truncate()

def _chk_truncate(self):
@property
def truncate_v(self) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You'll have to excuse my lack of familiarity with this module, but this isn't entirely consistent with the previous logic, right? Is there any risk in implementing this differently?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed properties

return hasattr(self, "tr_row_num")

def _chk_truncate(self) -> None:
from pandas.core.reshape.concat import concat

min_rows = self.min_rows
max_rows = self.max_rows
# truncation determined by max_rows, actual truncated number of rows
# used below by min_rows
truncate_v = max_rows and (len(self.series) > max_rows)
series = self.series
if truncate_v:
if max_rows and (len(series) > max_rows):
if min_rows:
# if min_rows is set (not None or 0), set max_rows to minimum
# of both
Expand All @@ -238,12 +247,10 @@ def _chk_truncate(self):
row_num = max_rows // 2
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
self.tr_row_num = row_num
else:
self.tr_row_num = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just not required anymore?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no. see OP.


self.tr_series = series
self.truncate_v = truncate_v

def _get_footer(self):
def _get_footer(self) -> str:
name = self.series.name
footer = ""

Expand Down Expand Up @@ -281,7 +288,7 @@ def _get_footer(self):

return str(footer)

def _get_formatted_index(self):
def _get_formatted_index(self) -> Tuple[List[str], bool]:
index = self.tr_series.index
is_multi = isinstance(index, ABCMultiIndex)

Expand All @@ -293,13 +300,13 @@ def _get_formatted_index(self):
fmt_index = index.format(name=True)
return fmt_index, have_header

def _get_formatted_values(self):
def _get_formatted_values(self) -> List[str]:
values_to_format = self.tr_series._formatting_values()
return format_array(
values_to_format, None, float_format=self.float_format, na_rep=self.na_rep
)

def to_string(self):
def to_string(self) -> str:
series = self.tr_series
footer = self._get_footer()

Expand Down Expand Up @@ -501,6 +508,18 @@ def __init__(
self._chk_truncate()
self.adj = _get_adjustment()

@property
def truncate_v(self) -> bool:
return hasattr(self, "tr_row_num")

@property
def truncate_h(self) -> bool:
return hasattr(self, "tr_col_num")

@property
def is_truncated(self) -> bool:
return self.truncate_h or self.truncate_v

def _chk_truncate(self):
"""
Checks whether the frame should be truncated. If so, slices
Expand Down Expand Up @@ -545,14 +564,9 @@ def _chk_truncate(self):
max_cols_adj = self.max_cols_adj
max_rows_adj = self.max_rows_adj

truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj)
truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj)

frame = self.frame
if truncate_h:
if max_cols_adj == 0:
col_num = len(frame.columns)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not a valid path?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

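truncate_h is defined on L548 as `max_cols_adj and (len(self.columns) > max_cols_adj)`. If max_cols_adj == 0, that expression is falsy, so truncate_h is False and this branch can never be reached, right?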

elif max_cols_adj == 1:
if max_cols_adj and (len(self.columns) > max_cols_adj):
if max_cols_adj == 1:
frame = frame.iloc[:, :max_cols]
col_num = max_cols
else:
Expand All @@ -561,23 +575,19 @@ def _chk_truncate(self):
(frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1
)
self.tr_col_num = col_num
if truncate_v:

if max_rows_adj and (len(frame) > max_rows_adj):
if max_rows_adj == 1:
row_num = max_rows
frame = frame.iloc[:max_rows, :]
else:
row_num = max_rows_adj // 2
frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :]))
self.tr_row_num = row_num
else:
self.tr_row_num = None

self.tr_frame = frame
self.truncate_h = truncate_h
self.truncate_v = truncate_v
self.is_truncated = self.truncate_h or self.truncate_v

def _to_str_columns(self):
def _to_str_columns(self) -> List[List[str]]:
"""
Render a DataFrame to a list of columns (as lists of strings).
"""
Expand Down Expand Up @@ -641,7 +651,7 @@ def _to_str_columns(self):

if truncate_h:
col_num = self.tr_col_num
strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index)))
strcols.insert(col_num + 1, [" ..."] * (len(str_index)))
if truncate_v:
n_header_rows = len(str_index) - len(frame)
row_num = self.tr_row_num
Expand All @@ -667,7 +677,7 @@ def _to_str_columns(self):
strcols[ix].insert(row_num + n_header_rows, dot_str)
return strcols

def to_string(self):
def to_string(self) -> None:
"""
Render a DataFrame to a console-friendly tabular output.
"""
Expand Down Expand Up @@ -803,7 +813,7 @@ def to_latex(
else:
raise TypeError("buf is not a file name and it has no write " "method")

def _format_col(self, i):
def _format_col(self, i: int) -> List[str]:
frame = self.tr_frame
formatter = self._get_formatter(i)
values_to_format = frame.iloc[:, i]._formatting_values()
Expand All @@ -816,7 +826,12 @@ def _format_col(self, i):
decimal=self.decimal,
)

def to_html(self, classes=None, notebook=False, border=None):
def to_html(
self,
classes: Optional[Union[str, List, Tuple]] = None,
notebook: bool = False,
border: Optional[int] = None,
) -> None:
"""
Render a DataFrame to a html table.

Expand Down Expand Up @@ -845,7 +860,7 @@ def to_html(self, classes=None, notebook=False, border=None):
else:
raise TypeError("buf is not a file name and it has no write " " method")

def _get_formatted_column_labels(self, frame):
def _get_formatted_column_labels(self, frame: ABCDataFrame) -> List[List[str]]:
from pandas.core.index import _sparsify

columns = frame.columns
Expand Down Expand Up @@ -887,22 +902,22 @@ def space_format(x, y):
return str_columns

@property
def has_index_names(self):
def has_index_names(self) -> bool:
return _has_names(self.frame.index)

@property
def has_column_names(self):
def has_column_names(self) -> bool:
return _has_names(self.frame.columns)

@property
def show_row_idx_names(self):
def show_row_idx_names(self) -> bool:
return all((self.has_index_names, self.index, self.show_index_names))

@property
def show_col_idx_names(self):
def show_col_idx_names(self) -> bool:
return all((self.has_column_names, self.show_index_names, self.header))

def _get_formatted_index(self, frame):
def _get_formatted_index(self, frame: ABCDataFrame) -> List[str]:
# Note: this is only used by to_string() and to_latex(), not by
# to_html().
index = frame.index
Expand Down Expand Up @@ -941,14 +956,12 @@ def _get_formatted_index(self, frame):
else:
return adjoined

def _get_column_name_list(self):
names = []
def _get_column_name_list(self) -> List[str]:
columns = self.frame.columns
if isinstance(columns, ABCMultiIndex):
names.extend("" if name is None else name for name in columns.names)
return ["" if name is None else name for name in columns.names]
else:
names.append("" if columns.name is None else columns.name)
return names
return ["" if columns.name is None else columns.name]


# ----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(
self,
formatter: DataFrameFormatter,
classes: Optional[Union[str, List, Tuple]] = None,
border: Optional[bool] = None,
border: Optional[int] = None,
) -> None:
self.fmt = formatter
self.classes = classes
Expand Down