From c0291fa700037cea3c7c0501855ff33d244eb60b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 16:32:53 +0100 Subject: [PATCH 01/15] add some type annotations and refactor io/formats/format.py --- pandas/io/formats/format.py | 128 +++++++++++++++++++----------------- pandas/io/formats/html.py | 18 ++--- 2 files changed, 78 insertions(+), 68 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0e8ed7b25d665..fbb4989be6aca 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,6 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size +from typing import List, Optional, Tuple, Union from unicodedata import east_asian_width import numpy as np @@ -32,6 +33,8 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCSeries, @@ -129,14 +132,21 @@ class CategoricalFormatter: - def __init__(self, categorical, buf=None, length=True, na_rep="NaN", footer=True): + def __init__( + self, + categorical: ABCCategorical, + buf: Optional[StringIO] = None, + length: bool = True, + na_rep: str = "NaN", + footer: bool = True, + ): self.categorical = categorical self.buf = buf if buf is not None else StringIO("") self.na_rep = na_rep self.length = length self.footer = footer - def _get_footer(self): + def _get_footer(self) -> str: footer = "" if self.length: @@ -153,7 +163,7 @@ def _get_footer(self): return str(footer) - def _get_formatted_values(self): + def _get_formatted_values(self) -> List[str]: return format_array( self.categorical._internal_get_values(), None, @@ -161,7 +171,7 @@ def _get_formatted_values(self): na_rep=self.na_rep, ) - def to_string(self): + def to_string(self) -> str: categorical = self.categorical if len(categorical) == 0: @@ -172,7 +182,7 @@ def to_string(self): fmt_values = self._get_formatted_values() - result = ["{i}".format(i=i) for i in fmt_values] + result = ["{i}".format(i=i) for i in fmt_values] # type: Union[str, List[str]] result = [i.strip() for i in result] result = ", ".join(result) result = ["[" + result + "]"] @@ -187,18 +197,18 @@ def to_string(self): class SeriesFormatter: def __init__( self, - series, - buf=None, - length=True, - header=True, - index=True, - na_rep="NaN", - name=False, - float_format=None, - dtype=True, - max_rows=None, - min_rows=None, - ): + series: ABCSeries, + buf: Optional[StringIO] = None, + length: bool = True, + header: bool = True, + index: bool = True, + na_rep: str = "NaN", + name: bool = False, + float_format: Optional[str] = None, + dtype: bool = True, + max_rows: Optional[int] = None, + min_rows: Optional[int] = None, + ) -> None: self.series = series self.buf = buf if buf is not None else StringIO() self.name = name @@ -214,19 +224,19 @@ def __init__( self.float_format = float_format self.dtype = dtype self.adj = _get_adjustment() + self.truncate_v = None # type: Optional[int] self._chk_truncate() - def _chk_truncate(self): + def _chk_truncate(self) -> None: from pandas.core.reshape.concat import concat min_rows = self.min_rows max_rows = self.max_rows # truncation determined by max_rows, actual truncated number of rows # used below by min_rows - truncate_v = max_rows and (len(self.series) > max_rows) series = self.series - if truncate_v: + if max_rows and (len(series) > max_rows): if min_rows: # if min_rows is set (not None or 0), set max_rows to minimum # of both @@ -237,13 +247,11 @@ def _chk_truncate(self): else: row_num = max_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) - self.tr_row_num = row_num - else: - self.tr_row_num = None + self.truncate_v = row_num + self.tr_series = series - self.truncate_v = truncate_v - def _get_footer(self): + def _get_footer(self) -> str: name = self.series.name footer = "" @@ -281,7 +289,7 @@ def _get_footer(self): return str(footer) - def _get_formatted_index(self): + def _get_formatted_index(self) -> Tuple[List[str], bool]: index = self.tr_series.index is_multi = isinstance(index, ABCMultiIndex) @@ -293,13 +301,13 @@ def _get_formatted_index(self): fmt_index = index.format(name=True) return fmt_index, have_header - def _get_formatted_values(self): + def _get_formatted_values(self) -> List[str]: values_to_format = self.tr_series._formatting_values() return format_array( values_to_format, None, float_format=self.float_format, na_rep=self.na_rep ) - def to_string(self): + def to_string(self) -> str: series = self.tr_series footer = self._get_footer() @@ -313,7 +321,7 @@ def to_string(self): if self.truncate_v: n_header_rows = 0 - row_num = self.tr_row_num + row_num = self.truncate_v width = self.adj.len(fmt_values[row_num - 1]) if width > 3: dot_str = "..." @@ -498,9 +506,15 @@ def __init__( else: self.columns = frame.columns + self.truncate_h = None # type: Optional[int] + self.truncate_v = None # type: Optional[int] self._chk_truncate() self.adj = _get_adjustment() + @property + def is_truncated(self) -> bool: + return bool(self.truncate_h or self.truncate_v) + def _chk_truncate(self): """ Checks whether the frame should be truncated. If so, slices @@ -545,14 +559,9 @@ def _chk_truncate(self): max_cols_adj = self.max_cols_adj max_rows_adj = self.max_rows_adj - truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj) - truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj) - frame = self.frame - if truncate_h: - if max_cols_adj == 0: - col_num = len(frame.columns) - elif max_cols_adj == 1: + if max_cols_adj and (len(self.columns) > max_cols_adj): + if max_cols_adj == 1: frame = frame.iloc[:, :max_cols] col_num = max_cols else: @@ -560,24 +569,20 @@ def _chk_truncate(self): frame = concat( (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 ) - self.tr_col_num = col_num - if truncate_v: + self.truncate_h = col_num + + if max_rows_adj and (len(frame) > max_rows_adj): if max_rows_adj == 1: row_num = max_rows frame = frame.iloc[:max_rows, :] else: row_num = max_rows_adj // 2 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) - self.tr_row_num = row_num - else: - self.tr_row_num = None + self.truncate_v = row_num self.tr_frame = frame - self.truncate_h = truncate_h - self.truncate_v = truncate_v - self.is_truncated = self.truncate_h or self.truncate_v - def _to_str_columns(self): + def _to_str_columns(self) -> List[List[str]]: """ Render a DataFrame to a list of columns (as lists of strings). """ @@ -640,11 +645,11 @@ def _to_str_columns(self): truncate_v = self.truncate_v if truncate_h: - col_num = self.tr_col_num - strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index))) + col_num = truncate_h + strcols.insert(col_num + 1, [" ..."] * (len(str_index))) if truncate_v: n_header_rows = len(str_index) - len(frame) - row_num = self.tr_row_num + row_num = truncate_v for ix, col in enumerate(strcols): # infer from above row cwidth = self.adj.len(strcols[ix][row_num]) @@ -667,7 +672,7 @@ def _to_str_columns(self): strcols[ix].insert(row_num + n_header_rows, dot_str) return strcols - def to_string(self): + def to_string(self) -> None: """ Render a DataFrame to a console-friendly tabular output. """ @@ -803,7 +808,7 @@ def to_latex( else: raise TypeError("buf is not a file name and it has no write " "method") - def _format_col(self, i): + def _format_col(self, i: int) -> List[str]: frame = self.tr_frame formatter = self._get_formatter(i) values_to_format = frame.iloc[:, i]._formatting_values() @@ -816,7 +821,12 @@ def _format_col(self, i): decimal=self.decimal, ) - def to_html(self, classes=None, notebook=False, border=None): + def to_html( + self, + classes: Optional[Union[str, List, Tuple]] = None, + notebook: bool = False, + border: Optional[int] = None, + ) -> None: """ Render a DataFrame to a html table. @@ -845,7 +855,7 @@ def to_html(self, classes=None, notebook=False, border=None): else: raise TypeError("buf is not a file name and it has no write " " method") - def _get_formatted_column_labels(self, frame): + def _get_formatted_column_labels(self, frame: ABCDataFrame) -> List[List[str]]: from pandas.core.index import _sparsify columns = frame.columns @@ -887,22 +897,22 @@ def space_format(x, y): return str_columns @property - def has_index_names(self): + def has_index_names(self) -> bool: return _has_names(self.frame.index) @property - def has_column_names(self): + def has_column_names(self) -> bool: return _has_names(self.frame.columns) @property - def show_row_idx_names(self): + def show_row_idx_names(self) -> bool: return all((self.has_index_names, self.index, self.show_index_names)) @property - def show_col_idx_names(self): + def show_col_idx_names(self) -> bool: return all((self.has_column_names, self.show_index_names, self.header)) - def _get_formatted_index(self, frame): + def _get_formatted_index(self, frame: ABCDataFrame) -> List[str]: # Note: this is only used by to_string() and to_latex(), not by # to_html(). index = frame.index @@ -941,8 +951,8 @@ def _get_formatted_index(self, frame): else: return adjoined - def _get_column_name_list(self): - names = [] + def _get_column_name_list(self) -> List[str]: + names = [] # type: List[str] columns = self.frame.columns if isinstance(columns, ABCMultiIndex): names.extend("" if name is None else name for name in columns.names) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index c2f4ee2c4a68b..dab076b92d3f4 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -37,7 +37,7 @@ def __init__( self, formatter: DataFrameFormatter, classes: Optional[Union[str, List, Tuple]] = None, - border: Optional[bool] = None, + border: Optional[int] = None, ) -> None: self.fmt = formatter self.classes = classes @@ -252,7 +252,7 @@ def _write_col_header(self, indent: int) -> None: for lnum, (records, values) in enumerate(zip(level_lengths, levels)): if truncate_h: # modify the header lines - ins_col = self.fmt.tr_col_num + ins_col = truncate_h if self.fmt.sparsify: recs_new = {} # Increment tags after ... col. @@ -348,7 +348,7 @@ def _write_col_header(self, indent: int) -> None: align = self.fmt.justify if truncate_h: - ins_col = self.row_levels + self.fmt.tr_col_num + ins_col = self.row_levels + truncate_h row.insert(ins_col, "...") self.write_tr(row, indent, self.indent_delta, header=True, align=align) @@ -406,7 +406,7 @@ def _write_regular_rows( row = [] # type: List[str] for i in range(nrows): - if truncate_v and i == (self.fmt.tr_row_num): + if truncate_v and i == truncate_v: str_sep_row = ["..."] * len(row) self.write_tr( str_sep_row, @@ -428,7 +428,7 @@ def _write_regular_rows( row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: - dot_col_ix = self.fmt.tr_col_num + self.row_levels + dot_col_ix = truncate_h + self.row_levels row.insert(dot_col_ix, "...") self.write_tr( row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels @@ -457,7 +457,7 @@ def _write_hierarchical_rows( if truncate_v: # Insert ... row and adjust idx_values and # level_lengths to take this into account. - ins_row = self.fmt.tr_row_num + ins_row = truncate_v inserted = False for lnum, records in enumerate(level_lengths): rec_new = {} @@ -520,7 +520,7 @@ def _write_hierarchical_rows( row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: row.insert( - self.row_levels - sparse_offset + self.fmt.tr_col_num, "..." + self.row_levels - sparse_offset + truncate_h, "..." ) self.write_tr( row, @@ -532,7 +532,7 @@ def _write_hierarchical_rows( else: row = [] for i in range(len(frame)): - if truncate_v and i == (self.fmt.tr_row_num): + if truncate_v and i == truncate_v: str_sep_row = ["..."] * len(row) self.write_tr( str_sep_row, @@ -549,7 +549,7 @@ def _write_hierarchical_rows( row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: - row.insert(self.row_levels + self.fmt.tr_col_num, "...") + row.insert(self.row_levels + truncate_h, "...") self.write_tr( row, indent, From 5df8ffa281c5964b7f3ba1027a7a0bc9141a2146 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 17:15:45 +0100 Subject: [PATCH 02/15] blackify --- pandas/io/formats/html.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index dab076b92d3f4..ad3e32400a102 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -519,9 +519,7 @@ def _write_hierarchical_rows( row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: - row.insert( - self.row_levels - sparse_offset + truncate_h, "..." - ) + row.insert(self.row_levels - sparse_offset + truncate_h, "...") self.write_tr( row, indent, From b73d522d44946c26e385e6dd2d82221df9e9df30 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 18:46:52 +0100 Subject: [PATCH 03/15] refactor to remove redefinition of variable with incompatible type --- pandas/io/formats/format.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fbb4989be6aca..f6cf7f6a82a99 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -180,18 +180,14 @@ def to_string(self) -> str: else: return "" - fmt_values = self._get_formatted_values() - - result = ["{i}".format(i=i) for i in fmt_values] # type: Union[str, List[str]] - result = [i.strip() for i in result] - result = ", ".join(result) - result = ["[" + result + "]"] + values = ["{i}".format(i=i.strip()) for i in self._get_formatted_values()] + lines = ["[" + ", ".join(values) + "]"] if self.footer: footer = self._get_footer() if footer: - result.append(footer) + lines.append(footer) - return str("\n".join(result)) + return str("\n".join(lines)) class SeriesFormatter: From ce717391abc38005a49ad900e560e15473200391 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 18:55:40 +0100 Subject: [PATCH 04/15] simplify --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f6cf7f6a82a99..deffefa2c26cc 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -180,7 +180,7 @@ def to_string(self) -> str: else: return "" - values = ["{i}".format(i=i.strip()) for i in self._get_formatted_values()] + values = [i.strip() for i in self._get_formatted_values()] lines = ["[" + ", ".join(values) + "]"] if self.footer: footer = self._get_footer() From 32d3b29934d19811f26669df8dbb1c40f00dd901 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 19:27:04 +0100 Subject: [PATCH 05/15] refactor _get_column_name_list --- pandas/io/formats/format.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index deffefa2c26cc..0de487734f3dd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -948,13 +948,11 @@ def _get_formatted_index(self, frame: ABCDataFrame) -> List[str]: return adjoined def _get_column_name_list(self) -> List[str]: - names = [] # type: List[str] columns = self.frame.columns if isinstance(columns, ABCMultiIndex): - names.extend("" if name is None else name for name in columns.names) + return ["" if name is None else name for name in columns.names] else: - names.append("" if columns.name is None else columns.name) - return names + return ["" if columns.name is None else columns.name] # ---------------------------------------------------------------------- From 3cff8054c9e81386a1bc0c6f9c00d0ce5cb10223 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 19:28:44 +0100 Subject: [PATCH 06/15] remove NoneType return value from __init__ --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0de487734f3dd..05898e0c3deb0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -204,7 +204,7 @@ def __init__( dtype: bool = True, max_rows: Optional[int] = None, min_rows: Optional[int] = None, - ) -> None: + ): self.series = series self.buf = buf if buf is not None else StringIO() self.name = name From 2c37a0b43a0859bcd9d3837658039fc9e48c41c8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 19:31:07 +0100 Subject: [PATCH 07/15] use typing.TextIO instead of StringIO --- pandas/io/formats/format.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 05898e0c3deb0..74ec9d384ae92 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,7 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size -from typing import List, Optional, Tuple, Union +from typing import List, Optional, TextIO, Tuple, Union from unicodedata import east_asian_width import numpy as np @@ -135,7 +135,7 @@ class CategoricalFormatter: def __init__( self, categorical: ABCCategorical, - buf: Optional[StringIO] = None, + buf: Optional[TextIO] = None, length: bool = True, na_rep: str = "NaN", footer: bool = True, @@ -194,7 +194,7 @@ class SeriesFormatter: def __init__( self, series: ABCSeries, - buf: Optional[StringIO] = None, + buf: Optional[TextIO] = None, length: bool = True, header: bool = True, index: bool = True, From db4bfe7c6b2b0eda79ba2dada8b432d933e84d13 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 21:18:49 +0100 Subject: [PATCH 08/15] reduce diff --- pandas/io/formats/format.py | 29 +++++++++++++++++++---------- pandas/io/formats/html.py | 18 ++++++++++-------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 74ec9d384ae92..2d4423428196c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -220,10 +220,13 @@ def __init__( self.float_format = float_format self.dtype = dtype self.adj = _get_adjustment() - self.truncate_v = None # type: Optional[int] self._chk_truncate() + @property + def truncate_v(self) -> bool: + return hasattr(self, 'tr_row_num') + def _chk_truncate(self) -> None: from pandas.core.reshape.concat import concat @@ -243,7 +246,7 @@ def _chk_truncate(self) -> None: else: row_num = max_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) - self.truncate_v = row_num + self.tr_row_num = row_num self.tr_series = series @@ -317,7 +320,7 @@ def to_string(self) -> str: if self.truncate_v: n_header_rows = 0 - row_num = self.truncate_v + row_num = self.tr_row_num width = self.adj.len(fmt_values[row_num - 1]) if width > 3: dot_str = "..." @@ -502,14 +505,20 @@ def __init__( else: self.columns = frame.columns - self.truncate_h = None # type: Optional[int] - self.truncate_v = None # type: Optional[int] self._chk_truncate() self.adj = _get_adjustment() + @property + def truncate_v(self) -> bool: + return hasattr(self, 'tr_row_num') + + @property + def truncate_h(self) -> bool: + return hasattr(self, 'tr_col_num') + @property def is_truncated(self) -> bool: - return bool(self.truncate_h or self.truncate_v) + return self.truncate_h or self.truncate_v def _chk_truncate(self): """ @@ -565,7 +574,7 @@ def _chk_truncate(self): frame = concat( (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 ) - self.truncate_h = col_num + self.tr_col_num = col_num if max_rows_adj and (len(frame) > max_rows_adj): if max_rows_adj == 1: @@ -574,7 +583,7 @@ def _chk_truncate(self): else: row_num = max_rows_adj // 2 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) - self.truncate_v = row_num + self.tr_row_num = row_num self.tr_frame = frame @@ -641,11 +650,11 @@ def _to_str_columns(self) -> List[List[str]]: truncate_v = self.truncate_v if truncate_h: - col_num = truncate_h + col_num = self.tr_col_num strcols.insert(col_num + 1, [" ..."] * (len(str_index))) if truncate_v: n_header_rows = len(str_index) - len(frame) - row_num = truncate_v + row_num = self.tr_row_num for ix, col in enumerate(strcols): # infer from above row cwidth = self.adj.len(strcols[ix][row_num]) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index ad3e32400a102..16e4024c4bf66 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -252,7 +252,7 @@ def _write_col_header(self, indent: int) -> None: for lnum, (records, values) in enumerate(zip(level_lengths, levels)): if truncate_h: # modify the header lines - ins_col = truncate_h + ins_col = self.fmt.tr_col_num if self.fmt.sparsify: recs_new = {} # Increment tags after ... col. @@ -348,7 +348,7 @@ def _write_col_header(self, indent: int) -> None: align = self.fmt.justify if truncate_h: - ins_col = self.row_levels + truncate_h + ins_col = self.row_levels + self.fmt.tr_col_num row.insert(ins_col, "...") self.write_tr(row, indent, self.indent_delta, header=True, align=align) @@ -406,7 +406,7 @@ def _write_regular_rows( row = [] # type: List[str] for i in range(nrows): - if truncate_v and i == truncate_v: + if truncate_v and i == (self.fmt.tr_row_num): str_sep_row = ["..."] * len(row) self.write_tr( str_sep_row, @@ -428,7 +428,7 @@ def _write_regular_rows( row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: - dot_col_ix = truncate_h + self.row_levels + dot_col_ix = self.fmt.tr_col_num + self.row_levels row.insert(dot_col_ix, "...") self.write_tr( row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels @@ -457,7 +457,7 @@ def _write_hierarchical_rows( if truncate_v: # Insert ... row and adjust idx_values and # level_lengths to take this into account. - ins_row = truncate_v + ins_row = self.fmt.tr_row_num inserted = False for lnum, records in enumerate(level_lengths): rec_new = {} @@ -519,7 +519,9 @@ def _write_hierarchical_rows( row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: - row.insert(self.row_levels - sparse_offset + truncate_h, "...") + row.insert( + self.row_levels - sparse_offset + self.fmt.tr_col_num, "..." + ) self.write_tr( row, indent, @@ -530,7 +532,7 @@ def _write_hierarchical_rows( else: row = [] for i in range(len(frame)): - if truncate_v and i == truncate_v: + if truncate_v and i == (self.fmt.tr_row_num): str_sep_row = ["..."] * len(row) self.write_tr( str_sep_row, @@ -547,7 +549,7 @@ def _write_hierarchical_rows( row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: - row.insert(self.row_levels + truncate_h, "...") + row.insert(self.row_levels + self.fmt.tr_col_num, "...") self.write_tr( row, indent, From 0ac539b3ce87857bb5632d2fc165f95706c579f8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 16 Jul 2019 21:25:58 +0100 Subject: [PATCH 09/15] blackify --- pandas/io/formats/format.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2d4423428196c..ecab079bd4b78 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -225,7 +225,7 @@ def __init__( @property def truncate_v(self) -> bool: - return hasattr(self, 'tr_row_num') + return hasattr(self, "tr_row_num") def _chk_truncate(self) -> None: from pandas.core.reshape.concat import concat @@ -510,11 +510,11 @@ def __init__( @property def truncate_v(self) -> bool: - return hasattr(self, 'tr_row_num') + return hasattr(self, "tr_row_num") @property def truncate_h(self) -> bool: - return hasattr(self, 'tr_col_num') + return hasattr(self, "tr_col_num") @property def is_truncated(self) -> bool: From 480c80833ef74a3e7b86b0e7911a9e8edf0851ce Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 19 Jul 2019 19:41:01 +0100 Subject: [PATCH 10/15] remove properties --- pandas/io/formats/format.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ecab079bd4b78..e71357d608451 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -223,10 +223,6 @@ def __init__( self._chk_truncate() - @property - def truncate_v(self) -> bool: - return hasattr(self, "tr_row_num") - def _chk_truncate(self) -> None: from pandas.core.reshape.concat import concat @@ -234,6 +230,7 @@ def _chk_truncate(self) -> None: max_rows = self.max_rows # truncation determined by max_rows, actual truncated number of rows # used below by min_rows + self.truncate_v = False series = self.series if max_rows and (len(series) > max_rows): if min_rows: @@ -247,6 +244,7 @@ def _chk_truncate(self) -> None: row_num = max_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) self.tr_row_num = row_num + self.truncate_v = True self.tr_series = series @@ -508,19 +506,7 @@ def __init__( self._chk_truncate() self.adj = _get_adjustment() - @property - def truncate_v(self) -> bool: - return hasattr(self, "tr_row_num") - - @property - def truncate_h(self) -> bool: - return hasattr(self, "tr_col_num") - - @property - def is_truncated(self) -> bool: - return self.truncate_h or self.truncate_v - - def _chk_truncate(self): + def _chk_truncate(self) -> None: """ Checks whether the frame should be truncated. If so, slices the frame up. @@ -564,6 +550,8 @@ def _chk_truncate(self): max_cols_adj = self.max_cols_adj max_rows_adj = self.max_rows_adj + self.truncate_h = False + self.truncate_v = False frame = self.frame if max_cols_adj and (len(self.columns) > max_cols_adj): if max_cols_adj == 1: @@ -575,6 +563,7 @@ def _chk_truncate(self): (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 ) self.tr_col_num = col_num + self.truncate_h = True if max_rows_adj and (len(frame) > max_rows_adj): if max_rows_adj == 1: @@ -584,8 +573,10 @@ def _chk_truncate(self): row_num = max_rows_adj // 2 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) self.tr_row_num = row_num + self.truncate_v = True self.tr_frame = frame + self.is_truncated = self.truncate_h or self.truncate_v def _to_str_columns(self) -> List[List[str]]: """ From 7dc2af0a2b2c6a7ae5a5e23beaa30b896969c24f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 20 Jul 2019 09:56:26 +0100 Subject: [PATCH 11/15] remove abcs --- pandas/io/formats/format.py | 15 ++++++++------- pandas/io/formats/html.py | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index e71357d608451..b79f469bb0584 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,7 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size -from typing import List, Optional, TextIO, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, TextIO, Tuple, Union from unicodedata import east_asian_width import numpy as np @@ -33,8 +33,6 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ( - ABCCategorical, - ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCSeries, @@ -50,6 +48,9 @@ from pandas.io.common import _expand_user, _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing +if TYPE_CHECKING: + from pandas import Series, DataFrame, Categorical + common_docstring = """ Parameters ---------- @@ -134,7 +135,7 @@ class CategoricalFormatter: def __init__( self, - categorical: ABCCategorical, + categorical: "Categorical", buf: Optional[TextIO] = None, length: bool = True, na_rep: str = "NaN", @@ -193,7 +194,7 @@ def to_string(self) -> str: class SeriesFormatter: def __init__( self, - series: ABCSeries, + series: "Series", buf: Optional[TextIO] = None, length: bool = True, header: bool = True, @@ -851,7 +852,7 @@ def to_html( else: raise TypeError("buf is not a file name and it has no write " " method") - def _get_formatted_column_labels(self, frame: ABCDataFrame) -> List[List[str]]: + def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: from pandas.core.index import _sparsify columns = frame.columns @@ -908,7 +909,7 @@ def show_row_idx_names(self) -> bool: def show_col_idx_names(self) -> bool: return all((self.has_column_names, self.show_index_names, self.header)) - def _get_formatted_index(self, frame: ABCDataFrame) -> List[str]: + def _get_formatted_index(self, frame: "DataFrame") -> List[str]: # Note: this is only used by to_string() and to_latex(), not by # to_html(). index = frame.index diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 16e4024c4bf66..c92acb77b75b0 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -8,7 +8,7 @@ from pandas._config import get_option -from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex +from pandas.core.dtypes.generic import ABCMultiIndex from pandas import option_context @@ -79,7 +79,7 @@ def row_levels(self) -> int: # not showing (row) index return 0 - def _get_columns_formatted_values(self) -> ABCIndex: + def _get_columns_formatted_values(self): return self.columns @property From 035ec58bcae81166c92ac971069ebf5b246c3de6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 20 Jul 2019 18:37:26 +0100 Subject: [PATCH 12/15] revert refactor of _chk_truncate --- pandas/io/formats/format.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b79f469bb0584..a9e92da85a3e5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,7 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size -from typing import TYPE_CHECKING, List, Optional, TextIO, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, TextIO, Tuple, Union, cast from unicodedata import east_asian_width import numpy as np @@ -231,9 +231,10 @@ def _chk_truncate(self) -> None: max_rows = self.max_rows # truncation determined by max_rows, actual truncated number of rows # used below by min_rows - self.truncate_v = False + truncate_v = max_rows and (len(self.series) > max_rows) series = self.series - if max_rows and (len(series) > max_rows): + if truncate_v: + max_rows = cast(int, max_rows) if min_rows: # if min_rows is set (not None or 0), set max_rows to minimum # of both @@ -244,10 +245,11 @@ def _chk_truncate(self) -> None: else: row_num = max_rows // 2 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) - self.tr_row_num = row_num - self.truncate_v = True - + self.tr_row_num = row_num # type: Optional[int] + else: + self.tr_row_num = None self.tr_series = series + self.truncate_v = truncate_v def _get_footer(self) -> str: name = self.series.name @@ -320,6 +322,7 @@ def to_string(self) -> str: if self.truncate_v: n_header_rows = 0 row_num = self.tr_row_num + row_num = cast(int, row_num) width = self.adj.len(fmt_values[row_num - 1]) if width > 3: dot_str = "..." @@ -551,11 +554,14 @@ def _chk_truncate(self) -> None: max_cols_adj = self.max_cols_adj max_rows_adj = self.max_rows_adj - self.truncate_h = False - self.truncate_v = False + truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj) + truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj) + frame = self.frame - if max_cols_adj and (len(self.columns) > max_cols_adj): - if max_cols_adj == 1: + if truncate_h: + if max_cols_adj == 0: + col_num = len(frame.columns) + elif max_cols_adj == 1: frame = frame.iloc[:, :max_cols] col_num = max_cols else: @@ -564,9 +570,7 @@ def _chk_truncate(self) -> None: (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 ) self.tr_col_num = col_num - self.truncate_h = True - - if max_rows_adj and (len(frame) > max_rows_adj): + if truncate_v: if max_rows_adj == 1: row_num = max_rows frame = frame.iloc[:max_rows, :] @@ -574,9 +578,12 @@ def _chk_truncate(self) -> None: row_num = max_rows_adj // 2 frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) self.tr_row_num = row_num - self.truncate_v = True + else: + self.tr_row_num = None self.tr_frame = frame + self.truncate_h = truncate_h + self.truncate_v = truncate_v self.is_truncated = self.truncate_h or self.truncate_v def _to_str_columns(self) -> List[List[str]]: @@ -643,7 +650,7 @@ def _to_str_columns(self) -> List[List[str]]: if truncate_h: col_num = self.tr_col_num - strcols.insert(col_num + 1, [" ..."] * (len(str_index))) + strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index))) if truncate_v: n_header_rows = len(str_index) - len(frame) row_num = self.tr_row_num From a58f1cd768cf3226b3cccf335884d450260e0348 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 20 Jul 2019 20:12:21 +0100 Subject: [PATCH 13/15] revert other refactorings --- pandas/io/formats/format.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a9e92da85a3e5..ff31a3b4e6a1f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -181,14 +181,18 @@ def to_string(self) -> str: else: return "" - values = [i.strip() for i in self._get_formatted_values()] - lines = ["[" + ", ".join(values) + "]"] + fmt_values = self._get_formatted_values() + + fmt_values = ["{i}".format(i=i) for i in fmt_values] + fmt_values = [i.strip() for i in fmt_values] + values = ", ".join(fmt_values) + result = ["[" + values + "]"] if self.footer: footer = self._get_footer() if footer: - lines.append(footer) + result.append(footer) - return str("\n".join(lines)) + return str("\n".join(result)) class SeriesFormatter: @@ -956,11 +960,13 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: return adjoined def _get_column_name_list(self) -> List[str]: + names = [] # type: List[str] columns = self.frame.columns if isinstance(columns, ABCMultiIndex): - return ["" if name is None else name for name in columns.names] + names.extend("" if name is None else name for name in columns.names) else: - return ["" if columns.name is None else columns.name] + names.append("" if columns.name is None else columns.name) + return names # ---------------------------------------------------------------------- From 66f60692fbc7fb39eb64ea35e9d1cbfc58ebc3f7 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 20 Jul 2019 20:40:13 +0100 Subject: [PATCH 14/15] add type hint to _get_columns_formatted_values was abc --- pandas/io/formats/html.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index c92acb77b75b0..f682ddcc3c77e 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -4,7 +4,7 @@ from collections import OrderedDict from textwrap import dedent -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from pandas._config import get_option @@ -79,7 +79,7 @@ def row_levels(self) -> int: # not showing (row) index return 0 - def _get_columns_formatted_values(self): + def _get_columns_formatted_values(self) -> Iterable[Any]: return self.columns @property @@ -90,12 +90,12 @@ def is_truncated(self) -> bool: def ncols(self) -> int: return len(self.fmt.tr_frame.columns) - def write(self, s: str, indent: int = 0) -> None: + def write(self, s: Any, indent: int = 0) -> None: rs = pprint_thing(s) self.elements.append(" " * indent + rs) def write_th( - self, s: str, header: bool = False, indent: int = 0, tags: Optional[str] = None + self, s: Any, header: bool = False, indent: int = 0, tags: Optional[str] = None ) -> None: """ Method for writting a formatted cell. @@ -125,11 +125,11 @@ def write_th( self._write_cell(s, kind="th", indent=indent, tags=tags) - def write_td(self, s: str, indent: int = 0, tags: Optional[str] = None) -> None: + def write_td(self, s: Any, indent: int = 0, tags: Optional[str] = None) -> None: self._write_cell(s, kind="td", indent=indent, tags=tags) def _write_cell( - self, s: str, kind: str = "td", indent: int = 0, tags: Optional[str] = None + self, s: Any, kind: str = "td", indent: int = 0, tags: Optional[str] = None ) -> None: if tags is not None: start_tag = "<{kind} {tags}>".format(kind=kind, tags=tags) @@ -162,7 +162,7 @@ def _write_cell( def write_tr( self, - line: List[str], + line: Iterable[Any], indent: int = 0, indent_delta: int = 0, header: bool = False, From dd3c46dde867b30f474b8ae805d73e4d6c2bd435 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 20 Jul 2019 23:12:58 +0100 Subject: [PATCH 15/15] remove Any from Iterable --- pandas/io/formats/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index f682ddcc3c77e..91e90a78d87a7 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -79,7 +79,7 @@ def row_levels(self) -> int: # not showing (row) index return 0 - def _get_columns_formatted_values(self) -> Iterable[Any]: + def _get_columns_formatted_values(self) -> Iterable: return self.columns @property @@ -162,7 +162,7 @@ def _write_cell( def write_tr( self, - line: Iterable[Any], + line: Iterable, indent: int = 0, indent_delta: int = 0, header: bool = False,