From 3245c0bb2676f224ad5f2a90baf01965214d5b61 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 3 Apr 2019 08:48:21 +0100 Subject: [PATCH 1/6] initial refactor --- pandas/io/formats/format.py | 49 +++++++++++++++++++++++-------------- pandas/io/formats/html.py | 19 ++++++++------ 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b658c8a53dc8b..df14a53a18684 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -374,6 +374,15 @@ def _get_formatter(self, i): i = self.columns[i] return self.formatters.get(i, None) + def _format_col(self, i): + frame = self.tr_frame + formatter = self._get_formatter(i) + values_to_format = frame.iloc[:, i]._formatting_values() + return format_array(values_to_format, formatter, + float_format=self.float_format, na_rep=self.na_rep, + space=self.col_space, decimal=self.decimal, + max_colwidth=self.max_colwidth) + class DataFrameFormatter(TableFormatter): """ @@ -437,6 +446,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self._chk_truncate() self.adj = _get_adjustment() + self.max_colwidth = None # use display.max_colwidth setting def _chk_truncate(self): """ @@ -712,14 +722,6 @@ def to_latex(self, column_format=None, longtable=False, encoding=None, raise TypeError('buf is not a file name and it has no write ' 'method') - def _format_col(self, i): - frame = self.tr_frame - formatter = self._get_formatter(i) - values_to_format = frame.iloc[:, i]._formatting_values() - return format_array(values_to_format, formatter, - float_format=self.float_format, na_rep=self.na_rep, - space=self.col_space, decimal=self.decimal) - def to_html(self, classes=None, notebook=False, border=None): """ Render a DataFrame to a html table. @@ -855,7 +857,7 @@ def _get_column_name_list(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right', decimal='.', - leading_space=None): + leading_space=None, max_colwidth=None): """ Format an array for printing. @@ -877,6 +879,11 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', When formatting an Index subclass (e.g. IntervalIndex._format_native_types), we don't want the leading space since it should be left-aligned. + max_colwidth: False, int or None, optional, default None + Whether the array should be formatted with strings truncated. + * False: do not truncate strings + * int: the maximum width of strings + * None: use display.max_colwidth setting Returns ------- @@ -907,10 +914,13 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', if digits is None: digits = get_option("display.precision") + if max_colwidth is None: + max_colwidth = get_option("display.max_colwidth") + fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep, float_format=float_format, formatter=formatter, space=space, justify=justify, decimal=decimal, - leading_space=leading_space) + leading_space=leading_space, max_colwidth=max_colwidth) return fmt_obj.get_result() @@ -919,7 +929,8 @@ class GenericArrayFormatter(object): def __init__(self, values, digits=7, formatter=None, na_rep='NaN', space=12, float_format=None, justify='right', decimal='.', - quoting=None, fixed_width=True, leading_space=None): + quoting=None, fixed_width=True, leading_space=None, + max_colwidth=None): self.values = values self.digits = digits self.na_rep = na_rep @@ -931,10 +942,12 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN', self.quoting = quoting self.fixed_width = fixed_width self.leading_space = leading_space + self.max_colwidth = max_colwidth def get_result(self): fmt_values = self._format_strings() - return _make_fixed_width(fmt_values, self.justify) + return _make_fixed_width(fmt_values, self.justify, + max_colwidth=self.max_colwidth) def _format_strings(self): if self.float_format is None: @@ -1395,7 +1408,8 @@ def _formatter(x): return _formatter -def _make_fixed_width(strings, justify='right', minimum=None, adj=None): +def _make_fixed_width(strings, justify='right', minimum=None, adj=None, + max_colwidth=None): if len(strings) == 0 or justify == 'all': return strings @@ -1408,13 +1422,12 @@ def _make_fixed_width(strings, justify='right', minimum=None, adj=None): if minimum is not None: max_len = max(minimum, max_len) - conf_max = get_option("display.max_colwidth") - if conf_max is not None and max_len > conf_max: - max_len = conf_max + if max_colwidth is not None and max_len > max_colwidth: + max_len = max_colwidth def just(x): - if conf_max is not None: - if (conf_max > 3) & (adj.len(x) > max_len): + if max_colwidth is not None: + if (max_colwidth > 3) & (adj.len(x) > max_len): x = x[:max_len - 3] + '...' return x diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index eba56bd0e4d87..8d6bddb58f221 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -12,8 +12,6 @@ from pandas.core.dtypes.generic import ABCMultiIndex -from pandas import option_context - from pandas.io.common import _is_url from pandas.io.formats.format import TableFormatter, get_level_lengths from pandas.io.formats.printing import pprint_thing @@ -46,6 +44,13 @@ def __init__(self, formatter, classes=None, border=None): self.border = border self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links + self.max_colwidth = False # do not truncate strings + self.tr_frame = self.fmt.tr_frame + self.formatters = self.fmt.formatters + self.float_format = self.fmt.float_format + self.na_rep = self.fmt.na_rep + self.col_space = self.fmt.col_space + self.decimal = self.fmt.decimal @property def show_row_idx_names(self): @@ -320,10 +325,7 @@ def _write_header(self, indent): self.write('', indent) def _get_formatted_values(self): - with option_context('display.max_colwidth', 999999): - fmt_values = {i: self.fmt._format_col(i) - for i in range(self.ncols)} - return fmt_values + return {i: self._format_col(i) for i in range(self.ncols)} def _write_body(self, indent): self.write('', indent) @@ -491,8 +493,9 @@ class NotebookFormatter(HTMLFormatter): DataFrame._repr_html_() and DataFrame.to_html(notebook=True) """ - def _get_formatted_values(self): - return {i: self.fmt._format_col(i) for i in range(self.ncols)} + def __init__(self, formatter, classes=None, border=None): + super().__init__(formatter, classes, border) + self.max_colwidth = None # use display.max_colwidth setting def write_style(self): # We use the "scoped" attribute here so that the desired From 103fc2cbe06ee09785c0e2f6c0e8d799d70488ef Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 11:57:46 +0100 Subject: [PATCH 2/6] apply black code style to changes --- pandas/io/formats/format.py | 76 +++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4fba8e0955fd3..439a54ba3c8fd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -424,10 +424,15 @@ def _format_col(self, i): frame = self.tr_frame formatter = self._get_formatter(i) values_to_format = frame.iloc[:, i]._formatting_values() - return format_array(values_to_format, formatter, - float_format=self.float_format, na_rep=self.na_rep, - space=self.col_space, decimal=self.decimal, - max_colwidth=self.max_colwidth) + return format_array( + values_to_format, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + space=self.col_space, + decimal=self.decimal, + max_colwidth=self.max_colwidth, + ) class DataFrameFormatter(TableFormatter): @@ -953,9 +958,18 @@ def _get_column_name_list(self): # Array formatters -def format_array(values, formatter, float_format=None, na_rep='NaN', - digits=None, space=None, justify='right', decimal='.', - leading_space=None, max_colwidth=None): +def format_array( + values, + formatter, + float_format=None, + na_rep="NaN", + digits=None, + space=None, + justify="right", + decimal=".", + leading_space=None, + max_colwidth=None, +): """ Format an array for printing. @@ -1015,20 +1029,38 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', if max_colwidth is None: max_colwidth = get_option("display.max_colwidth") - fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep, - float_format=float_format, formatter=formatter, - space=space, justify=justify, decimal=decimal, - leading_space=leading_space, max_colwidth=max_colwidth) + fmt_obj = fmt_klass( + values, + digits=digits, + na_rep=na_rep, + float_format=float_format, + formatter=formatter, + space=space, + justify=justify, + decimal=decimal, + leading_space=leading_space, + max_colwidth=max_colwidth, + ) return fmt_obj.get_result() class GenericArrayFormatter: - - def __init__(self, values, digits=7, formatter=None, na_rep='NaN', - space=12, float_format=None, justify='right', decimal='.', - quoting=None, fixed_width=True, leading_space=None, - max_colwidth=None): + def __init__( + self, + values, + digits=7, + formatter=None, + na_rep="NaN", + space=12, + float_format=None, + justify="right", + decimal=".", + quoting=None, + fixed_width=True, + leading_space=None, + max_colwidth=None, + ): self.values = values self.digits = digits self.na_rep = na_rep @@ -1044,8 +1076,9 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN', def get_result(self): fmt_values = self._format_strings() - return _make_fixed_width(fmt_values, self.justify, - max_colwidth=self.max_colwidth) + return _make_fixed_width( + fmt_values, self.justify, max_colwidth=self.max_colwidth + ) def _format_strings(self): if self.float_format is None: @@ -1535,8 +1568,9 @@ def _formatter(x): return _formatter -def _make_fixed_width(strings, justify='right', minimum=None, adj=None, - max_colwidth=None): +def _make_fixed_width( + strings, justify="right", minimum=None, adj=None, max_colwidth=None +): if len(strings) == 0 or justify == "all": return strings @@ -1555,7 +1589,7 @@ def _make_fixed_width(strings, justify='right', minimum=None, adj=None, def just(x): if max_colwidth is not None: if (max_colwidth > 3) & (adj.len(x) > max_len): - x = x[:max_len - 3] + '...' + x = x[: max_len - 3] + "..." return x strings = [just(x) for x in strings] From a1fd43b87cd90fe8da9b0bb998a04b3a104b6aa4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 12:35:05 +0100 Subject: [PATCH 3/6] add docstring, type annotations and versionadded tag --- pandas/io/formats/format.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 439a54ba3c8fd..c0839d9e4ee7f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,6 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size +from typing import Optional, Union from unicodedata import east_asian_width import numpy as np @@ -420,7 +421,11 @@ def _get_formatter(self, i): i = self.columns[i] return self.formatters.get(i, None) - def _format_col(self, i): + def _format_col(self, i: int): + """ + Calls `format_array` for column `i` of truncated DataFrame with + optional `formatter` + """ frame = self.tr_frame formatter = self._get_formatter(i) values_to_format = frame.iloc[:, i]._formatting_values() @@ -968,7 +973,7 @@ def format_array( justify="right", decimal=".", leading_space=None, - max_colwidth=None, + max_colwidth: Optional[Union[bool, int]] = None, ): """ Format an array for printing. @@ -997,6 +1002,8 @@ def format_array( * int: the maximum width of strings * None: use display.max_colwidth setting + .. versionadded:: 1.0 + Returns ------- List[str] From 136256565ff5ac7092203f8091d42ae671334c73 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 17:13:45 +0100 Subject: [PATCH 4/6] fix mypy errors --- pandas/io/formats/format.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index c0839d9e4ee7f..36a21326c0a5f 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,7 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size -from typing import Optional, Union +from typing import Any, Optional, Union from unicodedata import east_asian_width import numpy as np @@ -40,6 +40,7 @@ ) from pandas.core.dtypes.missing import isna, notna +from pandas import DataFrame from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.index import Index, ensure_index @@ -403,6 +404,12 @@ class TableFormatter: is_truncated = False show_dimensions = None + tr_frame = None # type: DataFrame + float_format = None + na_rep = None + col_space = None + decimal = None + max_colwidth = None @property def should_show_dimensions(self): @@ -1010,7 +1017,7 @@ def format_array( """ if is_datetime64_dtype(values.dtype): - fmt_klass = Datetime64Formatter + fmt_klass = Datetime64Formatter # type: Any elif is_datetime64tz_dtype(values): fmt_klass = Datetime64TZFormatter elif is_timedelta64_dtype(values.dtype): From a5217ad423fdcb01ccf595e8b23f210bb979cc2f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 17:25:30 +0100 Subject: [PATCH 5/6] fix ImportError --- pandas/io/formats/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 36a21326c0a5f..e21d0dd37cfeb 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -33,6 +33,7 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCSeries, @@ -40,7 +41,6 @@ ) from pandas.core.dtypes.missing import isna, notna -from pandas import DataFrame from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.index import Index, ensure_index @@ -404,7 +404,7 @@ class TableFormatter: is_truncated = False show_dimensions = None - tr_frame = None # type: DataFrame + tr_frame = None # type: ABCDataFrame float_format = None na_rep = None col_space = None From 3a1fc5f029ffa7353015b62932601836426a24c6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 4 Jul 2019 17:44:07 +0100 Subject: [PATCH 6/6] update annotation --- pandas/io/formats/format.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index e21d0dd37cfeb..f691ab2119067 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -6,7 +6,7 @@ from functools import partial from io import StringIO from shutil import get_terminal_size -from typing import Any, Optional, Union +from typing import Optional, Type, Union from unicodedata import east_asian_width import numpy as np @@ -1017,7 +1017,7 @@ def format_array( """ if is_datetime64_dtype(values.dtype): - fmt_klass = Datetime64Formatter # type: Any + fmt_klass = Datetime64Formatter # type: Type[GenericArrayFormatter] elif is_datetime64tz_dtype(values): fmt_klass = Datetime64TZFormatter elif is_timedelta64_dtype(values.dtype):