diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 10522ff797c59..c1ab04a136e0f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -294,6 +294,7 @@ Other enhancements - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`). - :meth `~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`). - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`). +- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`). .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68c06715e1ea4..2c80f57e4ef5d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -776,7 +776,7 @@ def _repr_html_(self) -> Optional[str]: header="Write out the column names. If a list of strings " "is given, it is assumed to be aliases for the " "column names", - col_space_type="int", + col_space_type="int, list or dict of int", col_space="The minimum width of each column", ) @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) @@ -2328,7 +2328,7 @@ def to_parquet( @Substitution( header_type="bool", header="Whether to print column labels, default True", - col_space_type="str or int", + col_space_type="str or int, list or dict of int or str", col_space="The minimum width of each column in CSS length " "units. An int is assumed to be px units.\n\n" " ..
versionadded:: 0.25.0\n" diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 02339f4344d4d..68a88fee83187 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -38,7 +38,7 @@ from pandas._libs.tslib import format_array_from_datetime from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.nattype import NaTType -from pandas._typing import FilePathOrBuffer +from pandas._typing import FilePathOrBuffer, Label from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import ( @@ -77,6 +77,10 @@ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] ] FloatFormatType = Union[str, Callable, "EngFormatter"] +ColspaceType = Mapping[Label, Union[str, int]] +ColspaceArgType = Union[ + str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]], +] common_docstring = """ Parameters @@ -530,11 +534,13 @@ class DataFrameFormatter(TableFormatter): __doc__ = __doc__ if __doc__ else "" __doc__ += common_docstring + return_docstring + col_space: ColspaceType + def __init__( self, frame: "DataFrame", columns: Optional[Sequence[str]] = None, - col_space: Optional[Union[str, int]] = None, + col_space: Optional[ColspaceArgType] = None, header: Union[bool, Sequence[str]] = True, index: bool = True, na_rep: str = "NaN", @@ -574,7 +580,27 @@ def __init__( ) self.na_rep = na_rep self.decimal = decimal - self.col_space = col_space + if col_space is None: + self.col_space = {} + elif isinstance(col_space, (int, str)): + self.col_space = {"": col_space} + self.col_space.update({column: col_space for column in self.frame.columns}) + elif isinstance(col_space, dict): + for column in col_space.keys(): + if column not in self.frame.columns and column != "": + raise ValueError( + f"Col_space is defined for an unknown column: {column}" + ) + self.col_space = col_space + else: + col_space = cast(Sequence, col_space) + if len(frame.columns) != len(col_space): + raise 
ValueError( + f"Col_space length({len(col_space)}) should match " + f"DataFrame number of columns({len(frame.columns)})" + ) + self.col_space = dict(zip(self.frame.columns, col_space)) + self.header = header self.index = index self.line_width = line_width @@ -702,7 +728,7 @@ def _to_str_columns(self) -> List[List[str]]: """ # this method is not used by to_html where self.col_space # could be a string so safe to cast - self.col_space = cast(int, self.col_space) + col_space = {k: cast(int, v) for k, v in self.col_space.items()} frame = self.tr_frame # may include levels names also @@ -714,10 +740,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): fmt_values = self._format_col(i) fmt_values = _make_fixed_width( - fmt_values, - self.justify, - minimum=(self.col_space or 0), - adj=self.adj, + fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj, ) stringified.append(fmt_values) else: @@ -741,7 +764,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): cheader = str_columns[i] header_colwidth = max( - self.col_space or 0, *(self.adj.len(x) for x in cheader) + col_space.get(c, 0), *(self.adj.len(x) for x in cheader) ) fmt_values = self._format_col(i) fmt_values = _make_fixed_width( @@ -932,7 +955,7 @@ def _format_col(self, i: int) -> List[str]: formatter, float_format=self.float_format, na_rep=self.na_rep, - space=self.col_space, + space=self.col_space.get(frame.columns[i]), decimal=self.decimal, ) @@ -1025,7 +1048,7 @@ def show_col_idx_names(self) -> bool: def _get_formatted_index(self, frame: "DataFrame") -> List[str]: # Note: this is only used by to_string() and to_latex(), not by # to_html(). so safe to cast col_space here. 
- self.col_space = cast(int, self.col_space) + col_space = {k: cast(int, v) for k, v in self.col_space.items()} index = frame.index columns = frame.columns fmt = self._get_formatter("__index__") @@ -1043,7 +1066,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: fmt_index = [ tuple( _make_fixed_width( - list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj + list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj, ) ) for x in fmt_index diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index e31d977512f1e..7ea2417ceb24b 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -53,8 +53,11 @@ def __init__( self.border = border self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links - if isinstance(self.fmt.col_space, int): - self.fmt.col_space = f"{self.fmt.col_space}px" + + self.col_space = { + column: f"{value}px" if isinstance(value, int) else value + for column, value in self.fmt.col_space.items() + } @property def show_row_idx_names(self) -> bool: @@ -120,9 +123,11 @@ def write_th( ------- A written cell. 
""" - if header and self.fmt.col_space is not None: + col_space = self.col_space.get(s, None) + + if header and col_space is not None: tags = tags or "" - tags += f'style="min-width: {self.fmt.col_space};"' + tags += f'style="min-width: {col_space};"' self._write_cell(s, kind="th", indent=indent, tags=tags) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 23cad043f2177..3c40a2ae8d6b8 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1047,6 +1047,33 @@ def test_to_string_with_col_space(self): no_header = df.to_string(col_space=20, header=False) assert len(with_header_row1) == len(no_header) + def test_to_string_with_column_specific_col_space_raises(self): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" + ) + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40]) + + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) + + def test_to_string_with_column_specific_col_space(self): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) + # 3 separating space + each col_space for (id, a, b, c) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) + + result = df.to_string(col_space=[10, 11, 12]) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) + def test_to_string_truncate_indices(self): for index in [ tm.makeStringIndex, diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 9a14022d6f776..e85fd398964d0 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -78,6 +78,40 @@ 
def test_to_html_with_col_space(col_space): assert str(col_space) in h +def test_to_html_with_column_specific_col_space_raises(): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" + ) + with pytest.raises(ValueError, match=msg): + df.to_html(col_space=[30, 40]) + + with pytest.raises(ValueError, match=msg): + df.to_html(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_html(col_space={"a": "foo", "b": 23, "d": 34}) + + +def test_to_html_with_column_specific_col_space(): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + result = df.to_html(col_space={"a": "2em", "b": 23}) + hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)] + assert 'min-width: 2em;">a</th>' in hdrs[1] + assert 'min-width: 23px;">b</th>' in hdrs[2] + assert "<th>c</th>" in hdrs[3] + + result = df.to_html(col_space=["1em", 2, 3]) + hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)] + assert 'min-width: 1em;">a</th>' in hdrs[1] + assert 'min-width: 2px;">b</th>' in hdrs[2] + assert 'min-width: 3px;">c</th>' in hdrs[3] + + def test_to_html_with_empty_string_label(): # GH 3547, to_html regards empty string labels as repeated labels data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}