ENH: to_{html, string} col_space col specific (pandas-dev#32903)

quangngd · web-flow · commit a07748f93259 · 2020-06-18T08:21:37.000-07:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -294,6 +294,7 @@ Other enhancements
 - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`).
 - :meth:`~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`).
 - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
+- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -776,7 +776,7 @@ def _repr_html_(self) -> Optional[str]:
         header="Write out the column names. If a list of strings "
         "is given, it is assumed to be aliases for the "
         "column names",
-        col_space_type="int",
+        col_space_type="int, list or dict of int",
         col_space="The minimum width of each column",
     )
     @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
@@ -2328,7 +2328,7 @@ def to_parquet(
     @Substitution(
         header_type="bool",
         header="Whether to print column labels, default True",
-        col_space_type="str or int",
+        col_space_type="str or int, list or dict of int or str",
         col_space="The minimum width of each column in CSS length "
         "units.  An int is assumed to be px units.\n\n"
         "            .. versionadded:: 0.25.0\n"
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -38,7 +38,7 @@
 from pandas._libs.tslib import format_array_from_datetime
 from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
 from pandas._libs.tslibs.nattype import NaTType
-from pandas._typing import FilePathOrBuffer
+from pandas._typing import FilePathOrBuffer, Label
 from pandas.errors import AbstractMethodError
 
 from pandas.core.dtypes.common import (
@@ -77,6 +77,10 @@
     List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
 ]
 FloatFormatType = Union[str, Callable, "EngFormatter"]
+ColspaceType = Mapping[Label, Union[str, int]]
+ColspaceArgType = Union[
+    str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]],
+]
 
 common_docstring = """
         Parameters
@@ -530,11 +534,13 @@ class DataFrameFormatter(TableFormatter):
     __doc__ = __doc__ if __doc__ else ""
     __doc__ += common_docstring + return_docstring
 
+    col_space: ColspaceType
+
     def __init__(
         self,
         frame: "DataFrame",
         columns: Optional[Sequence[str]] = None,
-        col_space: Optional[Union[str, int]] = None,
+        col_space: Optional[ColspaceArgType] = None,
         header: Union[bool, Sequence[str]] = True,
         index: bool = True,
         na_rep: str = "NaN",
@@ -574,7 +580,27 @@ def __init__(
             )
         self.na_rep = na_rep
         self.decimal = decimal
-        self.col_space = col_space
+        if col_space is None:
+            self.col_space = {}
+        elif isinstance(col_space, (int, str)):
+            self.col_space = {"": col_space}
+            self.col_space.update({column: col_space for column in self.frame.columns})
+        elif isinstance(col_space, dict):
+            for column in col_space.keys():
+                if column not in self.frame.columns and column != "":
+                    raise ValueError(
+                        f"Col_space is defined for an unknown column: {column}"
+                    )
+            self.col_space = col_space
+        else:
+            col_space = cast(Sequence, col_space)
+            if len(frame.columns) != len(col_space):
+                raise ValueError(
+                    f"Col_space length({len(col_space)}) should match "
+                    f"DataFrame number of columns({len(frame.columns)})"
+                )
+            self.col_space = dict(zip(self.frame.columns, col_space))
+
         self.header = header
         self.index = index
         self.line_width = line_width
@@ -702,7 +728,7 @@ def _to_str_columns(self) -> List[List[str]]:
         """
         # this method is not used by to_html where self.col_space
         # could be a string so safe to cast
-        self.col_space = cast(int, self.col_space)
+        col_space = {k: cast(int, v) for k, v in self.col_space.items()}
 
         frame = self.tr_frame
         # may include levels names also
@@ -714,10 +740,7 @@ def _to_str_columns(self) -> List[List[str]]:
             for i, c in enumerate(frame):
                 fmt_values = self._format_col(i)
                 fmt_values = _make_fixed_width(
-                    fmt_values,
-                    self.justify,
-                    minimum=(self.col_space or 0),
-                    adj=self.adj,
+                    fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj,
                 )
                 stringified.append(fmt_values)
         else:
@@ -741,7 +764,7 @@ def _to_str_columns(self) -> List[List[str]]:
             for i, c in enumerate(frame):
                 cheader = str_columns[i]
                 header_colwidth = max(
-                    self.col_space or 0, *(self.adj.len(x) for x in cheader)
+                    col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
                 )
                 fmt_values = self._format_col(i)
                 fmt_values = _make_fixed_width(
@@ -932,7 +955,7 @@ def _format_col(self, i: int) -> List[str]:
             formatter,
             float_format=self.float_format,
             na_rep=self.na_rep,
-            space=self.col_space,
+            space=self.col_space.get(frame.columns[i]),
             decimal=self.decimal,
         )
 
@@ -1025,7 +1048,7 @@ def show_col_idx_names(self) -> bool:
     def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
         # Note: this is only used by to_string() and to_latex(), not by
         # to_html(). so safe to cast col_space here.
-        self.col_space = cast(int, self.col_space)
+        col_space = {k: cast(int, v) for k, v in self.col_space.items()}
         index = frame.index
         columns = frame.columns
         fmt = self._get_formatter("__index__")
@@ -1043,7 +1066,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
         fmt_index = [
             tuple(
                 _make_fixed_width(
-                    list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj
+                    list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj,
                 )
             )
             for x in fmt_index
diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py
@@ -53,8 +53,11 @@ def __init__(
         self.border = border
         self.table_id = self.fmt.table_id
         self.render_links = self.fmt.render_links
-        if isinstance(self.fmt.col_space, int):
-            self.fmt.col_space = f"{self.fmt.col_space}px"
+
+        self.col_space = {
+            column: f"{value}px" if isinstance(value, int) else value
+            for column, value in self.fmt.col_space.items()
+        }
 
     @property
     def show_row_idx_names(self) -> bool:
@@ -120,9 +123,11 @@ def write_th(
         -------
         A written <th> cell.
         """
-        if header and self.fmt.col_space is not None:
+        col_space = self.col_space.get(s, None)
+
+        if header and col_space is not None:
             tags = tags or ""
-            tags += f'style="min-width: {self.fmt.col_space};"'
+            tags += f'style="min-width: {col_space};"'
 
         self._write_cell(s, kind="th", indent=indent, tags=tags)
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -1047,6 +1047,33 @@ def test_to_string_with_col_space(self):
         no_header = df.to_string(col_space=20, header=False)
         assert len(with_header_row1) == len(no_header)
 
+    def test_to_string_with_column_specific_col_space_raises(self):
+        df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
+
+        msg = (
+            "Col_space length\\(\\d+\\) should match "
+            "DataFrame number of columns\\(\\d+\\)"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.to_string(col_space=[30, 40])
+
+        with pytest.raises(ValueError, match=msg):
+            df.to_string(col_space=[30, 40, 50, 60])
+
+        msg = "unknown column"
+        with pytest.raises(ValueError, match=msg):
+            df.to_string(col_space={"a": "foo", "b": 23, "d": 34})
+
+    def test_to_string_with_column_specific_col_space(self):
+        df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
+
+        result = df.to_string(col_space={"a": 10, "b": 11, "c": 12})
+        # 3 separating spaces + the width of each column (index, a, b, c)
+        assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12)
+
+        result = df.to_string(col_space=[10, 11, 12])
+        assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12)
+
     def test_to_string_truncate_indices(self):
         for index in [
             tm.makeStringIndex,
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
@@ -78,6 +78,40 @@ def test_to_html_with_col_space(col_space):
         assert str(col_space) in h
 
 
+def test_to_html_with_column_specific_col_space_raises():
+    df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
+
+    msg = (
+        "Col_space length\\(\\d+\\) should match "
+        "DataFrame number of columns\\(\\d+\\)"
+    )
+    with pytest.raises(ValueError, match=msg):
+        df.to_html(col_space=[30, 40])
+
+    with pytest.raises(ValueError, match=msg):
+        df.to_html(col_space=[30, 40, 50, 60])
+
+    msg = "unknown column"
+    with pytest.raises(ValueError, match=msg):
+        df.to_html(col_space={"a": "foo", "b": 23, "d": 34})
+
+
+def test_to_html_with_column_specific_col_space():
+    df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
+
+    result = df.to_html(col_space={"a": "2em", "b": 23})
+    hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
+    assert 'min-width: 2em;">a</th>' in hdrs[1]
+    assert 'min-width: 23px;">b</th>' in hdrs[2]
+    assert "<th>c</th>" in hdrs[3]
+
+    result = df.to_html(col_space=["1em", 2, 3])
+    hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
+    assert 'min-width: 1em;">a</th>' in hdrs[1]
+    assert 'min-width: 2px;">b</th>' in hdrs[2]
+    assert 'min-width: 3px;">c</th>' in hdrs[3]
+
+
 def test_to_html_with_empty_string_label():
     # GH 3547, to_html regards empty string labels as repeated labels
     data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}