Skip to content

Commit a07748f

Browse files
authored
EHN: to_{html, string} col_space col specific (pandas-dev#32903)
1 parent 085e75e commit a07748f

File tree

6 files changed

+108
-18
lines changed

6 files changed

+108
-18
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ Other enhancements
294294
- :meth:`~pandas.io.json.read_json` now accepts an `nrows` parameter (:issue:`33916`).
295295
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
296296
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
297+
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
297298

298299
.. ---------------------------------------------------------------------------
299300

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ def _repr_html_(self) -> Optional[str]:
776776
header="Write out the column names. If a list of strings "
777777
"is given, it is assumed to be aliases for the "
778778
"column names",
779-
col_space_type="int",
779+
col_space_type="int, list or dict of int",
780780
col_space="The minimum width of each column",
781781
)
782782
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
@@ -2328,7 +2328,7 @@ def to_parquet(
23282328
@Substitution(
23292329
header_type="bool",
23302330
header="Whether to print column labels, default True",
2331-
col_space_type="str or int",
2331+
col_space_type="str or int, list or dict of int or str",
23322332
col_space="The minimum width of each column in CSS length "
23332333
"units. An int is assumed to be px units.\n\n"
23342334
" .. versionadded:: 0.25.0\n"

pandas/io/formats/format.py

+35-12
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from pandas._libs.tslib import format_array_from_datetime
3939
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
4040
from pandas._libs.tslibs.nattype import NaTType
41-
from pandas._typing import FilePathOrBuffer
41+
from pandas._typing import FilePathOrBuffer, Label
4242
from pandas.errors import AbstractMethodError
4343

4444
from pandas.core.dtypes.common import (
@@ -77,6 +77,10 @@
7777
List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
7878
]
7979
FloatFormatType = Union[str, Callable, "EngFormatter"]
80+
ColspaceType = Mapping[Label, Union[str, int]]
81+
ColspaceArgType = Union[
82+
str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]],
83+
]
8084

8185
common_docstring = """
8286
Parameters
@@ -530,11 +534,13 @@ class DataFrameFormatter(TableFormatter):
530534
__doc__ = __doc__ if __doc__ else ""
531535
__doc__ += common_docstring + return_docstring
532536

537+
col_space: ColspaceType
538+
533539
def __init__(
534540
self,
535541
frame: "DataFrame",
536542
columns: Optional[Sequence[str]] = None,
537-
col_space: Optional[Union[str, int]] = None,
543+
col_space: Optional[ColspaceArgType] = None,
538544
header: Union[bool, Sequence[str]] = True,
539545
index: bool = True,
540546
na_rep: str = "NaN",
@@ -574,7 +580,27 @@ def __init__(
574580
)
575581
self.na_rep = na_rep
576582
self.decimal = decimal
577-
self.col_space = col_space
583+
if col_space is None:
584+
self.col_space = {}
585+
elif isinstance(col_space, (int, str)):
586+
self.col_space = {"": col_space}
587+
self.col_space.update({column: col_space for column in self.frame.columns})
588+
elif isinstance(col_space, dict):
589+
for column in col_space.keys():
590+
if column not in self.frame.columns and column != "":
591+
raise ValueError(
592+
f"Col_space is defined for an unknown column: {column}"
593+
)
594+
self.col_space = col_space
595+
else:
596+
col_space = cast(Sequence, col_space)
597+
if len(frame.columns) != len(col_space):
598+
raise ValueError(
599+
f"Col_space length({len(col_space)}) should match "
600+
f"DataFrame number of columns({len(frame.columns)})"
601+
)
602+
self.col_space = dict(zip(self.frame.columns, col_space))
603+
578604
self.header = header
579605
self.index = index
580606
self.line_width = line_width
@@ -702,7 +728,7 @@ def _to_str_columns(self) -> List[List[str]]:
702728
"""
703729
# this method is not used by to_html where self.col_space
704730
# could be a string so safe to cast
705-
self.col_space = cast(int, self.col_space)
731+
col_space = {k: cast(int, v) for k, v in self.col_space.items()}
706732

707733
frame = self.tr_frame
708734
# may include levels names also
@@ -714,10 +740,7 @@ def _to_str_columns(self) -> List[List[str]]:
714740
for i, c in enumerate(frame):
715741
fmt_values = self._format_col(i)
716742
fmt_values = _make_fixed_width(
717-
fmt_values,
718-
self.justify,
719-
minimum=(self.col_space or 0),
720-
adj=self.adj,
743+
fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj,
721744
)
722745
stringified.append(fmt_values)
723746
else:
@@ -741,7 +764,7 @@ def _to_str_columns(self) -> List[List[str]]:
741764
for i, c in enumerate(frame):
742765
cheader = str_columns[i]
743766
header_colwidth = max(
744-
self.col_space or 0, *(self.adj.len(x) for x in cheader)
767+
col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
745768
)
746769
fmt_values = self._format_col(i)
747770
fmt_values = _make_fixed_width(
@@ -932,7 +955,7 @@ def _format_col(self, i: int) -> List[str]:
932955
formatter,
933956
float_format=self.float_format,
934957
na_rep=self.na_rep,
935-
space=self.col_space,
958+
space=self.col_space.get(frame.columns[i]),
936959
decimal=self.decimal,
937960
)
938961

@@ -1025,7 +1048,7 @@ def show_col_idx_names(self) -> bool:
10251048
def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
10261049
# Note: this is only used by to_string() and to_latex(), not by
10271050
# to_html(). so safe to cast col_space here.
1028-
self.col_space = cast(int, self.col_space)
1051+
col_space = {k: cast(int, v) for k, v in self.col_space.items()}
10291052
index = frame.index
10301053
columns = frame.columns
10311054
fmt = self._get_formatter("__index__")
@@ -1043,7 +1066,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
10431066
fmt_index = [
10441067
tuple(
10451068
_make_fixed_width(
1046-
list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj
1069+
list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj,
10471070
)
10481071
)
10491072
for x in fmt_index

pandas/io/formats/html.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,11 @@ def __init__(
5353
self.border = border
5454
self.table_id = self.fmt.table_id
5555
self.render_links = self.fmt.render_links
56-
if isinstance(self.fmt.col_space, int):
57-
self.fmt.col_space = f"{self.fmt.col_space}px"
56+
57+
self.col_space = {
58+
column: f"{value}px" if isinstance(value, int) else value
59+
for column, value in self.fmt.col_space.items()
60+
}
5861

5962
@property
6063
def show_row_idx_names(self) -> bool:
@@ -120,9 +123,11 @@ def write_th(
120123
-------
121124
A written <th> cell.
122125
"""
123-
if header and self.fmt.col_space is not None:
126+
col_space = self.col_space.get(s, None)
127+
128+
if header and col_space is not None:
124129
tags = tags or ""
125-
tags += f'style="min-width: {self.fmt.col_space};"'
130+
tags += f'style="min-width: {col_space};"'
126131

127132
self._write_cell(s, kind="th", indent=indent, tags=tags)
128133

pandas/tests/io/formats/test_format.py

+27
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,33 @@ def test_to_string_with_col_space(self):
10471047
no_header = df.to_string(col_space=20, header=False)
10481048
assert len(with_header_row1) == len(no_header)
10491049

1050+
def test_to_string_with_column_specific_col_space_raises(self):
1051+
df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
1052+
1053+
msg = (
1054+
"Col_space length\\(\\d+\\) should match "
1055+
"DataFrame number of columns\\(\\d+\\)"
1056+
)
1057+
with pytest.raises(ValueError, match=msg):
1058+
df.to_string(col_space=[30, 40])
1059+
1060+
with pytest.raises(ValueError, match=msg):
1061+
df.to_string(col_space=[30, 40, 50, 60])
1062+
1063+
msg = "unknown column"
1064+
with pytest.raises(ValueError, match=msg):
1065+
df.to_string(col_space={"a": "foo", "b": 23, "d": 34})
1066+
1067+
def test_to_string_with_column_specific_col_space(self):
1068+
df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
1069+
1070+
result = df.to_string(col_space={"a": 10, "b": 11, "c": 12})
1071+
# 3 separating space + each col_space for (id, a, b, c)
1072+
assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12)
1073+
1074+
result = df.to_string(col_space=[10, 11, 12])
1075+
assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12)
1076+
10501077
def test_to_string_truncate_indices(self):
10511078
for index in [
10521079
tm.makeStringIndex,

pandas/tests/io/formats/test_to_html.py

+34
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,40 @@ def test_to_html_with_col_space(col_space):
7878
assert str(col_space) in h
7979

8080

81+
def test_to_html_with_column_specific_col_space_raises():
82+
df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
83+
84+
msg = (
85+
"Col_space length\\(\\d+\\) should match "
86+
"DataFrame number of columns\\(\\d+\\)"
87+
)
88+
with pytest.raises(ValueError, match=msg):
89+
df.to_html(col_space=[30, 40])
90+
91+
with pytest.raises(ValueError, match=msg):
92+
df.to_html(col_space=[30, 40, 50, 60])
93+
94+
msg = "unknown column"
95+
with pytest.raises(ValueError, match=msg):
96+
df.to_html(col_space={"a": "foo", "b": 23, "d": 34})
97+
98+
99+
def test_to_html_with_column_specific_col_space():
100+
df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
101+
102+
result = df.to_html(col_space={"a": "2em", "b": 23})
103+
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
104+
assert 'min-width: 2em;">a</th>' in hdrs[1]
105+
assert 'min-width: 23px;">b</th>' in hdrs[2]
106+
assert "<th>c</th>" in hdrs[3]
107+
108+
result = df.to_html(col_space=["1em", 2, 3])
109+
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
110+
assert 'min-width: 1em;">a</th>' in hdrs[1]
111+
assert 'min-width: 2px;">b</th>' in hdrs[2]
112+
assert 'min-width: 3px;">c</th>' in hdrs[3]
113+
114+
81115
def test_to_html_with_empty_string_label():
82116
# GH 3547, to_html regards empty string labels as repeated labels
83117
data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}

0 commit comments

Comments
 (0)