Skip to content

ENH: to_{html, string} col_space col specific #32903

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 18, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ def _repr_html_(self) -> Optional[str]:
header="Write out the column names. If a list of strings "
"is given, it is assumed to be aliases for the "
"column names",
col_space_type="int",
col_space_type="int, list or dict of int",
col_space="The minimum width of each column",
)
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
Expand Down Expand Up @@ -2318,7 +2318,7 @@ def to_parquet(
@Substitution(
header_type="bool",
header="Whether to print column labels, default True",
col_space_type="str or int",
col_space_type="str or int, list or dict of int or str",
col_space="The minimum width of each column in CSS length "
"units. An int is assumed to be px units.\n\n"
" .. versionadded:: 0.25.0\n"
Expand Down
50 changes: 39 additions & 11 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@
List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
]
FloatFormatType = Union[str, Callable, "EngFormatter"]
ColspaceType = Union[
Mapping[Union[str, int], Union[str, int]],
]
ColspaceArgType = Union[
str,
int,
List[Union[str, int]],
Tuple[Union[str, int], ...],
Mapping[Union[str, int], Union[str, int]],
]

common_docstring = """
Parameters
Expand Down Expand Up @@ -530,11 +540,13 @@ class DataFrameFormatter(TableFormatter):
__doc__ = __doc__ if __doc__ else ""
__doc__ += common_docstring + return_docstring

col_space: ColspaceType

def __init__(
self,
frame: "DataFrame",
columns: Optional[Sequence[str]] = None,
col_space: Optional[Union[str, int]] = None,
col_space: Optional[ColspaceArgType] = None,
header: Union[bool, Sequence[str]] = True,
index: bool = True,
na_rep: str = "NaN",
Expand Down Expand Up @@ -574,7 +586,26 @@ def __init__(
)
self.na_rep = na_rep
self.decimal = decimal
self.col_space = col_space
if col_space is None:
self.col_space = {}
elif isinstance(col_space, (int, str)):
self.col_space = {"": col_space}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor, but you can create the dict inline using a comprehension on one line, instead of doing this and then calling update on the next line.

Copy link
Contributor Author

@quangngd quangngd Jun 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd The "" (empty string) key is for the row id column. I couldn't think of a way to turn this into a one-liner.

self.col_space.update({column: col_space for column in self.frame.columns})
elif isinstance(col_space, dict):
for column in col_space.keys():
if column not in self.frame.columns and column != "":
raise ValueError(
f"Col_space is defined for an unknown column: {column}"
)
self.col_space = col_space
else:
if len(frame.columns) != len(col_space):
raise ValueError(
f"Col_space length({len(col_space)}) should match "
f"DataFrame number of columns({len(frame.columns)})"
)
self.col_space = dict(zip(self.frame.columns, col_space))

self.header = header
self.index = index
self.line_width = line_width
Expand Down Expand Up @@ -702,7 +733,7 @@ def _to_str_columns(self) -> List[List[str]]:
"""
# this method is not used by to_html where self.col_space
# could be a string so safe to cast
self.col_space = cast(int, self.col_space)
col_space = {k: cast(int, v) for k, v in self.col_space.items()}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the cast still required here?

Copy link
Contributor Author

@quangngd quangngd Jun 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd It is not an actual cast; typing.cast is only used to signal to the type checker (mypy). In this case self.col_space can be of type dict<str> by definition, but we are sure that it is dict<int> within this scope.


frame = self.tr_frame
# may include levels names also
Expand All @@ -714,10 +745,7 @@ def _to_str_columns(self) -> List[List[str]]:
for i, c in enumerate(frame):
fmt_values = self._format_col(i)
fmt_values = _make_fixed_width(
fmt_values,
self.justify,
minimum=(self.col_space or 0),
adj=self.adj,
fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj,
)
stringified.append(fmt_values)
else:
Expand All @@ -741,7 +769,7 @@ def _to_str_columns(self) -> List[List[str]]:
for i, c in enumerate(frame):
cheader = str_columns[i]
header_colwidth = max(
self.col_space or 0, *(self.adj.len(x) for x in cheader)
col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
)
fmt_values = self._format_col(i)
fmt_values = _make_fixed_width(
Expand Down Expand Up @@ -932,7 +960,7 @@ def _format_col(self, i: int) -> List[str]:
formatter,
float_format=self.float_format,
na_rep=self.na_rep,
space=self.col_space,
space=self.col_space.get(frame.columns[i]),
decimal=self.decimal,
)

Expand Down Expand Up @@ -1025,7 +1053,7 @@ def show_col_idx_names(self) -> bool:
def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
# Note: this is only used by to_string() and to_latex(), not by
# to_html(). so safe to cast col_space here.
self.col_space = cast(int, self.col_space)
col_space = {k: cast(int, v) for k, v in self.col_space.items()}
index = frame.index
columns = frame.columns
fmt = self._get_formatter("__index__")
Expand All @@ -1043,7 +1071,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
fmt_index = [
tuple(
_make_fixed_width(
list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj
list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj,
)
)
for x in fmt_index
Expand Down
13 changes: 9 additions & 4 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,11 @@ def __init__(
self.border = border
self.table_id = self.fmt.table_id
self.render_links = self.fmt.render_links
if isinstance(self.fmt.col_space, int):
self.fmt.col_space = f"{self.fmt.col_space}px"

self.col_space = {
column: f"{value}px" if isinstance(value, int) else value
for column, value in self.fmt.col_space.items()
}

@property
def show_row_idx_names(self) -> bool:
Expand Down Expand Up @@ -120,9 +123,11 @@ def write_th(
-------
A written <th> cell.
"""
if header and self.fmt.col_space is not None:
col_space = self.col_space.get(s, None)

if header and col_space is not None:
tags = tags or ""
tags += f'style="min-width: {self.fmt.col_space};"'
tags += f'style="min-width: {col_space};"'

self._write_cell(s, kind="th", indent=indent, tags=tags)

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,30 @@ def test_to_string_with_col_space(self):
no_header = df.to_string(col_space=20, header=False)
assert len(with_header_row1) == len(no_header)

def test_to_string_with_column_specific_col_space(self):
    """Exercise list and dict forms of ``col_space`` in ``to_string``."""
    frame = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])

    # A list-like col_space whose length differs from the number of
    # columns (too short or too long) must be rejected.
    length_msg = (
        "Col_space length\\(\\d+\\) should match "
        "DataFrame number of columns\\(\\d+\\)"
    )
    for bad_widths in ([30, 40], [30, 40, 50, 60]):
        with pytest.raises(ValueError, match=length_msg):
            frame.to_string(col_space=bad_widths)

    # A dict col_space naming a column that is not in the frame ("d")
    # must be rejected as well.
    with pytest.raises(ValueError, match="unknown column"):
        frame.to_string(col_space={"a": "foo", "b": 23, "d": 34})

    # Expected line width: 3 separating spaces + 1 for the row id column
    # + the requested widths of columns a, b and c.
    expected_width = 3 + 1 + 10 + 11 + 12
    for widths in ({"a": 10, "b": 11, "c": 12}, [10, 11, 12]):
        rendered = frame.to_string(col_space=widths)
        first_data_row = rendered.split("\n")[1]
        assert len(first_data_row) == expected_width

def test_to_string_truncate_indices(self):
for index in [
tm.makeStringIndex,
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/io/formats/test_to_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,36 @@ def test_to_html_with_col_space(col_space):
assert str(col_space) in h


def test_to_html_with_column_specific_col_space():
    """Exercise list and dict forms of ``col_space`` in ``to_html``."""
    frame = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])

    def header_lines(html):
        # Keep only the lines of the rendered table that contain a <th> cell.
        return [line for line in html.split("\n") if re.search(r"<th[>\s]", line)]

    # A list-like col_space whose length differs from the number of
    # columns (too short or too long) must be rejected.
    length_msg = (
        "Col_space length\\(\\d+\\) should match "
        "DataFrame number of columns\\(\\d+\\)"
    )
    for bad_widths in ([30, 40], [30, 40, 50, 60]):
        with pytest.raises(ValueError, match=length_msg):
            frame.to_html(col_space=bad_widths)

    # A dict col_space naming a column that is not in the frame ("d")
    # must be rejected as well.
    with pytest.raises(ValueError, match="unknown column"):
        frame.to_html(col_space={"a": "foo", "b": 23, "d": 34})

    # Dict form: string values are emitted verbatim, ints get a "px"
    # suffix, and a column without an entry ("c") gets no style at all.
    # NOTE(review): hdrs[0] is presumably the blank index header cell,
    # which is why the column checks start at index 1 -- confirm.
    hdrs = header_lines(frame.to_html(col_space={"a": "2em", "b": 23}))
    assert 'min-width: 2em;">a</th>' in hdrs[1]
    assert 'min-width: 23px;">b</th>' in hdrs[2]
    assert "<th>c</th>" in hdrs[3]

    # List form: values are matched to columns by position.
    hdrs = header_lines(frame.to_html(col_space=["1em", 2, 3]))
    assert 'min-width: 1em;">a</th>' in hdrs[1]
    assert 'min-width: 2px;">b</th>' in hdrs[2]
    assert 'min-width: 3px;">c</th>' in hdrs[3]


def test_to_html_with_empty_string_label():
# GH 3547, to_html regards empty string labels as repeated labels
data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}
Expand Down