-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Don't ignore na_rep in DataFrame.to_html #36690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 25 commits
e1614ae
f56d893
247e79b
f33e4d0
e83b7ed
a240b49
7ee3ef2
72a812a
ca57e06
dd25388
4a599b2
dc6287a
faa8e2c
1374cdd
c81aa04
52f16fc
8c46ab7
6cb161c
b166ffc
199c560
5a054d7
4423dd7
0c49eb0
4b53c91
87c172a
265e2a8
8f0ca15
7be7c38
5a50ad0
1af22a5
37cc78c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,15 +98,20 @@ | |
index : bool, optional, default True | ||
Whether to print index (row) labels. | ||
na_rep : str, optional, default 'NaN' | ||
String representation of NAN to use. | ||
String representation of ``NaN`` to use. | ||
formatters : list, tuple or dict of one-param. functions, optional | ||
Formatter functions to apply to columns' elements by position or | ||
name. | ||
The result of each function must be a unicode string. | ||
List/tuple must be of length equal to the number of columns. | ||
float_format : one-parameter function, optional, default None | ||
Formatter function to apply to columns' elements if they are | ||
floats. The result of this function must be a unicode string. | ||
floats. This function must return a unicode string and will be | ||
applied only to the non-``NaN`` elements, with ``NaN`` being | ||
handled by ``na_rep``. | ||
|
||
.. versionchanged:: 1.2.0 | ||
|
||
sparsify : bool, optional, default True | ||
Set to False for a DataFrame with a hierarchical index to print | ||
every multiindex key at each row. | ||
|
@@ -1444,8 +1449,19 @@ def get_result_as_array(self) -> np.ndarray: | |
Returns the float values converted into strings using | ||
the parameters given at initialisation, as a numpy array | ||
""" | ||
|
||
def format_with_na_rep(values, formatter, na_rep): | ||
mask = isna(values) | ||
formatted = np.array( | ||
[ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you use this function more generally (e.g. maybe define it at module level)? This can be a follow-up as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems similar patterns occur elsewhere, although most are at the scalar level. |
||
formatter(val) if not m else na_rep | ||
for val, m in zip(values.ravel(), mask.ravel()) | ||
] | ||
).reshape(values.shape) | ||
return formatted | ||
|
||
if self.formatter is not None: | ||
return np.array([self.formatter(x) for x in self.values]) | ||
return format_with_na_rep(self.values, self.formatter, self.na_rep) | ||
|
||
if self.fixed_width: | ||
threshold = get_option("display.chop_threshold") | ||
|
@@ -1466,13 +1482,7 @@ def format_values_with(float_format): | |
# separate the wheat from the chaff | ||
values = self.values | ||
is_complex = is_complex_dtype(values) | ||
mask = isna(values) | ||
values = np.array(values, dtype="object") | ||
values[mask] = na_rep | ||
imask = (~mask).ravel() | ||
values.flat[imask] = np.array( | ||
[formatter(val) for val in values.ravel()[imask]] | ||
) | ||
values = format_with_na_rep(values, formatter, na_rep) | ||
|
||
if self.fixed_width: | ||
if is_complex: | ||
|
@@ -1534,10 +1544,6 @@ def format_values_with(float_format): | |
return formatted_values | ||
|
||
def _format_strings(self) -> List[str]: | ||
# shortcut | ||
if self.formatter is not None: | ||
return [self.formatter(x) for x in self.values] | ||
|
||
return list(self.get_result_as_array()) | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -820,3 +820,38 @@ def test_html_repr_min_rows(datapath, max_rows, min_rows, expected): | |
with option_context("display.max_rows", max_rows, "display.min_rows", min_rows): | ||
result = df._repr_html_() | ||
assert result == expected | ||
|
||
|
||
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) | ||
def test_to_html_na_rep_and_float_format(na_rep): | ||
# https://github.com/pandas-dev/pandas/issues/13828 | ||
df = DataFrame( | ||
[ | ||
["A", 1.2225], | ||
["A", None], | ||
], | ||
columns=["Group", "Data"], | ||
) | ||
result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format) | ||
expected = f"""<table border="1" class="dataframe"> | ||
<thead> | ||
<tr style="text-align: right;"> | ||
<th></th> | ||
<th>Group</th> | ||
<th>Data</th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<th>0</th> | ||
<td>A</td> | ||
<td>1.22</td> | ||
</tr> | ||
<tr> | ||
<th>1</th> | ||
<td>A</td> | ||
<td>{na_rep}</td> | ||
</tr> | ||
</tbody> | ||
</table>""" | ||
assert result == expected | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For HTML tests, you can use the expected_html fixture; see test_to_html_justify for a usage template. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add type annotations?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added some types