Skip to content

Commit 89c377b

Browse files
dsaxton authored and Kevin D Smith committed
BUG: Don't ignore na_rep in DataFrame.to_html (pandas-dev#36690)
* BUG: Don't ignore na_rep in DataFrame.to_html
* Back to list
* Move test and cover
* Test for to_latex
* More tests
* Maybe
* Note
* Refactor
* Move note
* Nothing
* Fixup
* Remove
* Doc
* Type
1 parent d0fba04 commit 89c377b

File tree

5 files changed

+89
-15
lines changed

5 files changed

+89
-15
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,6 @@ Strings
415415
- Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype :class:`Series` containing only numeric strings and ``NA`` (:issue:`37262`)
416416
-
417417

418-
419418
Interval
420419
^^^^^^^^
421420

@@ -466,6 +465,7 @@ I/O
466465
- Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`)
467466
- Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` where the last line of the record was not terminated with a newline character (:issue:`36888`)
468467
- Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`)
468+
- Bug in :meth:`DataFrame.to_html`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` ignoring the ``na_rep`` argument when ``float_format`` was also specified (:issue:`9046`, :issue:`13828`)
469469
- Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
470470
- Bug in :class:`HDFStore` that threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
471471

pandas/io/formats/format.py

+21-14
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
4141
from pandas._libs.tslibs.nattype import NaTType
4242
from pandas._typing import (
43+
ArrayLike,
4344
CompressionOptions,
4445
FilePathOrBuffer,
4546
FloatFormatType,
@@ -104,15 +105,20 @@
104105
index : bool, optional, default True
105106
Whether to print index (row) labels.
106107
na_rep : str, optional, default 'NaN'
107-
String representation of NAN to use.
108+
String representation of ``NaN`` to use.
108109
formatters : list, tuple or dict of one-param. functions, optional
109110
Formatter functions to apply to columns' elements by position or
110111
name.
111112
The result of each function must be a unicode string.
112113
List/tuple must be of length equal to the number of columns.
113114
float_format : one-parameter function, optional, default None
114115
Formatter function to apply to columns' elements if they are
115-
floats. The result of this function must be a unicode string.
116+
floats. This function must return a unicode string and will be
117+
applied only to the non-``NaN`` elements, with ``NaN`` being
118+
handled by ``na_rep``.
119+
120+
.. versionchanged:: 1.2.0
121+
116122
sparsify : bool, optional, default True
117123
Set to False for a DataFrame with a hierarchical index to print
118124
every multiindex key at each row.
@@ -1364,8 +1370,19 @@ def get_result_as_array(self) -> np.ndarray:
13641370
Returns the float values converted into strings using
13651371
the parameters given at initialisation, as a numpy array
13661372
"""
1373+
1374+
def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
1375+
mask = isna(values)
1376+
formatted = np.array(
1377+
[
1378+
formatter(val) if not m else na_rep
1379+
for val, m in zip(values.ravel(), mask.ravel())
1380+
]
1381+
).reshape(values.shape)
1382+
return formatted
1383+
13671384
if self.formatter is not None:
1368-
return np.array([self.formatter(x) for x in self.values])
1385+
return format_with_na_rep(self.values, self.formatter, self.na_rep)
13691386

13701387
if self.fixed_width:
13711388
threshold = get_option("display.chop_threshold")
@@ -1386,13 +1403,7 @@ def format_values_with(float_format):
13861403
# separate the wheat from the chaff
13871404
values = self.values
13881405
is_complex = is_complex_dtype(values)
1389-
mask = isna(values)
1390-
values = np.array(values, dtype="object")
1391-
values[mask] = na_rep
1392-
imask = (~mask).ravel()
1393-
values.flat[imask] = np.array(
1394-
[formatter(val) for val in values.ravel()[imask]]
1395-
)
1406+
values = format_with_na_rep(values, formatter, na_rep)
13961407

13971408
if self.fixed_width:
13981409
if is_complex:
@@ -1454,10 +1465,6 @@ def format_values_with(float_format):
14541465
return formatted_values
14551466

14561467
def _format_strings(self) -> List[str]:
1457-
# shortcut
1458-
if self.formatter is not None:
1459-
return [self.formatter(x) for x in self.values]
1460-
14611468
return list(self.get_result_as_array())
14621469

14631470

pandas/tests/io/formats/test_to_html.py

+35
Original file line numberDiff line numberDiff line change
@@ -820,3 +820,38 @@ def test_html_repr_min_rows(datapath, max_rows, min_rows, expected):
820820
with option_context("display.max_rows", max_rows, "display.min_rows", min_rows):
821821
result = df._repr_html_()
822822
assert result == expected
823+
824+
825+
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
826+
def test_to_html_na_rep_and_float_format(na_rep):
827+
# https://github.com/pandas-dev/pandas/issues/13828
828+
df = DataFrame(
829+
[
830+
["A", 1.2225],
831+
["A", None],
832+
],
833+
columns=["Group", "Data"],
834+
)
835+
result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format)
836+
expected = f"""<table border="1" class="dataframe">
837+
<thead>
838+
<tr style="text-align: right;">
839+
<th></th>
840+
<th>Group</th>
841+
<th>Data</th>
842+
</tr>
843+
</thead>
844+
<tbody>
845+
<tr>
846+
<th>0</th>
847+
<td>A</td>
848+
<td>1.22</td>
849+
</tr>
850+
<tr>
851+
<th>1</th>
852+
<td>A</td>
853+
<td>{na_rep}</td>
854+
</tr>
855+
</tbody>
856+
</table>"""
857+
assert result == expected

pandas/tests/io/formats/test_to_latex.py

+21
Original file line numberDiff line numberDiff line change
@@ -1431,3 +1431,24 @@ def test_get_strrow_multindex_multicolumn(self, row_num, expected):
14311431
)
14321432

14331433
assert row_string_converter.get_strrow(row_num=row_num) == expected
1434+
1435+
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
1436+
def test_to_latex_na_rep_and_float_format(self, na_rep):
1437+
df = DataFrame(
1438+
[
1439+
["A", 1.2225],
1440+
["A", None],
1441+
],
1442+
columns=["Group", "Data"],
1443+
)
1444+
result = df.to_latex(na_rep=na_rep, float_format="{:.2f}".format)
1445+
expected = f"""\\begin{{tabular}}{{llr}}
1446+
\\toprule
1447+
{{}} & Group & Data \\\\
1448+
\\midrule
1449+
0 & A & 1.22 \\\\
1450+
1 & A & {na_rep} \\\\
1451+
\\bottomrule
1452+
\\end{{tabular}}
1453+
"""
1454+
assert result == expected

pandas/tests/io/formats/test_to_string.py

+11
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,14 @@ def test_nullable_int_to_string(any_nullable_int_dtype):
220220
1 1
221221
2 <NA>"""
222222
assert result == expected
223+
224+
225+
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
226+
def test_to_string_na_rep_and_float_format(na_rep):
227+
# GH 13828
228+
df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"])
229+
result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format)
230+
expected = f""" Group Data
231+
0 A 1.22
232+
1 A {na_rep}"""
233+
assert result == expected

0 commit comments

Comments
 (0)