BUG: Don't ignore na_rep in DataFrame.to_html (pandas-dev#36690)

dsaxton · Kevin D Smith · commit 89c377bb39a5 · 2020-11-02T08:51:46.000-06:00
* BUG: Don't ignore na_rep in DataFrame.to_html

* Back to list

* Move test and cover

* Test for to_latex

* More tests

* Maybe

* Note

* Refactor

* Move note

* Nothing

* Fixup

* Remove

* Doc

* Type
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -415,7 +415,6 @@ Strings
 - Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype :class:`Series` containing only numeric strings and ``NA`` (:issue:`37262`)
 -
 
-
 Interval
 ^^^^^^^^
 
@@ -466,6 +465,7 @@ I/O
 - Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`)
 - Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` the last line of the record is not appended with 'new line character' (:issue:`36888`)
 - Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`)
+- Bug in :meth:`DataFrame.to_html`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` ignoring the ``na_rep`` argument when ``float_format`` was also specified (:issue:`9046`, :issue:`13828`)
 - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
 - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
 
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -40,6 +40,7 @@
 from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
 from pandas._libs.tslibs.nattype import NaTType
 from pandas._typing import (
+    ArrayLike,
     CompressionOptions,
     FilePathOrBuffer,
     FloatFormatType,
@@ -104,15 +105,20 @@
         index : bool, optional, default True
             Whether to print index (row) labels.
         na_rep : str, optional, default 'NaN'
-            String representation of NAN to use.
+            String representation of ``NaN`` to use.
         formatters : list, tuple or dict of one-param. functions, optional
             Formatter functions to apply to columns' elements by position or
             name.
             The result of each function must be a unicode string.
             List/tuple must be of length equal to the number of columns.
         float_format : one-parameter function, optional, default None
             Formatter function to apply to columns' elements if they are
-            floats. The result of this function must be a unicode string.
+            floats. This function must return a unicode string and will be
+            applied only to the non-``NaN`` elements, with ``NaN`` being
+            handled by ``na_rep``.
+
+            .. versionchanged:: 1.2.0
+
         sparsify : bool, optional, default True
             Set to False for a DataFrame with a hierarchical index to print
             every multiindex key at each row.
@@ -1364,8 +1370,19 @@ def get_result_as_array(self) -> np.ndarray:
         Returns the float values converted into strings using
         the parameters given at initialisation, as a numpy array
         """
+
+        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
+            mask = isna(values)  # True wherever the element is missing
+            formatted = np.array(
+                [
+                    formatter(val) if not m else na_rep  # formatter never sees NA
+                    for val, m in zip(values.ravel(), mask.ravel())
+                ]
+            ).reshape(values.shape)  # flattened for the loop; restore input shape
+            return formatted
+
         if self.formatter is not None:
-            return np.array([self.formatter(x) for x in self.values])
+            return format_with_na_rep(self.values, self.formatter, self.na_rep)
 
         if self.fixed_width:
             threshold = get_option("display.chop_threshold")
@@ -1386,13 +1403,7 @@ def format_values_with(float_format):
             # separate the wheat from the chaff
             values = self.values
             is_complex = is_complex_dtype(values)
-            mask = isna(values)
-            values = np.array(values, dtype="object")
-            values[mask] = na_rep
-            imask = (~mask).ravel()
-            values.flat[imask] = np.array(
-                [formatter(val) for val in values.ravel()[imask]]
-            )
+            values = format_with_na_rep(values, formatter, na_rep)
 
             if self.fixed_width:
                 if is_complex:
@@ -1454,10 +1465,6 @@ def format_values_with(float_format):
         return formatted_values
 
     def _format_strings(self) -> List[str]:
-        # shortcut
-        if self.formatter is not None:
-            return [self.formatter(x) for x in self.values]
-
         return list(self.get_result_as_array())
 
 
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
@@ -820,3 +820,38 @@ def test_html_repr_min_rows(datapath, max_rows, min_rows, expected):
     with option_context("display.max_rows", max_rows, "display.min_rows", min_rows):
         result = df._repr_html_()
     assert result == expected
+
+
+@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
+def test_to_html_na_rep_and_float_format(na_rep):
+    # https://github.com/pandas-dev/pandas/issues/13828
+    df = DataFrame(
+        [
+            ["A", 1.2225],  # goes through float_format -> "1.22"
+            ["A", None],  # missing value -> must render as na_rep
+        ],
+        columns=["Group", "Data"],
+    )
+    result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format)
+    expected = f"""<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>Group</th>
+      <th>Data</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>0</th>
+      <td>A</td>
+      <td>1.22</td>
+    </tr>
+    <tr>
+      <th>1</th>
+      <td>A</td>
+      <td>{na_rep}</td>
+    </tr>
+  </tbody>
+</table>"""
+    assert result == expected
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -1431,3 +1431,24 @@ def test_get_strrow_multindex_multicolumn(self, row_num, expected):
         )
 
         assert row_string_converter.get_strrow(row_num=row_num) == expected
+
+    @pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
+    def test_to_latex_na_rep_and_float_format(self, na_rep):
+        df = DataFrame(
+            [
+                ["A", 1.2225],  # goes through float_format -> "1.22"
+                ["A", None],  # missing value -> must render as na_rep
+            ],
+            columns=["Group", "Data"],
+        )
+        result = df.to_latex(na_rep=na_rep, float_format="{:.2f}".format)
+        expected = f"""\\begin{{tabular}}{{llr}}
+\\toprule
+{{}} & Group &  Data \\\\
+\\midrule
+0 &     A &  1.22 \\\\
+1 &     A &   {na_rep} \\\\
+\\bottomrule
+\\end{{tabular}}
+"""
+        assert result == expected
diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py
@@ -220,3 +220,14 @@ def test_nullable_int_to_string(any_nullable_int_dtype):
 1       1
 2    <NA>"""
     assert result == expected
+
+
+@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
+def test_to_string_na_rep_and_float_format(na_rep):
+    # GH 13828: the None entry must render as na_rep, the float via float_format
+    df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"])
+    result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format)
+    expected = f"""  Group  Data
+0     A  1.22
+1     A   {na_rep}"""
+    assert result == expected