diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cdd8e50d98077..0d83abc0d81cc 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -414,7 +414,6 @@ Strings - Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype :class:`Series` containing only numeric strings and ``NA`` (:issue:`37262`) - - Interval ^^^^^^^^ @@ -465,6 +464,7 @@ I/O - Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`) - Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` the last line of the record is not appended with 'new line character' (:issue:`36888`) - Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`) +- Bug in :meth:`DataFrame.to_html`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` ignoring the ``na_rep`` argument when ``float_format`` was also specified (:issue:`9046`, :issue:`13828`) - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6f4bd2ed8c73a..3c759f477899b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -40,6 +40,7 @@ from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.nattype import NaTType from pandas._typing import ( + ArrayLike, CompressionOptions, FilePathOrBuffer, FloatFormatType, @@ -104,7 +105,7 @@ index : bool, optional, default True Whether to print index (row) labels. na_rep : str, optional, default 'NaN' - String representation of NAN to use. + String representation of ``NaN`` to use. formatters : list, tuple or dict of one-param. functions, optional Formatter functions to apply to columns' elements by position or name. @@ -112,7 +113,12 @@ List/tuple must be of length equal to the number of columns. float_format : one-parameter function, optional, default None Formatter function to apply to columns' elements if they are - floats. The result of this function must be a unicode string. + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + + .. versionchanged:: 1.2.0 + sparsify : bool, optional, default True Set to False for a DataFrame with a hierarchical index to print every multiindex key at each row. @@ -1364,8 +1370,19 @@ def get_result_as_array(self) -> np.ndarray: Returns the float values converted into strings using the parameters given at initialisation, as a numpy array """ + + def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): + mask = isna(values) + formatted = np.array( + [ + formatter(val) if not m else na_rep + for val, m in zip(values.ravel(), mask.ravel()) + ] + ).reshape(values.shape) + return formatted + if self.formatter is not None: - return np.array([self.formatter(x) for x in self.values]) + return format_with_na_rep(self.values, self.formatter, self.na_rep) if self.fixed_width: threshold = get_option("display.chop_threshold") @@ -1386,13 +1403,7 @@ def format_values_with(float_format): # separate the wheat from the chaff values = self.values is_complex = is_complex_dtype(values) - mask = isna(values) - values = np.array(values, dtype="object") - values[mask] = na_rep - imask = (~mask).ravel() - values.flat[imask] = np.array( - [formatter(val) for val in values.ravel()[imask]] - ) + values = format_with_na_rep(values, formatter, na_rep) if self.fixed_width: if is_complex: @@ -1454,10 +1465,6 @@ def format_values_with(float_format): return formatted_values def _format_strings(self) -> List[str]: - # shortcut - if self.formatter is not None: - return [self.formatter(x) for x in self.values] - return list(self.get_result_as_array()) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 18cbd7186e931..f5781fff15932 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -820,3 +820,38 @@ def test_html_repr_min_rows(datapath, max_rows, min_rows, expected): with option_context("display.max_rows", max_rows, "display.min_rows", min_rows): result = df._repr_html_() assert result == expected + + +@pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) +def test_to_html_na_rep_and_float_format(na_rep): + # https://github.com/pandas-dev/pandas/issues/13828 + df = DataFrame( + [ + ["A", 1.2225], + ["A", None], + ], + columns=["Group", "Data"], + ) + result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format) + expected = f""" + + + + + + + + + + + + + + + + + + + +
GroupData
0A1.22
1A{na_rep}
""" + assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 855e69dee7db4..7cf7ed3f77609 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -1431,3 +1431,24 @@ def test_get_strrow_multindex_multicolumn(self, row_num, expected): ) assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) + def test_to_latex_na_rep_and_float_format(self, na_rep): + df = DataFrame( + [ + ["A", 1.2225], + ["A", None], + ], + columns=["Group", "Data"], + ) + result = df.to_latex(na_rep=na_rep, float_format="{:.2f}".format) + expected = f"""\\begin{{tabular}}{{llr}} +\\toprule +{{}} & Group & Data \\\\ +\\midrule +0 & A & 1.22 \\\\ +1 & A & {na_rep} \\\\ +\\bottomrule +\\end{{tabular}} +""" + assert result == expected diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 7944a0ea67a5f..a1ed1ba2e8bf5 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -220,3 +220,14 @@ def test_nullable_int_to_string(any_nullable_int_dtype): 1 1 2 """ assert result == expected + + +@pytest.mark.parametrize("na_rep", ["NaN", "Ted"]) +def test_to_string_na_rep_and_float_format(na_rep): + # GH 13828 + df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"]) + result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format) + expected = f""" Group Data +0 A 1.22 +1 A {na_rep}""" + assert result == expected