From b02d81353cec4e20741742f9fcb485a144ec77fe Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 11:15:14 +0100 Subject: [PATCH 1/7] only replace float-nans with NaN and not other null objects --- pandas/_libs/missing.pyx | 25 +++++++++++++++++++++++++ pandas/core/indexes/base.py | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 585b535775397..4d8118bfa499d 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -248,6 +248,31 @@ cdef bint checknull_with_nat_and_na(object obj): return checknull_with_nat(obj) or obj is C_NA +@cython.wraparound(False) +@cython.boundscheck(False) +def is_float_nan(values: ndarray) -> ndarray: + """ + True for elements which correspond to a float nan + + Returns + ------- + ndarray[bool] + """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if util.is_nan(val): + result[i] = True + return result.view(bool) + + @cython.wraparound(False) @cython.boundscheck(False) def is_numeric_na(values: ndarray) -> ndarray: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bb262a9ba416a..717abb998dd65 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -33,6 +33,7 @@ is_datetime_array, no_default, ) +from pandas._libs.missing import is_float_nan from pandas._libs.tslibs import ( IncompatibleFrequency, OutOfBoundsDatetime, @@ -1394,7 +1395,7 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t] result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] # could have nans - mask = isna(values) + mask = is_float_nan(values) if mask.any(): result_arr = np.array(result) result_arr[mask] = na_rep From 7b73be690f273037975de50d4db185ad944b2e65 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 11:23:38 +0100 
Subject: [PATCH 2/7] adding examples with series --- pandas/tests/series/test_repr.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index e243f609145f3..a12bc1df37269 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -258,6 +258,13 @@ def test_float_repr(self): expected = "0 1.0\ndtype: object" assert repr(ser) == expected + def test_different_null_objects(self): + # GH#45263 + ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT]) + result = repr(ser) + expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64" + assert result == expected + class TestCategoricalRepr: def test_categorical_repr_unicode(self): From 82b0a3790ea0b1be1a1ae2b36205dd7a4848b1b4 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 11:37:18 +0100 Subject: [PATCH 3/7] adding examples with dataframes --- pandas/tests/frame/test_repr_info.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index e71f625a6ead6..f19edf5722ca1 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -68,6 +68,27 @@ def test_repr_with_mi_nat(self, float_string_frame): expected = " X\nNaT a 1\n2013-01-01 b 2" assert result == expected + def test_repr_with_different_nulls(self): + # GH45263 + df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT]) + result = repr(df) + expected = """ 0 +True 1 +None 2 +NaN 3 +NaT 4""" + assert result == expected + + def test_repr_with_different_nulls_cols(self): + # GH45263 + d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]} + df = DataFrame(data=d) + result = repr(df) + expected = """ NaN None NaT True +0 1 3 6 8 +1 2 4 7 9""" + assert result == expected + def test_multiindex_na_repr(self): # only an issue with long columns df3 = DataFrame( From 11572d8222e84189b08970d92957958c8cc85583 Mon Sep 17 
00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 11:40:53 +0100 Subject: [PATCH 4/7] adding what is new --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 47427620c8ece..1ca4e8cc97df0 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -1050,6 +1050,7 @@ Other - Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`) - Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`) - Bug in :meth:`DataFrame.eval` where ``resolvers`` argument was overriding the default resolvers (:issue:`34966`) +- :meth:`Series.__repr__` and :meth:`DataFrame.__repr__` no longer replace all null values in indexes with "NaN" but use their real string representations. "NaN" is used only for ``float("nan")`` (:issue:`45263`) .. 
--------------------------------------------------------------------------- From cb6c0e73d3fc3e231bc2c99342093a9089fcd8e3 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 15:42:17 +0100 Subject: [PATCH 5/7] fixing test cases --- pandas/tests/indexes/test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 3447a2ceef7c1..1145de14ad3c4 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -658,7 +658,8 @@ def test_format_missing(self, vals, nulls_fixture): index = Index(vals) formatted = index.format() - expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"] + null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture) + expected = [str(index[0]), str(index[1]), str(index[2]), null_repr] assert formatted == expected assert index[3] is nulls_fixture From ac460d1bccb2c72e0b766461eb6ed99df33d2027 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 17:10:02 +0100 Subject: [PATCH 6/7] adding def to pyi-file --- pandas/_libs/missing.pyi | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi index ab6841e7ddf35..3a4cc9def07bd 100644 --- a/pandas/_libs/missing.pyi +++ b/pandas/_libs/missing.pyi @@ -14,3 +14,4 @@ def checknull(val: object, inf_as_na: bool = ...) -> bool: ... def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... +def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... 
From 0877371e3ddc88858c0b036eb510a465237918a8 Mon Sep 17 00:00:00 2001 From: Egor Dranischnikow Date: Tue, 18 Jan 2022 22:08:12 +0100 Subject: [PATCH 7/7] fix indentation --- pandas/_libs/missing.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 4d8118bfa499d..62977f0fd2b4c 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -269,7 +269,7 @@ def is_float_nan(values: ndarray) -> ndarray: for i in range(N): val = values[i] if util.is_nan(val): - result[i] = True + result[i] = True return result.view(bool)