diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 22a2931519ffd..4180cdb245a82 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -316,6 +316,7 @@ Conversion ^^^^^^^^^^ - Bug in :func:`DataFrame.style.to_latex` and :func:`DataFrame.style.to_html` if the DataFrame contains integers with more digits than can be represented by floating point double precision (:issue:`52272`) - Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`) +- Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`) - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`) - diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 2fb8d3563b792..1b3b8aad6de51 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -283,6 +283,9 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False): # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has # type "ndarray[Any, dtype[bool_]]") result = values.isna() # type: ignore[assignment] + elif isinstance(values, np.recarray): + # GH 48526 + result = _isna_recarray_dtype(values, inf_as_na=inf_as_na) elif is_string_or_object_np_dtype(values.dtype): result = _isna_string_dtype(values, inf_as_na=inf_as_na) elif dtype.kind in "mM": @@ -314,6 +317,34 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bo return result +def _has_record_inf_value(record_as_array: np.ndarray) -> np.bool_: + is_inf_in_record = np.zeros(len(record_as_array), dtype=bool) + for i, value in enumerate(record_as_array): + is_element_inf = False + try: + is_element_inf = np.isinf(value) + except TypeError: + is_element_inf = False + is_inf_in_record[i] = is_element_inf + + return np.any(is_inf_in_record) + + +def _isna_recarray_dtype(values: np.recarray, inf_as_na: bool) -> npt.NDArray[np.bool_]: + result = np.zeros(values.shape, dtype=bool) + for i, record in enumerate(values): + record_as_array = np.array(record.tolist()) + does_record_contain_nan = isna_all(record_as_array) + does_record_contain_inf = False + if inf_as_na: + does_record_contain_inf = bool(_has_record_inf_value(record_as_array)) + result[i] = np.any( + np.logical_or(does_record_contain_nan, does_record_contain_inf) + ) + + return result + + @overload def notna(obj: Scalar) -> bool: ... diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 66d8084abea68..b42af8cdfcd8c 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -361,6 +361,71 @@ def test_datetime64tz_slice_non_truncate(self): result = repr(df) assert result == expected + def test_to_records_no_typeerror_in_repr(self): + # GH 48526 + df = DataFrame([["a", "b"], ["c", "d"], ["e", "f"]], columns=["left", "right"]) + df["record"] = df[["left", "right"]].to_records() + expected = """ left right record +0 a b [0, a, b] +1 c d [1, c, d] +2 e f [2, e, f]""" + result = repr(df) + assert result == expected + + def test_to_records_with_na_record_value(self): + # GH 48526 + df = DataFrame( + [["a", np.nan], ["c", "d"], ["e", "f"]], columns=["left", "right"] + ) + df["record"] = df[["left", "right"]].to_records() + expected = """ left right record +0 a NaN [0, a, nan] +1 c d [1, c, d] +2 e f [2, e, f]""" + result = repr(df) + assert result == expected + + def test_to_records_with_na_record(self): + # GH 48526 + df = DataFrame( + [["a", "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, "right"] + ) + df["record"] = df[[np.nan, "right"]].to_records() + expected = """ NaN right record +0 a b [0, a, b] +1 NaN NaN [1, nan, nan] +2 e f [2, e, f]""" + result = repr(df) + assert result == expected + + def test_to_records_with_inf_as_na_record(self): + # GH 48526 + with option_context("use_inf_as_na", True): + df = DataFrame( + [[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf] + ) + df["record"] = df[[np.nan, np.inf]].to_records() + expected = """ NaN inf record +0 NaN b [0, inf, b] +1 NaN NaN [1, nan, nan] +2 e f [2, e, f]""" + result = repr(df) + assert result == expected + + def test_to_records_with_inf_record(self): + # GH 48526 + with option_context("use_inf_as_na", False): + df = DataFrame( + [[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf] + ) + df["record"] = df[[np.nan, np.inf]].to_records() + expected = """ NaN inf record +0 inf b [0, inf, b] +1 NaN NaN [1, nan, nan] +2 e f [2, e, f]""" + result = repr(df) + assert result == expected + def test_masked_ea_with_formatter(self): # GH#39336 df = DataFrame(