Skip to content

Commit c3f0aac

Browse files
authored
BUG: Fixed DataFrame.__repr__ Type Error when column dtype is np.record (GH 48526) (#48637)
* BUG: Fixed repr type error when column in np.record
* Relocated the test to the proposed destination and test the repr of the frame
* Assert has gone missing due to merge conflict
* Added new function that handles np.recarrays in _isna_array
* Added 2 more tests + refined _isna_recarray_dtype + formatting fixed
* Reworked isna_recarray_dtype method + Added test for inf value
* Pre-commit fix
* Fixing typing
* Fixed formatting
* Fixed pyright error
---------
Co-authored-by: RaphSku <[email protected]>
1 parent f94bdc4 commit c3f0aac

File tree

3 files changed

+97
-0
lines changed

3 files changed

+97
-0
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ Conversion
318318
^^^^^^^^^^
319319
- Bug in :func:`DataFrame.style.to_latex` and :func:`DataFrame.style.to_html` if the DataFrame contains integers with more digits than can be represented by floating point double precision (:issue:`52272`)
320320
- Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`)
321+
- Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`)
321322
- Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
322323
-
323324

pandas/core/dtypes/missing.py

+31
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,9 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
283283
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
284284
# type "ndarray[Any, dtype[bool_]]")
285285
result = values.isna() # type: ignore[assignment]
286+
elif isinstance(values, np.recarray):
287+
# GH 48526
288+
result = _isna_recarray_dtype(values, inf_as_na=inf_as_na)
286289
elif is_string_or_object_np_dtype(values.dtype):
287290
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
288291
elif dtype.kind in "mM":
@@ -314,6 +317,34 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bo
314317
return result
315318

316319

320+
def _has_record_inf_value(record_as_array: np.ndarray) -> np.bool_:
    """
    Check whether any element of a single record is infinite.

    Parameters
    ----------
    record_as_array : np.ndarray
        The fields of one record, converted to an ndarray.

    Returns
    -------
    np.bool_
        True if ``np.isinf`` is true for at least one element.

    Notes
    -----
    Elements for which ``np.isinf`` raises ``TypeError`` are counted as
    not infinite.
    """
    is_inf_in_record = np.zeros(len(record_as_array), dtype=bool)
    for i, value in enumerate(record_as_array):
        try:
            is_element_inf = np.isinf(value)
        except TypeError:
            # np.isinf is not defined for this element; its mask entry
            # keeps the initial False.
            continue
        is_inf_in_record[i] = is_element_inf

    return np.any(is_inf_in_record)
331+
332+
333+
def _isna_recarray_dtype(
    values: np.recarray, inf_as_na: bool
) -> npt.NDArray[np.bool_]:
    """
    Build the missing-value mask for a NumPy record array (GH 48526).

    Parameters
    ----------
    values : np.recarray
        Record array to inspect, one record per entry.
    inf_as_na : bool
        If True, a record that holds at least one infinite field is also
        flagged as missing.

    Returns
    -------
    npt.NDArray[np.bool_]
        Boolean array with the shape of ``values``. An entry is True when
        ``isna_all`` reports the record's fields as all missing, or when
        ``inf_as_na`` is set and ``_has_record_inf_value`` finds an
        infinite field.
    """
    result = np.zeros(values.shape, dtype=bool)
    for i, record in enumerate(values):
        # The fields of one record are inspected together as a plain ndarray.
        record_as_array = np.array(record.tolist())
        is_record_na = isna_all(record_as_array)
        if inf_as_na and not is_record_na:
            # Only scan for inf when the record is not already flagged.
            is_record_na = bool(_has_record_inf_value(record_as_array))
        result[i] = is_record_na

    return result
346+
347+
317348
@overload
318349
def notna(obj: Scalar) -> bool:
319350
...

pandas/tests/frame/test_repr_info.py

+65
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,71 @@ def test_datetime64tz_slice_non_truncate(self):
361361
result = repr(df)
362362
assert result == expected
363363

364+
def test_to_records_no_typeerror_in_repr(self):
365+
# GH 48526
# Builds a frame of strings, stores the result of ``to_records()`` on two
# of its columns in a "record" column, and asserts that ``repr(df)``
# equals the expected text.
366+
df = DataFrame([["a", "b"], ["c", "d"], ["e", "f"]], columns=["left", "right"])
367+
df["record"] = df[["left", "right"]].to_records()
368+
expected = """ left right record
369+
0 a b [0, a, b]
370+
1 c d [1, c, d]
371+
2 e f [2, e, f]"""
372+
result = repr(df)
373+
assert result == expected
374+
375+
def test_to_records_with_na_record_value(self):
376+
# GH 48526
# Same setup as a plain string frame, but one cell is ``np.nan``; the
# expected text still shows that row's record with its fields listed.
377+
df = DataFrame(
378+
[["a", np.nan], ["c", "d"], ["e", "f"]], columns=["left", "right"]
379+
)
380+
df["record"] = df[["left", "right"]].to_records()
381+
expected = """ left right record
382+
0 a NaN [0, a, nan]
383+
1 c d [1, c, d]
384+
2 e f [2, e, f]"""
385+
result = repr(df)
386+
assert result == expected
387+
388+
def test_to_records_with_na_record(self):
389+
# GH 48526
# One whole row is ``np.nan`` and one column label is ``np.nan``; the
# "record" column is built from ``to_records()`` and ``repr(df)`` is
# compared with the expected text.
390+
df = DataFrame(
391+
[["a", "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, "right"]
392+
)
393+
df["record"] = df[[np.nan, "right"]].to_records()
394+
expected = """ NaN right record
395+
0 a b [0, a, b]
396+
1 NaN NaN [1, nan, nan]
397+
2 e f [2, e, f]"""
398+
result = repr(df)
399+
assert result == expected
400+
401+
def test_to_records_with_inf_as_na_record(self):
402+
# GH 48526
# Runs with the "use_inf_as_na" option set to True. The frame holds an
# ``np.inf`` cell and uses ``np.nan`` / ``np.inf`` as column labels; the
# expected text renders that cell as NaN while the record column still
# lists ``inf``.
403+
with option_context("use_inf_as_na", True):
404+
df = DataFrame(
405+
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf]
406+
)
407+
df["record"] = df[[np.nan, np.inf]].to_records()
408+
expected = """ NaN inf record
409+
0 NaN b [0, inf, b]
410+
1 NaN NaN [1, nan, nan]
411+
2 e f [2, e, f]"""
412+
result = repr(df)
413+
assert result == expected
414+
415+
def test_to_records_with_inf_record(self):
416+
# GH 48526
# Runs with the "use_inf_as_na" option set to False on the same data as
# the inf-as-na test; here the expected text renders the ``np.inf`` cell
# as ``inf``.
417+
with option_context("use_inf_as_na", False):
418+
df = DataFrame(
419+
[[np.inf, "b"], [np.nan, np.nan], ["e", "f"]], columns=[np.nan, np.inf]
420+
)
421+
df["record"] = df[[np.nan, np.inf]].to_records()
422+
expected = """ NaN inf record
423+
0 inf b [0, inf, b]
424+
1 NaN NaN [1, nan, nan]
425+
2 e f [2, e, f]"""
426+
result = repr(df)
427+
assert result == expected
428+
364429
def test_masked_ea_with_formatter(self):
365430
# GH#39336
366431
df = DataFrame(

0 commit comments

Comments
 (0)