Skip to content

Commit cbbaf20

Browse files
authored
TYP: to_arrays, BUG: from_records empty dtypes (#40121)
1 parent 80b3e8d commit cbbaf20

File tree

5 files changed

+21
-13
lines changed

5 files changed

+21
-13
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,7 @@ Conversion
401401
^^^^^^^^^^
402402
- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`)
403403
- Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`)
404-
-
404+
- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
405405
-
406406

407407
Strings

pandas/core/frame.py

+2
Original file line number | Diff line number | Diff line change
@@ -605,6 +605,8 @@ def __init__(
605605
if is_dataclass(data[0]):
606606
data = dataclasses_to_dicts(data)
607607
if treat_as_nested(data):
608+
if columns is not None:
609+
columns = ensure_index(columns)
608610
arrays, columns, index = nested_data_to_arrays(
609611
data, columns, index, dtype
610612
)

pandas/core/internals/construction.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -602,7 +602,9 @@ def dataclasses_to_dicts(data):
602602
# Conversion of Inputs to Arrays
603603

604604

605-
def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None):
605+
def to_arrays(
606+
data, columns: Optional[Index], dtype: Optional[DtypeObj] = None
607+
) -> Tuple[List[ArrayLike], Index]:
606608
"""
607609
Return list of arrays, columns.
608610
"""
@@ -623,8 +625,10 @@ def to_arrays(data, columns: Optional[Index], dtype: Optional[DtypeObj] = None):
623625
if isinstance(data, np.ndarray):
624626
columns = data.dtype.names
625627
if columns is not None:
626-
return [[]] * len(columns), columns
627-
return [], [] # columns if columns is not None else []
628+
# i.e. numpy structured array
629+
arrays = [data[name] for name in columns]
630+
return arrays, ensure_index(columns)
631+
return [], ensure_index([])
628632

629633
elif isinstance(data[0], Categorical):
630634
if columns is None:

pandas/tests/frame/constructors/test_from_records.py

+9-8
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
CategoricalIndex,
1212
DataFrame,
1313
Index,
14+
Int64Index,
1415
Interval,
1516
RangeIndex,
1617
Series,
@@ -437,11 +438,11 @@ def test_from_records_empty(self):
437438
def test_from_records_empty_with_nonempty_fields_gh3682(self):
438439
a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])
439440
df = DataFrame.from_records(a, index="id")
440-
tm.assert_index_equal(df.index, Index([1], name="id"))
441-
assert df.index.name == "id"
442-
tm.assert_index_equal(df.columns, Index(["value"]))
443-
444-
b = np.array([], dtype=[("id", np.int64), ("value", np.int64)])
445-
df = DataFrame.from_records(b, index="id")
446-
tm.assert_index_equal(df.index, Index([], name="id"))
447-
assert df.index.name == "id"
441+
442+
ex_index = Int64Index([1], name="id")
443+
expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"])
444+
tm.assert_frame_equal(df, expected)
445+
446+
b = a[:0]
447+
df2 = DataFrame.from_records(b, index="id")
448+
tm.assert_frame_equal(df2, df.iloc[:0])

pandas/tests/frame/test_constructors.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -1170,7 +1170,8 @@ def test_constructor_unequal_length_nested_list_column(self):
11701170
# GH 32173
11711171
arrays = [list("abcd"), list("cde")]
11721172

1173-
msg = "Length of columns passed for MultiIndex columns is different"
1173+
# exception raised inside MultiIndex constructor
1174+
msg = "all arrays must be same length"
11741175
with pytest.raises(ValueError, match=msg):
11751176
DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
11761177

0 commit comments

Comments
 (0)