diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 3f7c4b3b0ccb7..05a25b122d1f4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1092,6 +1092,7 @@ Numeric - Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) - Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) +- Bug in the :meth:`DataFrame.from_records` constructor losing the dtypes of an empty NumPy record array (:issue:`20805`) - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - Bug in :class:`DataFrame` flex arithmetic (e.g. 
``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) - Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b67ed9cfd2241..1696e3a910924 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7387,7 +7387,10 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): if isinstance(data, np.ndarray): columns = data.dtype.names if columns is not None: - return [[]] * len(columns), columns + arrays = [np.array([], dtype=field_type) + for field_type, _ in data.dtype.fields.values()] + return arrays, columns + return [], [] # columns if columns is not None else [] if isinstance(data[0], (list, tuple)): return _list_to_arrays(data, columns, coerce_float=coerce_float, diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 47b7d60e3b6e8..7344b99db758c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1933,9 +1933,18 @@ def test_from_records_empty_with_nonempty_fields_gh3682(self): b = np.array([], dtype=[('id', np.int64), ('value', np.int64)]) df = DataFrame.from_records(b, index='id') - tm.assert_index_equal(df.index, Index([], name='id')) + tm.assert_index_equal(df.index, Index([], name='id', dtype='int')) assert df.index.name == 'id' + def test_from_records_empty_dtypes(self): + # https://github.com/pandas-dev/pandas/issues/20805 + a = np.array([(1, 2)], dtype=[('id', 'u8'), ('value', 'i8')]) + result = DataFrame.from_records(a[:0]) + expected = pd.DataFrame({"id": np.array([], dtype='u8'), + "value": np.array([], dtype='i8')}, + columns=['id', 'value']) + tm.assert_frame_equal(result, expected) + def 
test_from_records_with_datetimes(self): # this may fail on certain platforms because of a numpy issue