Skip to content

BUG: Fix from_records() column reorder issue, if columns!=None use passed param (#59717) #59809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ I/O
^^^
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
- Bug in :meth:`DataFrame.from_records` where the ``columns`` parameter was not used to reorder and filter the columns of a NumPy structured array (:issue:`59717`)
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,8 @@ def to_arrays(

elif isinstance(data, np.ndarray) and data.dtype.names is not None:
# e.g. recarray
columns = Index(list(data.dtype.names))
if columns is None:
columns = Index(data.dtype.names)
arrays = [data[k] for k in columns]
return arrays, columns

Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,3 +469,26 @@ def test_from_records_empty2(self):

alt = DataFrame(arr)
tm.assert_frame_equal(alt, expected)

def test_from_records_structured_array(self):
# GH 59717
data = np.array(
[
("John", 25, "New York", 50000),
("Jane", 30, "San Francisco", 75000),
("Bob", 35, "Chicago", 65000),
("Alice", 28, "Los Angeles", 60000),
],
dtype=[("name", "U10"), ("age", "i4"), ("city", "U15"), ("salary", "i4")],
)

actual_result = DataFrame.from_records(data, columns=["name", "salary", "city"])

modified_data = {
"name": ["John", "Jane", "Bob", "Alice"],
"salary": np.array([50000, 75000, 65000, 60000], dtype="int32"),
"city": ["New York", "San Francisco", "Chicago", "Los Angeles"],
}
expected_result = DataFrame(modified_data)

tm.assert_frame_equal(actual_result, expected_result)
34 changes: 34 additions & 0 deletions pandas/tests/frame/methods/test_to_arrays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
from numpy import array

import pandas._testing as tm
from pandas.core.indexes.api import ensure_index
from pandas.core.internals.construction import to_arrays


def test_to_arrays():
# GH 59717
data = np.array(
[
("John", 25, "New York", 50000),
("Jane", 30, "San Francisco", 75000),
("Bob", 35, "Chicago", 65000),
("Alice", 28, "Los Angeles", 60000),
],
dtype=[("name", "U10"), ("age", "i4"), ("city", "U15"), ("salary", "i4")],
)

columns = ["name", "salary", "city"]
indexed_columns = ensure_index(columns)

actual_arrays, actual_cols = to_arrays(data, indexed_columns)
expected_arrays = [
array(["John", "Jane", "Bob", "Alice"], dtype="<U10"),
array([50000, 75000, 65000, 60000], dtype="int32"),
array(["New York", "San Francisco", "Chicago", "Los Angeles"], dtype="<U15"),
]

for actual, expected in zip(actual_arrays, expected_arrays):
tm.assert_numpy_array_equal(actual, expected)

assert actual_cols.equals(indexed_columns)