Skip to content

Commit dfc66f6

Browse files
authored
Backport PR #57474 on branch 2.2.x (REGR: DataFrame.transpose resulting in not contiguous data on nullable EAs) (#57496)
Backport PR #57474: REGR: DataFrame.transpose resulting in not contiguous data on nullable EAs
1 parent b79fe7e commit dfc66f6

File tree

3 files changed

+32
-3
lines changed

3 files changed

+32
-3
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Fixed regressions
3232
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
3333
- Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
3434
- Fixed regression in :meth:`DataFrame.to_sql` when ``method="multi"`` is passed and the dialect type is not Oracle (:issue:`57310`)
35+
- Fixed regression in :meth:`DataFrame.transpose` where the result for nullable extension dtypes did not have F-contiguous data, potentially causing exceptions when the result was used (:issue:`57315`)
3536
- Fixed regression in :meth:`DataFrame.update` emitting incorrect warnings about downcasting (:issue:`57124`)
3637
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
3738
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)

pandas/core/arrays/masked.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -1621,13 +1621,24 @@ def transpose_homogeneous_masked_arrays(
16211621
same dtype. The caller is responsible for ensuring validity of input data.
16221622
"""
16231623
masked_arrays = list(masked_arrays)
1624+
dtype = masked_arrays[0].dtype
1625+
16241626
values = [arr._data.reshape(1, -1) for arr in masked_arrays]
1625-
transposed_values = np.concatenate(values, axis=0)
1627+
transposed_values = np.concatenate(
1628+
values,
1629+
axis=0,
1630+
out=np.empty(
1631+
(len(masked_arrays), len(masked_arrays[0])),
1632+
order="F",
1633+
dtype=dtype.numpy_dtype,
1634+
),
1635+
)
16261636

16271637
masks = [arr._mask.reshape(1, -1) for arr in masked_arrays]
1628-
transposed_masks = np.concatenate(masks, axis=0)
1638+
transposed_masks = np.concatenate(
1639+
masks, axis=0, out=np.empty_like(transposed_values, dtype=bool)
1640+
)
16291641

1630-
dtype = masked_arrays[0].dtype
16311642
arr_type = dtype.construct_array_type()
16321643
transposed_arrays: list[BaseMaskedArray] = []
16331644
for i in range(transposed_values.shape[1]):

pandas/tests/frame/methods/test_transpose.py

+17
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pandas.util._test_decorators as td
55

6+
import pandas as pd
67
from pandas import (
78
DataFrame,
89
DatetimeIndex,
@@ -190,3 +191,19 @@ def test_transpose_not_inferring_dt_mixed_blocks(self):
190191
dtype=object,
191192
)
192193
tm.assert_frame_equal(result, expected)
194+
195+
@pytest.mark.parametrize("dtype1", ["Int64", "Float64"])
196+
@pytest.mark.parametrize("dtype2", ["Int64", "Float64"])
197+
def test_transpose(self, dtype1, dtype2):
198+
# GH#57315 - transpose should have F-contiguous blocks
199+
df = DataFrame(
200+
{
201+
"a": pd.array([1, 1, 2], dtype=dtype1),
202+
"b": pd.array([3, 4, 5], dtype=dtype2),
203+
}
204+
)
205+
result = df.T
206+
for blk in result._mgr.blocks:
207+
# When dtypes are unequal, we get a NumPy object array
208+
data = blk.values._data if dtype1 == dtype2 else blk.values
209+
assert data.flags["F_CONTIGUOUS"]

0 commit comments

Comments
 (0)