Backport PR #57474 on branch 2.2.x (REGR: DataFrame.transpose resulting in not contiguous data on nullable EAs) (#57496)

rhshadrach · web-flow · commit dfc66f6e5da8 · 2024-02-19T10:51:13.000+01:00
Backport PR #57474: REGR: DataFrame.transpose resulting in not contiguous data on nullable EAs
diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
@@ -32,6 +32,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
 - Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
 - Fixed regression in :meth:`DataFrame.to_sql` when ``method="multi"`` is passed and the dialect type is not Oracle (:issue:`57310`)
+- Fixed regression in :meth:`DataFrame.transpose` with nullable extension dtypes not having F-contiguous data potentially causing exceptions when used (:issue:`57315`)
 - Fixed regression in :meth:`DataFrame.update` emitting incorrect warnings about downcasting (:issue:`57124`)
 - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
 - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -1621,13 +1621,24 @@ def transpose_homogeneous_masked_arrays(
     same dtype. The caller is responsible for ensuring validity of input data.
     """
     masked_arrays = list(masked_arrays)
+    dtype = masked_arrays[0].dtype
+
     values = [arr._data.reshape(1, -1) for arr in masked_arrays]
-    transposed_values = np.concatenate(values, axis=0)
+    transposed_values = np.concatenate(
+        values,
+        axis=0,
+        out=np.empty(
+            (len(masked_arrays), len(masked_arrays[0])),
+            order="F",
+            dtype=dtype.numpy_dtype,
+        ),
+    )
 
     masks = [arr._mask.reshape(1, -1) for arr in masked_arrays]
-    transposed_masks = np.concatenate(masks, axis=0)
+    transposed_masks = np.concatenate(
+        masks, axis=0, out=np.empty_like(transposed_values, dtype=bool)
+    )
 
-    dtype = masked_arrays[0].dtype
     arr_type = dtype.construct_array_type()
     transposed_arrays: list[BaseMaskedArray] = []
     for i in range(transposed_values.shape[1]):
diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py
@@ -3,6 +3,7 @@
 
 import pandas.util._test_decorators as td
 
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -190,3 +191,19 @@ def test_transpose_not_inferring_dt_mixed_blocks(self):
             dtype=object,
         )
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("dtype1", ["Int64", "Float64"])
+    @pytest.mark.parametrize("dtype2", ["Int64", "Float64"])
+    def test_transpose(self, dtype1, dtype2):
+        # GH#57315 - transpose should have F contiguous blocks
+        df = DataFrame(
+            {
+                "a": pd.array([1, 1, 2], dtype=dtype1),
+                "b": pd.array([3, 4, 5], dtype=dtype2),
+            }
+        )
+        result = df.T
+        for blk in result._mgr.blocks:
+            # When dtypes are unequal, we get NumPy object array
+            data = blk.values._data if dtype1 == dtype2 else blk.values
+            assert data.flags["F_CONTIGUOUS"]