diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 838a34adeaf82..47efcb4d5e51f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -106,7 +106,10 @@
     needs_i8_conversion,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import ExtensionDtype
+from pandas.core.dtypes.dtypes import (
+    BaseMaskedDtype,
+    ExtensionDtype,
+)
 from pandas.core.dtypes.missing import (
     isna,
     notna,
@@ -3602,9 +3605,14 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
             # We have EAs with the same dtype. We can preserve that dtype in transpose.
             dtype = dtypes[0]
             arr_type = dtype.construct_array_type()
-            values = self.values
-
-            new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
+            if isinstance(dtype, BaseMaskedDtype):
+                data, mask = self._mgr.as_array_masked()
+                new_values = [arr_type(data[i], mask[i]) for i in range(self.shape[0])]
+            else:
+                values = self.values
+                new_values = [
+                    arr_type._from_sequence(row, dtype=dtype) for row in values
+                ]
             result = type(self)._from_arrays(
                 new_values, index=self.columns, columns=self.index
             )
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 7c02781b16456..57c945819a50e 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1749,6 +1749,40 @@ def as_array(
 
         return arr.transpose()
 
+    def as_array_masked(self) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Convert the blockmanager data into a (data, mask) pair of numpy arrays.
+
+        Returns
+        -------
+        tuple[np.ndarray, np.ndarray] : transposed values and boolean mask
+        """
+        # # TODO(CoW) handle case where resulting array is a view
+        # if len(self.blocks) == 0:
+        #     arr = np.empty(self.shape, dtype=float)
+        #     return arr.transpose()
+
+        # TODO we already established we only have a single dtype, but this
+        # could be generalized to be a mix of all masked dtypes
+        dtype = self.blocks[0].dtype.numpy_dtype  # assumes >= 1 block - TODO confirm
+
+        result_data = np.empty(self.shape, dtype=dtype)
+        result_mask = np.empty(self.shape, dtype="bool")
+
+        # itemmask = np.zeros(self.shape[0])
+
+        for blk in self.blocks:
+            rl = blk.mgr_locs
+            arr = blk.values
+            result_data[rl.indexer] = arr._data
+            result_mask[rl.indexer] = arr._mask
+            # itemmask[rl.indexer] = 1
+
+        # if not itemmask.all():
+        #     raise AssertionError("Some items were not contained in blocks")
+
+        return result_data.transpose(), result_mask.transpose()
+
     def _interleave(
         self,
         dtype: np.dtype | None = None,