# Patch summary (GH 42380). Text before the first "diff --git" header is
# commentary only; it is not part of any hunk.
#
# * pandas/core/frame.py, DataFrame.transpose: in the branch that rebuilds
#   the frame from per-row extension arrays, the result was assembled from
#   dict(zip(self.index, new_values)). A dict keeps only one entry per key,
#   so rows sharing an index label collapsed into one column. The patch
#   passes the arrays positionally to type(self)._from_arrays, with
#   index=self.columns and columns=self.index.
# * doc/source/whatsnew/v1.3.1.rst: adds one "Fixed regressions" entry for
#   DataFrame.agg with axis=1 and one "Bug fixes" entry for
#   DataFrame.transpose.
# * pandas/tests/apply/test_frame_apply.py: adds
#   test_agg_axis1_duplicate_index (identity agg over axis=1 with a
#   duplicated index label).
# * pandas/tests/base/test_transpose.py: adds test_duplicate_labels
#   (DataFrame.T with duplicated index and/or column labels, for int and
#   CategoricalDtype data).
diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst
index 65719a11243f8..255747c3c5c6d 100644
--- a/doc/source/whatsnew/v1.3.1.rst
+++ b/doc/source/whatsnew/v1.3.1.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - Pandas could not be built on PyPy (:issue:`42355`)
 - :class:`DataFrame` constructed with with an older version of pandas could not be unpickled (:issue:`42345`)
 - Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42338`)
+- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -25,7 +26,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 954ea24d0d8fc..ccea94228c563 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3344,8 +3344,8 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
             values = self.values
 
             new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
-            result = self._constructor(
-                dict(zip(self.index, new_values)), index=self.columns
+            result = type(self)._from_arrays(
+                new_values, index=self.columns, columns=self.index
             )
 
         else:
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 14266a2c29a7f..995f404dc49d3 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -53,6 +53,17 @@ def test_apply_axis1_with_ea():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "data, dtype",
+    [(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)],
+)
+def test_agg_axis1_duplicate_index(data, dtype):
+    # GH 42380
+    expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype)
+    result = expected.agg(lambda x: x, axis=1)
+    tm.assert_frame_equal(result, expected)
+
+
 def test_apply_mixed_datetimelike():
     # mixed datetimelike
     # GH 7778
diff --git a/pandas/tests/base/test_transpose.py b/pandas/tests/base/test_transpose.py
index 5ba278368834c..246f33d27476c 100644
--- a/pandas/tests/base/test_transpose.py
+++ b/pandas/tests/base/test_transpose.py
@@ -1,6 +1,10 @@
 import numpy as np
 import pytest
 
+from pandas import (
+    CategoricalDtype,
+    DataFrame,
+)
 import pandas._testing as tm
 
 
@@ -25,3 +29,28 @@ def test_numpy_transpose(index_or_series_obj):
 
     with pytest.raises(ValueError, match=msg):
         np.transpose(obj, axes=1)
+
+
+@pytest.mark.parametrize(
+    "data, transposed_data, index, columns, dtype",
+    [
+        ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int),
+        ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])),
+        ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int),
+        ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])),
+        ([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int),
+        (
+            [[1, 2], [3, 4]],
+            [[1, 3], [2, 4]],
+            ["a", "a"],
+            ["b", "b"],
+            CategoricalDtype([1, 2, 3, 4]),
+        ),
+    ],
+)
+def test_duplicate_labels(data, transposed_data, index, columns, dtype):
+    # GH 42380
+    df = DataFrame(data, index=index, columns=columns, dtype=dtype)
+    result = df.T
+    expected = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype)
+    tm.assert_frame_equal(result, expected)