Skip to content

Commit 3bcbe9b

Browse files
Backport PR #42449: REGR: DataFrame.agg with axis=1, EA dtype, and duplicate index (#42460)
Co-authored-by: Richard Shadrach <[email protected]>
1 parent 456aa5c commit 3bcbe9b

File tree

4 files changed

+44
-3
lines changed

4 files changed

+44
-3
lines changed

doc/source/whatsnew/v1.3.1.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Pandas could not be built on PyPy (:issue:`42355`)
1818
- :class:`DataFrame` constructed with an older version of pandas could not be unpickled (:issue:`42345`)
1919
- Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42338`)
20+
- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`)
2021
-
2122

2223
.. ---------------------------------------------------------------------------
@@ -25,7 +26,7 @@ Fixed regressions
2526

2627
Bug fixes
2728
~~~~~~~~~
28-
-
29+
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
2930
-
3031

3132
.. ---------------------------------------------------------------------------

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3344,8 +3344,8 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
33443344
values = self.values
33453345

33463346
new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
3347-
result = self._constructor(
3348-
dict(zip(self.index, new_values)), index=self.columns
3347+
result = type(self)._from_arrays(
3348+
new_values, index=self.columns, columns=self.index
33493349
)
33503350

33513351
else:

pandas/tests/apply/test_frame_apply.py

+11
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ def test_apply_axis1_with_ea():
5353
tm.assert_frame_equal(result, expected)
5454

5555

56+
@pytest.mark.parametrize(
57+
"data, dtype",
58+
[(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)],
59+
)
60+
def test_agg_axis1_duplicate_index(data, dtype):
61+
# GH 42380
62+
expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype)
63+
result = expected.agg(lambda x: x, axis=1)
64+
tm.assert_frame_equal(result, expected)
65+
66+
5667
def test_apply_mixed_datetimelike():
5768
# mixed datetimelike
5869
# GH 7778

pandas/tests/base/test_transpose.py

+29
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import numpy as np
22
import pytest
33

4+
from pandas import (
5+
CategoricalDtype,
6+
DataFrame,
7+
)
48
import pandas._testing as tm
59

610

@@ -25,3 +29,28 @@ def test_numpy_transpose(index_or_series_obj):
2529

2630
with pytest.raises(ValueError, match=msg):
2731
np.transpose(obj, axes=1)
32+
33+
34+
@pytest.mark.parametrize(
35+
"data, transposed_data, index, columns, dtype",
36+
[
37+
([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int),
38+
([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])),
39+
([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int),
40+
([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])),
41+
([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int),
42+
(
43+
[[1, 2], [3, 4]],
44+
[[1, 3], [2, 4]],
45+
["a", "a"],
46+
["b", "b"],
47+
CategoricalDtype([1, 2, 3, 4]),
48+
),
49+
],
50+
)
51+
def test_duplicate_labels(data, transposed_data, index, columns, dtype):
52+
# GH 42380
53+
df = DataFrame(data, index=index, columns=columns, dtype=dtype)
54+
result = df.T
55+
expected = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype)
56+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)