Skip to content

Commit cd44f94

Browse files
jschendel authored and Pingviinituutti committed
BUG: Fix DataFrame.astype(ExtensionDtype) with duplicate column names (pandas-dev#24717)
1 parent 9ed29b7 commit cd44f94

File tree

3 files changed

+16
-3
lines changed

3 files changed

+16
-3
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1639,6 +1639,7 @@ Conversion
16391639
- Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`)
16401640
- Bug in :meth:`DataFrame.clip` in which column types are not preserved and are cast to float (:issue:`24162`)
16411641
- Bug in :meth:`DataFrame.clip` in which the numeric results are wrong when the column order of the dataframes doesn't match (:issue:`20911`)
1642+
- Bug in :meth:`DataFrame.astype` where converting to an extension dtype when duplicate column names are present causes a ``RecursionError`` (:issue:`24704`)
16421643

16431644
Strings
16441645
^^^^^^^

pandas/core/generic.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -5670,9 +5670,10 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
56705670
results.append(results.append(col.copy() if copy else col))
56715671

56725672
elif is_extension_array_dtype(dtype) and self.ndim > 1:
5673-
# GH 18099: columnwise conversion to categorical
5674-
# and extension dtype
5675-
results = (self[col].astype(dtype, copy=copy) for col in self)
5673+
# GH 18099/22869: columnwise conversion to extension dtype
5674+
# GH 24704: use iloc to handle duplicate column names
5675+
results = (self.iloc[:, i].astype(dtype, copy=copy)
5676+
for i in range(len(self.columns)))
56765677

56775678
else:
56785679
# else, only a single dtype is given

pandas/tests/frame/test_dtypes.py

+11
Original file line number | Diff line number | Diff line change
@@ -709,6 +709,17 @@ def test_astype_extension_dtypes_1d(self, dtype):
709709
tm.assert_frame_equal(df.astype(dtype), expected1)
710710
tm.assert_frame_equal(df.astype('int64').astype(dtype), expected1)
711711

712+
@pytest.mark.parametrize("dtype", ['category', 'Int64'])
713+
def test_astype_extension_dtypes_duplicate_col(self, dtype):
714+
# GH 24704
715+
a1 = Series([0, np.nan, 4], name='a')
716+
a2 = Series([np.nan, 3, 5], name='a')
717+
df = concat([a1, a2], axis=1)
718+
719+
result = df.astype(dtype)
720+
expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
721+
assert_frame_equal(result, expected)
722+
712723
@pytest.mark.parametrize('dtype', [
713724
{100: 'float64', 200: 'uint64'}, 'category', 'float64'])
714725
def test_astype_column_metadata(self, dtype):

0 commit comments

Comments
 (0)