
Commit af89b86

BUG: DataFrame.astype(series) with duplicate columns (#44417)
1 parent 927e541

File tree

5 files changed: +46 -7 lines changed


doc/source/whatsnew/v1.4.0.rst (+1)

@@ -715,6 +715,7 @@ Styler

 Other
 ^^^^^
+- Bug in :meth:`DataFrame.astype` with non-unique columns and a :class:`Series` ``dtype`` argument (:issue:`44417`)
 - Bug in :meth:`CustomBusinessMonthBegin.__add__` (:meth:`CustomBusinessMonthEnd.__add__`) not applying the extra ``offset`` parameter when beginning (end) of the target month is already a business day (:issue:`41356`)
 - Bug in :meth:`RangeIndex.union` with another ``RangeIndex`` with matching (even) ``step`` and starts differing by strictly less than ``step / 2`` (:issue:`44019`)
 - Bug in :meth:`RangeIndex.difference` with ``sort=None`` and ``step<0`` failing to sort (:issue:`44085`)
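
For context, a minimal sketch of the behaviour this whatsnew entry describes, mirroring the regression test added further down; the frame and dtype values here are illustrative only, not taken from the commit:

import numpy as np
import pandas as pd

# A frame whose columns contain a duplicated label "A".
df = pd.DataFrame(np.random.randn(3, 4), columns=["A", "B", "C", "A"])

# df.dtypes is a Series keyed by the non-unique column labels; editing it
# positionally and passing it back to astype is the case fixed here (GH#44417).
dtypes = df.dtypes
dtypes.iloc[0] = "float32"

result = df.astype(dtypes)
print(result.dtypes)  # the first "A" column becomes float32, the rest stay float64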

pandas/core/generic.py (+14 -6)

@@ -5827,14 +5827,22 @@ def astype(
                         "Only a column name can be used for the "
                         "key in a dtype mappings argument."
                     )
+
+            # GH#44417 cast to Series so we can use .iat below, which will be
+            #  robust in case we
+            from pandas import Series
+
+            dtype_ser = Series(dtype, dtype=object)
+            dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False)
+
             results = []
-            for col_name, col in self.items():
-                if col_name in dtype:
-                    results.append(
-                        col.astype(dtype=dtype[col_name], copy=copy, errors=errors)
-                    )
+            for i, (col_name, col) in enumerate(self.items()):
+                cdt = dtype_ser.iat[i]
+                if isna(cdt):
+                    res_col = col.copy() if copy else col
                 else:
-                    results.append(col.copy() if copy else col)
+                    res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
+                results.append(res_col)

         elif is_extension_array_dtype(dtype) and self.ndim > 1:
             # GH 18099/22869: columnwise conversion to extension dtype
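
The heart of the patch is aligning the dict-like dtype argument positionally with self.columns, so each column position gets its own dtype even when labels repeat. A standalone sketch of that alignment step, assuming made-up column labels and a made-up dtype mapping (outside of pandas internals):

import pandas as pd

# Build the same object-dtype "one dtype per column position" Series the patch uses.
columns = pd.Index(["A", "B", "C", "A"])      # non-unique column labels
dtype_map = {"A": "float32", "C": "Float64"}  # dict-like dtype argument

dtype_ser = pd.Series(dtype_map, dtype=object)
dtype_ser = dtype_ser.reindex(columns, fill_value=None, copy=False)

# One entry per column position: both "A" columns map to "float32", while the
# unmapped "B" slot is a missing value, so that column would be left as-is.
for i, label in enumerate(columns):
    print(i, label, dtype_ser.iat[i])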

pandas/core/groupby/generic.py (+1 -1)

@@ -992,7 +992,7 @@ def _wrap_applied_output(
             result = self.obj._constructor(
                 index=self.grouper.result_index, columns=data.columns
             )
-            result = result.astype(data.dtypes.to_dict(), copy=False)
+            result = result.astype(data.dtypes, copy=False)
             return result

         # GH12824
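
The one-line groupby change matters because .to_dict() collapses duplicate column labels into a single key, whereas the dtypes Series keeps one entry per column. A small illustration of the difference, using an invented frame rather than anything from the commit:

import pandas as pd

df = pd.DataFrame({"a": [1.0], "b": [2.0], "c": [3]})
df.columns = ["a", "b", "a"]  # duplicate label "a" over a float64 and an int64 column

print(df.dtypes.to_dict())  # {'a': dtype('int64'), 'b': dtype('float64')} -- one "a" dtype is lost
print(df.dtypes)            # three entries (float64, float64, int64) -- nothing collapsed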

pandas/tests/frame/methods/test_astype.py (+20)

@@ -261,6 +261,26 @@ def test_astype_duplicate_col(self):
         expected = concat([a1_str, b, a2_str], axis=1)
         tm.assert_frame_equal(result, expected)

+    def test_astype_duplicate_col_series_arg(self):
+        # GH#44417
+        vals = np.random.randn(3, 4)
+        df = DataFrame(vals, columns=["A", "B", "C", "A"])
+        dtypes = df.dtypes
+        dtypes.iloc[0] = str
+        dtypes.iloc[2] = "Float64"
+
+        result = df.astype(dtypes)
+        expected = DataFrame(
+            {
+                0: vals[:, 0].astype(str),
+                1: vals[:, 1],
+                2: pd.array(vals[:, 2], dtype="Float64"),
+                3: vals[:, 3],
+            }
+        )
+        expected.columns = df.columns
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize(
         "dtype",
         [

pandas/tests/groupby/test_groupby.py (+10)

@@ -2031,6 +2031,16 @@ def get_result():
     tm.assert_equal(result, expected)


+def test_empty_groupby_apply_nonunique_columns():
+    # GH#44417
+    df = DataFrame(np.random.randn(0, 4))
+    df[3] = df[3].astype(np.int64)
+    df.columns = [0, 1, 2, 0]
+    gb = df.groupby(df[1])
+    res = gb.apply(lambda x: x)
+    assert (res.dtypes == df.dtypes).all()
+
+
 def test_tuple_as_grouping():
     # https://github.com/pandas-dev/pandas/issues/18314
     df = DataFrame(
