Skip to content

Commit d61d4e0

Browse files
committed
handle duplicate column case
1 parent 9d78f65 commit d61d4e0

File tree

2 files changed

+22
-12
lines changed

2 files changed

+22
-12
lines changed

pandas/core/groupby.py

+16-12
Original file line numberDiff line numberDiff line change
@@ -2776,8 +2776,7 @@ def _transform_fast(self, func):
27762776
func = getattr(self, func)
27772777

27782778
ids, _, ngroup = self.grouper.group_info
2779-
counts = self.size().fillna(0).values
2780-
cast = (counts == 0).any()
2779+
cast = self.size().isnull().any()
27812780
out = algos.take_1d(func().values, ids)
27822781
if cast:
27832782
out = self._try_cast(out, self.obj)
@@ -3459,23 +3458,28 @@ def transform(self, func, *args, **kwargs):
34593458
if not result.columns.equals(obj.columns):
34603459
return self._transform_general(func, *args, **kwargs)
34613460

3462-
# Fast transform path for aggregations
3461+
return self._transform_fast(result, obj)
34633462

3463+
def _transform_fast(self, result, obj):
3464+
"""
3465+
Fast transform path for aggregations
3466+
"""
34643467
# if there were groups with no observations (Categorical only?)
34653468
# try casting data to original dtype
3466-
counts = self.size().fillna(0).values
3467-
cast = (counts == 0).any()
3469+
cast = self.size().isnull().any()
34683470

3469-
# by column (could be by block?) reshape aggregated data to
3470-
# size of original frame by repeating observations with take
3471+
# for each col, reshape to size of original frame
3472+
# by take operation
34713473
ids, _, ngroup = self.grouper.group_info
3472-
out = {}
3473-
for col in result:
3474-
out[col] = algos.take_nd(result[col].values, ids)
3474+
output = []
3475+
for i, _ in enumerate(result.columns):
3476+
res = algos.take_1d(result.iloc[:, i].values, ids)
34753477
if cast:
3476-
out[col] = self._try_cast(out[col], obj[col])
3478+
res = self._try_cast(res, obj.iloc[:, i])
3479+
output.append(res)
34773480

3478-
return DataFrame(out, columns=result.columns, index=obj.index)
3481+
return DataFrame._from_arrays(output, columns=result.columns,
3482+
index=obj.index)
34793483

34803484
def _define_paths(self, func, *args, **kwargs):
34813485
if isinstance(func, compat.string_types):

pandas/tests/test_groupby.py

+6
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,12 @@ def test_transform_fast(self):
10781078
expected = expected[['f', 'i']]
10791079
assert_frame_equal(result, expected)
10801080

1081+
# dup columns
1082+
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a'])
1083+
result = df.groupby('g').transform('first')
1084+
expected = df.drop('g', axis=1)
1085+
assert_frame_equal(result, expected)
1086+
10811087
def test_transform_broadcast(self):
10821088
grouped = self.ts.groupby(lambda x: x.month)
10831089
result = grouped.transform(np.mean)

0 commit comments

Comments
 (0)