handle duplicate column case

chris-b1 · chris-b1 · commit d61d4e0bff49 · 2016-05-17T18:39:55.000-05:00
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -2776,8 +2776,7 @@ def _transform_fast(self, func):
             func = getattr(self, func)
 
         ids, _, ngroup = self.grouper.group_info
-        counts = self.size().fillna(0).values
-        cast = (counts == 0).any()
+        cast = self.size().isnull().any()
         out = algos.take_1d(func().values, ids)
         if cast:
             out = self._try_cast(out, self.obj)
@@ -3459,23 +3458,28 @@ def transform(self, func, *args, **kwargs):
         if not result.columns.equals(obj.columns):
             return self._transform_general(func, *args, **kwargs)
 
-        # Fast transform path for aggregations
+        return self._transform_fast(result, obj)
 
+    def _transform_fast(self, result, obj):
+        """
+        Fast transform path for aggregations
+        """
         # if there were groups with no observations (Categorical only?)
         # try casting data to original dtype
-        counts = self.size().fillna(0).values
-        cast = (counts == 0).any()
+        cast = self.size().isnull().any()
 
-        # by column (could be by block?) reshape aggregated data to
-        # size of original frame by repeating obvservations with take
+        # for each col, reshape to size of original frame
+        # by take operation
         ids, _, ngroup = self.grouper.group_info
-        out = {}
-        for col in result:
-            out[col] = algos.take_nd(result[col].values, ids)
+        output = []
+        for i, _ in enumerate(result.columns):
+            res = algos.take_1d(result.iloc[:, i].values, ids)
             if cast:
-                out[col] = self._try_cast(out[col], obj[col])
+                res = self._try_cast(res, obj.iloc[:, i])
+            output.append(res)
 
-        return DataFrame(out, columns=result.columns, index=obj.index)
+        return DataFrame._from_arrays(output, columns=result.columns,
+                                      index=obj.index)
 
     def _define_paths(self, func, *args, **kwargs):
         if isinstance(func, compat.string_types):
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -1078,6 +1078,12 @@ def test_transform_fast(self):
         expected = expected[['f', 'i']]
         assert_frame_equal(result, expected)
 
+        # dup columns
+        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a'])
+        result = df.groupby('g').transform('first')
+        expected = df.drop('g', axis=1)
+        assert_frame_equal(result, expected)
+
     def test_transform_broadcast(self):
         grouped = self.ts.groupby(lambda x: x.month)
         result = grouped.transform(np.mean)