BUG: enabled applymap to work (and updated internals/convert to use iget) when

jreback · jreback · commit b4677c195f33 · 2013-05-01T20:44:42.000-04:00
using a non-unique index (GH2786 for the warning and GH3230 for applymap)

TST: test for GH2194 (which is fixed)
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -68,8 +68,14 @@ pandas 0.11.1
     - Fix assigning a new index to a duplicate index in a DataFrame would fail
     - Fix construction of a DataFrame with a duplicate index
     - ref_locs support to allow duplicative indices across dtypes
+      (GH2194_)
+    - applymap on a DataFrame with a non-unique index now works
+      (removed the duplicate-column-label error, GH2786_; fixes GH3230_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
+.. _GH2786: https://github.com/pydata/pandas/issues/2786
+.. _GH2194: https://github.com/pydata/pandas/issues/2194
+.. _GH3230: https://github.com/pydata/pandas/issues/3230
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
 .. _GH3379: https://github.com/pydata/pandas/issues/3379
 .. _GH3480: https://github.com/pydata/pandas/issues/3480
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4261,9 +4261,6 @@ def infer(x):
             if com.is_datetime64_dtype(x):
                 x = lib.map_infer(x, lib.Timestamp)
             return lib.map_infer(x, func)
-        #GH2786
-        if not self.columns.is_unique:
-            raise ValueError("applymap does not support dataframes having duplicate column labels")
         return self.apply(infer)
 
     #----------------------------------------------------------------------
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -165,6 +165,9 @@ def get(self, item):
         loc = self.items.get_loc(item)
         return self.values[loc]
 
+    def iget(self, i):
+        return self.values[i]
+
     def set(self, item, value):
         """
         Modify Block in-place with new item value
@@ -711,7 +714,7 @@ def convert(self, convert_dates = True, convert_numeric = True, copy = True):
         # attempt to create new type blocks
         blocks = []
         for i, c in enumerate(self.items):
-            values = self.get(c)
+            values = self.iget(i)
 
             values = com._possibly_convert_objects(values, convert_dates=convert_dates, convert_numeric=convert_numeric)
             values = _block_shape(values)
@@ -920,17 +923,14 @@ def set_axis(self, axis, value):
         self.axes[axis] = value
 
         if axis == 0:
-            # unique, we can take
-            if cur_axis.is_unique:
-                for block in self.blocks:
-                    block.set_ref_items(self.items, maybe_rename=True)
 
-            # compute a duplicate indexer that we can use to take
-            # the new items from ref_items (in place of _ref_items)
-            else:
+            # we have a non-unique index, so set up the ref_locs
+            if not cur_axis.is_unique:
                 self.set_ref_locs(cur_axis)
-                for block in self.blocks:
-                    block.set_ref_items(self.items, maybe_rename=True)
+
+            # take via ref_locs
+            for block in self.blocks:
+                block.set_ref_items(self.items, maybe_rename=True)
 
     def set_ref_locs(self, labels = None):
         # if we have a non-unique index on this axis, set the indexers
@@ -945,8 +945,9 @@ def set_ref_locs(self, labels = None):
         #### THIS IS POTENTIALLY VERY SLOW #####
 
         # if we are already computed, then we are done
-        if getattr(self,'_ref_locs',None) is not None:
-            return self._ref_locs
+        rl = getattr(self,'_ref_locs',None)
+        if rl is not None:
+            return rl
 
         blocks = self.blocks
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -7492,12 +7492,15 @@ def test_applymap(self):
         self.assert_(result.dtypes[0] == object)
 
         # GH2786
-        df = DataFrame(np.random.random((3,4)))
-        df.columns = ['a','a','a','a']
-        try:
-            df.applymap(str)
-        except ValueError as e:
-            self.assertTrue("support" in str(e))
+        df  = DataFrame(np.random.random((3,4)))
+        df2 = df.copy()
+        cols = ['a','a','a','a']
+        df.columns = cols
+
+        expected = df2.applymap(str)
+        expected.columns = cols
+        result = df.applymap(str)
+        assert_frame_equal(result,expected)
 
     def test_filter(self):
         # items
@@ -9201,7 +9204,7 @@ def test_assign_columns(self):
         assert_series_equal(self.frame['C'], frame['baz'])
         assert_series_equal(self.frame['hi'], frame['foo2'])
 
-    def test_assign_columns_with_dups(self):
+    def test_columns_with_dups(self):
 
         # GH 3468 related
 
@@ -9246,6 +9249,17 @@ def test_assign_columns_with_dups(self):
         result = df._data.set_ref_locs()
         self.assert_(len(result) == len(df.columns))
 
+        # testing iget
+        for i in range(len(df.columns)):
+             df.iloc[:,i]
+
+        # duplicate columns across dtypes (GH 2079, GH 2194)
+        vals = [[1, -1, 2.], [2, -2, 3.]] 
+        rs = DataFrame(vals, columns=['A', 'A', 'B']) 
+        xp = DataFrame(vals) 
+        xp.columns = ['A', 'A', 'B'] 
+        assert_frame_equal(rs, xp) 
+
     def test_cast_internals(self):
         casted = DataFrame(self.frame._data, dtype=int)
         expected = DataFrame(self.frame._series, dtype=int)