Skip to content

Commit b4677c1

Browse files
committed
BUG: enable applymap to work on a DataFrame with a non-unique index, and update internals/convert to use iget
(GH2786 for the removed check, GH3230 for applymap). TST: add a test for GH2194 (which is already fixed)
1 parent 4c756e2 commit b4677c1

File tree

4 files changed

+40
-22
lines changed

4 files changed

+40
-22
lines changed

RELEASE.rst

+6
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,14 @@ pandas 0.11.1
6868
- Fix failure when assigning a new index to a DataFrame that has a duplicate index
6969
- Fix construction of a DataFrame with a duplicate index
7070
- ref_locs support to allow duplicate indices across dtypes
71+
(GH2194_)
72+
- applymap on a DataFrame with non-unique column labels now works
73+
(removed the ValueError that was raised for duplicate column labels) (GH2786_); this also fixes (GH3230_)
7174

7275
.. _GH3164: https://github.com/pydata/pandas/issues/3164
76+
.. _GH2786: https://github.com/pydata/pandas/issues/2786
77+
.. _GH2194: https://github.com/pydata/pandas/issues/2194
78+
.. _GH3230: https://github.com/pydata/pandas/issues/3230
7379
.. _GH3251: https://github.com/pydata/pandas/issues/3251
7480
.. _GH3379: https://github.com/pydata/pandas/issues/3379
7581
.. _GH3480: https://github.com/pydata/pandas/issues/3480

pandas/core/frame.py

-3
Original file line numberDiff line numberDiff line change
@@ -4261,9 +4261,6 @@ def infer(x):
42614261
if com.is_datetime64_dtype(x):
42624262
x = lib.map_infer(x, lib.Timestamp)
42634263
return lib.map_infer(x, func)
4264-
#GH2786
4265-
if not self.columns.is_unique:
4266-
raise ValueError("applymap does not support dataframes having duplicate column labels")
42674264
return self.apply(infer)
42684265

42694266
#----------------------------------------------------------------------

pandas/core/internals.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ def get(self, item):
165165
loc = self.items.get_loc(item)
166166
return self.values[loc]
167167

168+
def iget(self, i):
169+
return self.values[i]
170+
168171
def set(self, item, value):
169172
"""
170173
Modify Block in-place with new item value
@@ -711,7 +714,7 @@ def convert(self, convert_dates = True, convert_numeric = True, copy = True):
711714
# attempt to create new type blocks
712715
blocks = []
713716
for i, c in enumerate(self.items):
714-
values = self.get(c)
717+
values = self.iget(i)
715718

716719
values = com._possibly_convert_objects(values, convert_dates=convert_dates, convert_numeric=convert_numeric)
717720
values = _block_shape(values)
@@ -920,17 +923,14 @@ def set_axis(self, axis, value):
920923
self.axes[axis] = value
921924

922925
if axis == 0:
923-
# unique, we can take
924-
if cur_axis.is_unique:
925-
for block in self.blocks:
926-
block.set_ref_items(self.items, maybe_rename=True)
927926

928-
# compute a duplicate indexer that we can use to take
929-
# the new items from ref_items (in place of _ref_items)
930-
else:
927+
# we have a non-unique index, so set up the ref_locs
928+
if not cur_axis.is_unique:
931929
self.set_ref_locs(cur_axis)
932-
for block in self.blocks:
933-
block.set_ref_items(self.items, maybe_rename=True)
930+
931+
# take via ref_locs
932+
for block in self.blocks:
933+
block.set_ref_items(self.items, maybe_rename=True)
934934

935935
def set_ref_locs(self, labels = None):
936936
# if we have a non-unique index on this axis, set the indexers
@@ -945,8 +945,9 @@ def set_ref_locs(self, labels = None):
945945
#### THIS IS POTENTIALLY VERY SLOW #####
946946

947947
# if the ref_locs have already been computed, then we are done
948-
if getattr(self,'_ref_locs',None) is not None:
949-
return self._ref_locs
948+
rl = getattr(self,'_ref_locs',None)
949+
if rl is not None:
950+
return rl
950951

951952
blocks = self.blocks
952953

pandas/tests/test_frame.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -7492,12 +7492,15 @@ def test_applymap(self):
74927492
self.assert_(result.dtypes[0] == object)
74937493

74947494
# GH2786
7495-
df = DataFrame(np.random.random((3,4)))
7496-
df.columns = ['a','a','a','a']
7497-
try:
7498-
df.applymap(str)
7499-
except ValueError as e:
7500-
self.assertTrue("support" in str(e))
7495+
df = DataFrame(np.random.random((3,4)))
7496+
df2 = df.copy()
7497+
cols = ['a','a','a','a']
7498+
df.columns = cols
7499+
7500+
expected = df2.applymap(str)
7501+
expected.columns = cols
7502+
result = df.applymap(str)
7503+
assert_frame_equal(result,expected)
75017504

75027505
def test_filter(self):
75037506
# items
@@ -9201,7 +9204,7 @@ def test_assign_columns(self):
92019204
assert_series_equal(self.frame['C'], frame['baz'])
92029205
assert_series_equal(self.frame['hi'], frame['foo2'])
92039206

9204-
def test_assign_columns_with_dups(self):
9207+
def test_columns_with_dups(self):
92059208

92069209
# GH 3468 related
92079210

@@ -9246,6 +9249,17 @@ def test_assign_columns_with_dups(self):
92469249
result = df._data.set_ref_locs()
92479250
self.assert_(len(result) == len(df.columns))
92489251

9252+
# testing iget
9253+
for i in range(len(df.columns)):
9254+
df.iloc[:,i]
9255+
9256+
# dup columns across dtype GH 2079/2194
9257+
vals = [[1, -1, 2.], [2, -2, 3.]]
9258+
rs = DataFrame(vals, columns=['A', 'A', 'B'])
9259+
xp = DataFrame(vals)
9260+
xp.columns = ['A', 'A', 'B']
9261+
assert_frame_equal(rs, xp)
9262+
92499263
def test_cast_internals(self):
92509264
casted = DataFrame(self.frame._data, dtype=int)
92519265
expected = DataFrame(self.frame._series, dtype=int)

0 commit comments

Comments
 (0)