Skip to content

Commit 2cc49b7

Browse files
committed
Merge pull request #5097 from jreback/panel_dup
BUG: non-unique indexing in a Panel (GH4960)
2 parents a2c8f44 + 420fae8 commit 2cc49b7

File tree

7 files changed

+117
-13
lines changed

7 files changed

+117
-13
lines changed

doc/source/release.rst

+1
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,7 @@ API Changes
294294
call with additional keyword args (:issue:`4435`)
295295
- Provide __dir__ method (and local context) for tab completion / remove ipython completers code
296296
(:issue:`4501`)
297+
- Support non-unique axes in a Panel via indexing operations (:issue:`4960`)
297298

298299

299300
Internal Refactoring

pandas/core/indexing.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -623,8 +623,9 @@ def _getitem_lowerdim(self, tup):
623623

624624
# might have been a MultiIndex
625625
elif section.ndim == self.ndim:
626+
626627
new_key = tup[:i] + (_NS,) + tup[i + 1:]
627-
# new_key = tup[:i] + tup[i+1:]
628+
628629
else:
629630
new_key = tup[:i] + tup[i + 1:]
630631

pandas/core/internals.py

+7 -2
Original file line numberDiff line numberDiff line change
@@ -2413,12 +2413,17 @@ def _interleave(self, items):
24132413

24142414
return result
24152415

2416-
def xs(self, key, axis=1, copy=True):
2416+
def xs(self, key, axis=1, copy=True, takeable=False):
24172417
if axis < 1:
24182418
raise AssertionError('Can only take xs across axis >= 1, got %d'
24192419
% axis)
24202420

2421-
loc = self.axes[axis].get_loc(key)
2421+
# take by position
2422+
if takeable:
2423+
loc = key
2424+
else:
2425+
loc = self.axes[axis].get_loc(key)
2426+
24222427
slicer = [slice(None, None) for _ in range(self.ndim)]
24232428
slicer[axis] = loc
24242429
slicer = tuple(slicer)

pandas/core/panel.py

+25 -4
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,15 @@ def set_value(self, *args):
504504
return result.set_value(*args)
505505

506506
def _box_item_values(self, key, values):
507+
if self.ndim == values.ndim:
508+
result = self._constructor(values)
509+
510+
# a dup selection will yield a full ndim
511+
if result._get_axis(0).is_unique:
512+
result = result[key]
513+
514+
return result
515+
507516
d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])
508517
return self._constructor_sliced(values, **d)
509518

@@ -745,15 +754,27 @@ def xs(self, key, axis=1, copy=True):
745754
_xs = xs
746755

747756
def _ixs(self, i, axis=0):
748-
# for compatibility with .ix indexing
749-
# Won't work with hierarchical indexing yet
757+
"""
758+
i : int, slice, or sequence of integers
759+
axis : int
760+
"""
761+
750762
key = self._get_axis(axis)[i]
751763

752764
# xs cannot handle a non-scalar key, so just reindex here
753765
if _is_list_like(key):
754-
return self.reindex(**{self._get_axis_name(axis): key})
766+
indexer = { self._get_axis_name(axis): key }
767+
return self.reindex(**indexer)
768+
769+
# a reduction
770+
if axis == 0:
771+
values = self._data.iget(i)
772+
return self._box_item_values(key,values)
755773

756-
return self.xs(key, axis=axis)
774+
# xs by position
775+
self._consolidate_inplace()
776+
new_data = self._data.xs(i, axis=axis, copy=True, takeable=True)
777+
return self._construct_return_type(new_data)
757778

758779
def groupby(self, function, axis='major'):
759780
"""

pandas/sparse/panel.py

+15
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,21 @@ def _set_items(self, new_items):
172172
# DataFrame's columns / "items"
173173
minor_axis = SparsePanelAxis('_minor_axis', 'columns')
174174

175+
def _ixs(self, i, axis=0):
176+
"""
177+
for compat as we don't support Block Manager here
178+
i : int, slice, or sequence of integers
179+
axis : int
180+
"""
181+
182+
key = self._get_axis(axis)[i]
183+
184+
# xs cannot handle a non-scalar key, so just reindex here
185+
if com.is_list_like(key):
186+
return self.reindex(**{self._get_axis_name(axis): key})
187+
188+
return self.xs(key, axis=axis)
189+
175190
def _get_item_cache(self, key):
176191
return self._frames[key]
177192

pandas/tests/test_panel.py

+59
Original file line numberDiff line numberDiff line change
@@ -1335,6 +1335,65 @@ def test_to_panel_duplicates(self):
13351335
idf = df.set_index(['a', 'b'])
13361336
assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel)
13371337

1338+
def test_panel_dups(self):
1339+
1340+
# GH 4960
1341+
# duplicates in an index
1342+
1343+
# items
1344+
data = np.random.randn(5, 100, 5)
1345+
no_dup_panel = Panel(data, items=list("ABCDE"))
1346+
panel = Panel(data, items=list("AACDE"))
1347+
1348+
expected = no_dup_panel['A']
1349+
result = panel.iloc[0]
1350+
assert_frame_equal(result, expected)
1351+
1352+
expected = no_dup_panel['E']
1353+
result = panel.loc['E']
1354+
assert_frame_equal(result, expected)
1355+
1356+
expected = no_dup_panel.loc[['A','B']]
1357+
expected.items = ['A','A']
1358+
result = panel.loc['A']
1359+
assert_panel_equal(result, expected)
1360+
1361+
# major
1362+
data = np.random.randn(5, 5, 5)
1363+
no_dup_panel = Panel(data, major_axis=list("ABCDE"))
1364+
panel = Panel(data, major_axis=list("AACDE"))
1365+
1366+
expected = no_dup_panel.loc[:,'A']
1367+
result = panel.iloc[:,0]
1368+
assert_frame_equal(result, expected)
1369+
1370+
expected = no_dup_panel.loc[:,'E']
1371+
result = panel.loc[:,'E']
1372+
assert_frame_equal(result, expected)
1373+
1374+
expected = no_dup_panel.loc[:,['A','B']]
1375+
expected.major_axis = ['A','A']
1376+
result = panel.loc[:,'A']
1377+
assert_panel_equal(result, expected)
1378+
1379+
# minor
1380+
data = np.random.randn(5, 100, 5)
1381+
no_dup_panel = Panel(data, minor_axis=list("ABCDE"))
1382+
panel = Panel(data, minor_axis=list("AACDE"))
1383+
1384+
expected = no_dup_panel.loc[:,:,'A']
1385+
result = panel.iloc[:,:,0]
1386+
assert_frame_equal(result, expected)
1387+
1388+
expected = no_dup_panel.loc[:,:,'E']
1389+
result = panel.loc[:,:,'E']
1390+
assert_frame_equal(result, expected)
1391+
1392+
expected = no_dup_panel.loc[:,:,['A','B']]
1393+
expected.minor_axis = ['A','A']
1394+
result = panel.loc[:,:,'A']
1395+
assert_panel_equal(result, expected)
1396+
13381397
def test_filter(self):
13391398
pass
13401399

pandas/util/testing.py

+8 -6
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,14 @@ def assert_panelnd_equal(left, right,
357357
right_ind = getattr(right, axis)
358358
assert_index_equal(left_ind, right_ind)
359359

360-
for col, series in compat.iteritems(left):
361-
assert col in right, "non-matching column '%s'" % col
362-
assert_func(series, right[col], check_less_precise=check_less_precise)
363-
364-
for col in right:
365-
assert col in left
360+
for i, item in enumerate(left._get_axis(0)):
361+
assert item in right, "non-matching item (right) '%s'" % item
362+
litem = left.iloc[i]
363+
ritem = right.iloc[i]
364+
assert_func(litem, ritem, check_less_precise=check_less_precise)
365+
366+
for i, item in enumerate(right._get_axis(0)):
367+
assert item in left, "non-matching item (left) '%s'" % item
366368

367369
# TODO: strangely check_names fails in py3 ?
368370
_panel_frame_equal = partial(assert_frame_equal, check_names=False)

0 commit comments

Comments
 (0)