Skip to content

Commit a79f08c

Browse files
committed
Merge pull request #3027 from jreback/take
BUG: Bug in user-facing take with negative indicies was incorrect
2 parents 2f7b0e4 + b59bf6c commit a79f08c

File tree

7 files changed

+87
-16
lines changed

7 files changed

+87
-16
lines changed

RELEASE.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,8 @@ pandas 0.11.0
140140
- Bug in value_counts of ``datetime64[ns]`` Series (GH3002_)
141141
- Fixed printing of ``NaT`` in an index
142142
- Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982_)
143-
- Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_)
143+
- Bug in ``icol``, ``take`` with negative indices was producing incorrect return
144+
values (see GH2922_, GH2892_), also check for out-of-bounds indices (GH3029_)
144145
- Bug in DataFrame column insertion when the column creation fails, existing frame is left in
145146
an irrecoverable state (GH3010_)
146147
- Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_)
@@ -161,6 +162,7 @@ pandas 0.11.0
161162
.. _GH2807: https://github.com/pydata/pandas/issues/2807
162163
.. _GH2849: https://github.com/pydata/pandas/issues/2849
163164
.. _GH2898: https://github.com/pydata/pandas/issues/2898
165+
.. _GH2892: https://github.com/pydata/pandas/issues/2892
164166
.. _GH2909: https://github.com/pydata/pandas/issues/2909
165167
.. _GH2922: https://github.com/pydata/pandas/issues/2922
166168
.. _GH2931: https://github.com/pydata/pandas/issues/2931
@@ -171,6 +173,7 @@ pandas 0.11.0
171173
.. _GH3002: https://github.com/pydata/pandas/issues/3002
172174
.. _GH3010: https://github.com/pydata/pandas/issues/3010
173175
.. _GH3012: https://github.com/pydata/pandas/issues/3012
176+
.. _GH3029: https://github.com/pydata/pandas/issues/3029
174177

175178

176179
pandas 0.10.1

pandas/core/frame.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
from pandas.core.generic import NDFrame
2929
from pandas.core.index import Index, MultiIndex, _ensure_index
3030
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
31-
_is_index_slice, _check_bool_indexer)
31+
_is_index_slice, _check_bool_indexer,
32+
_maybe_convert_indices)
3233
from pandas.core.internals import BlockManager, make_block, form_blocks
3334
from pandas.core.series import Series, _radd_compat
3435
import pandas.core.expressions as expressions
@@ -1928,11 +1929,6 @@ def _ixs(self, i, axis=0, copy=False):
19281929
label = self.columns[i]
19291930
if isinstance(label, Index):
19301931

1931-
# if we have negative indicies, translate to postive here
1932-
# (take doesen't deal properly with these)
1933-
l = len(self.columns)
1934-
i = [ v if v >= 0 else l+v for v in i ]
1935-
19361932
return self.take(i, axis=1)
19371933

19381934
values = self._data.iget(i)
@@ -2911,11 +2907,13 @@ def take(self, indices, axis=0):
29112907
-------
29122908
taken : DataFrame
29132909
"""
2914-
if isinstance(indices, list):
2915-
indices = np.array(indices)
2910+
2911+
# check/convert indices here
2912+
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
2913+
29162914
if self._is_mixed_type:
29172915
if axis == 0:
2918-
new_data = self._data.take(indices, axis=1)
2916+
new_data = self._data.take(indices, axis=1, verify=False)
29192917
return DataFrame(new_data)
29202918
else:
29212919
new_columns = self.columns.take(indices)

pandas/core/generic.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from pandas.core.index import MultiIndex
66
import pandas.core.indexing as indexing
7+
from pandas.core.indexing import _maybe_convert_indices
78
from pandas.tseries.index import DatetimeIndex
89
import pandas.core.common as com
910
import pandas.lib as lib
@@ -943,12 +944,16 @@ def take(self, indices, axis=0):
943944
-------
944945
taken : type of caller
945946
"""
947+
948+
# check/convert indices here
949+
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))
950+
946951
if axis == 0:
947952
labels = self._get_axis(axis)
948953
new_items = labels.take(indices)
949954
new_data = self._data.reindex_axis(new_items, axis=0)
950955
else:
951-
new_data = self._data.take(indices, axis=axis)
956+
new_data = self._data.take(indices, axis=axis, verify=False)
952957
return self._constructor(new_data)
953958

954959
def tz_convert(self, tz, axis=0, copy=True):

pandas/core/indexing.py

+14
Original file line numberDiff line numberDiff line change
@@ -913,6 +913,20 @@ def _is_series(obj):
913913
return isinstance(obj, Series)
914914

915915

916+
def _maybe_convert_indices(indices, n):
917+
""" if we have negative indicies, translate to postive here
918+
if have indicies that are out-of-bounds, raise an IndexError """
919+
if isinstance(indices, list):
920+
indices = np.array(indices)
921+
922+
mask = indices<0
923+
if mask.any():
924+
indices[mask] += n
925+
mask = (indices>=n) | (indices<0)
926+
if mask.any():
927+
raise IndexError("indices are out-of-bounds")
928+
return indices
929+
916930
def _maybe_convert_ix(*args):
917931
"""
918932
We likely want to take the cross-product

pandas/core/internals.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66

77
from pandas.core.index import Index, _ensure_index, _handle_legacy_indexes
8-
from pandas.core.indexing import _check_slice_bounds
8+
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
99
import pandas.core.common as com
1010
import pandas.lib as lib
1111
import pandas.tslib as tslib
@@ -1517,13 +1517,16 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
15171517
na_block = make_block(block_values, items, ref_items)
15181518
return na_block
15191519

1520-
def take(self, indexer, axis=1):
1520+
def take(self, indexer, axis=1, verify=True):
15211521
if axis < 1:
15221522
raise AssertionError('axis must be at least 1, got %d' % axis)
15231523

15241524
indexer = com._ensure_platform_int(indexer)
1525-
15261525
n = len(self.axes[axis])
1526+
1527+
if verify:
1528+
indexer = _maybe_convert_indices(indexer, n)
1529+
15271530
if ((indexer == -1) | (indexer >= n)).any():
15281531
raise Exception('Indices must be nonzero and less than '
15291532
'the axis length')

pandas/tests/test_frame.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -8615,12 +8615,43 @@ def test_fillna_col_reordering(self):
86158615
self.assert_(df.columns.tolist() == filled.columns.tolist())
86168616

86178617
def test_take(self):
8618+
86188619
# homogeneous
86198620
#----------------------------------------
8621+
order = [3, 1, 2, 0]
8622+
for df in [self.frame]:
8623+
8624+
result = df.take(order, axis=0)
8625+
expected = df.reindex(df.index.take(order))
8626+
assert_frame_equal(result, expected)
8627+
8628+
# axis = 1
8629+
result = df.take(order, axis=1)
8630+
expected = df.ix[:, ['D', 'B', 'C', 'A']]
8631+
assert_frame_equal(result, expected, check_names=False)
8632+
8633+
# negative indices
8634+
order = [2,1,-1]
8635+
for df in [self.frame]:
8636+
8637+
result = df.take(order, axis=0)
8638+
expected = df.reindex(df.index.take(order))
8639+
assert_frame_equal(result, expected)
8640+
8641+
# axis = 1
8642+
result = df.take(order, axis=1)
8643+
expected = df.ix[:, ['C', 'B', 'D']]
8644+
assert_frame_equal(result, expected, check_names=False)
8645+
8646+
# illegal indices
8647+
self.assertRaises(IndexError, df.take, [3,1,2,30], axis=0)
8648+
self.assertRaises(IndexError, df.take, [3,1,2,-31], axis=0)
8649+
self.assertRaises(IndexError, df.take, [3,1,2,5], axis=1)
8650+
self.assertRaises(IndexError, df.take, [3,1,2,-5], axis=1)
86208651

86218652
# mixed-dtype
86228653
#----------------------------------------
8623-
order = [4, 1, 2, 0, 3]
8654+
order = [4, 1, 2, 0, 3]
86248655
for df in [self.mixed_frame]:
86258656

86268657
result = df.take(order, axis=0)
@@ -8632,6 +8663,19 @@ def test_take(self):
86328663
expected = df.ix[:, ['foo', 'B', 'C', 'A', 'D']]
86338664
assert_frame_equal(result, expected)
86348665

8666+
# negative indices
8667+
order = [4,1,-2]
8668+
for df in [self.mixed_frame]:
8669+
8670+
result = df.take(order, axis=0)
8671+
expected = df.reindex(df.index.take(order))
8672+
assert_frame_equal(result, expected)
8673+
8674+
# axis = 1
8675+
result = df.take(order, axis=1)
8676+
expected = df.ix[:, ['foo', 'B', 'D']]
8677+
assert_frame_equal(result, expected)
8678+
86358679
# by dtype
86368680
order = [1, 2, 0, 3]
86378681
for df in [self.mixed_float,self.mixed_int]:

pandas/tests/test_panel.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,11 @@ def test_take(self):
10131013
expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
10141014
assert_panel_equal(result, expected)
10151015

1016-
self.assertRaises(Exception, self.panel.take, [3, -1, 1, 2], axis=2)
1016+
# negative indices ok
1017+
expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
1018+
result = self.panel.take([3, -1, 1, 2], axis=2)
1019+
assert_panel_equal(result, expected)
1020+
10171021
self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
10181022

10191023
def test_sort_index(self):

0 commit comments

Comments
 (0)