Skip to content

Commit e97e2be

Browse files
committed
Merge pull request #6031 from jreback/index_segfault
BUG: Possible segfault when chained indexing with an object array under numpy 1.7.1 (GH6016)
2 parents 2258efd + 7acbbd4 commit e97e2be

File tree

5 files changed

+59
-9
lines changed

5 files changed

+59
-9
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ Bug Fixes
134134
- Bug in Series.xs with a multi-index (:issue:`6018`)
135135
- Bug in Series construction of mixed type with datelike and an integer (which should result in
136136
object type and not automatic conversion) (:issue:`6028`)
137+
- Possible segfault when chained indexing with an object array under numpy 1.7.1 (:issue:`6016`)
137138

138139
pandas 0.13.0
139140
-------------

pandas/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
_np_version = np.version.short_version
3030
_np_version_under1p6 = LooseVersion(_np_version) < '1.6'
3131
_np_version_under1p7 = LooseVersion(_np_version) < '1.7'
32+
_np_version_under1p8 = LooseVersion(_np_version) < '1.8'
3233

3334
from pandas.version import version as __version__
3435
from pandas.info import __doc__

pandas/core/generic.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1005,9 +1005,14 @@ def _box_item_values(self, key, values):
10051005
raise NotImplementedError
10061006

10071007
def _maybe_cache_changed(self, item, value):
1008-
""" the object has called back to us saying
1009-
maybe it has changed """
1010-
self._data.set(item, value)
1008+
"""
1009+
the object has called back to us saying
1010+
maybe it has changed
1011+
1012+
numpy < 1.8 has an issue with object arrays and aliasing
1013+
GH6026
1014+
"""
1015+
self._data.set(item, value, check=pd._np_version_under1p8)
10111016

10121017
@property
10131018
def _is_cached(self):

pandas/core/internals.py

+26-6
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from pandas.compat import range, lrange, lmap, callable, map, zip, u
2727
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
2828

29-
3029
class Block(PandasObject):
3130

3231
"""
@@ -279,7 +278,7 @@ def get(self, item):
279278
def iget(self, i):
280279
return self.values[i]
281280

282-
def set(self, item, value):
281+
def set(self, item, value, check=False):
283282
"""
284283
Modify Block in-place with new item value
285284
@@ -1360,6 +1359,26 @@ def convert(self, convert_dates=True, convert_numeric=True, convert_timedeltas=T
13601359

13611360
return blocks
13621361

1362+
def set(self, item, value, check=False):
1363+
"""
1364+
Modify Block in-place with new item value
1365+
1366+
Returns
1367+
-------
1368+
None
1369+
"""
1370+
1371+
loc = self.items.get_loc(item)
1372+
1373+
# GH6026
1374+
if check:
1375+
try:
1376+
if (self.values[loc] == value).all():
1377+
return
1378+
except:
1379+
pass
1380+
self.values[loc] = value
1381+
13631382
def _maybe_downcast(self, blocks, downcast=None):
13641383

13651384
if downcast is not None:
@@ -1601,7 +1620,7 @@ def astype(self, dtype, copy=False, raise_on_error=True):
16011620
return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
16021621
klass=klass)
16031622

1604-
def set(self, item, value):
1623+
def set(self, item, value, check=False):
16051624
"""
16061625
Modify Block in-place with new item value
16071626
@@ -1714,7 +1733,7 @@ def prepare_for_merge(self, **kwargs):
17141733
def post_merge(self, items, **kwargs):
17151734
return self
17161735

1717-
def set(self, item, value):
1736+
def set(self, item, value, check=False):
17181737
self.values = value
17191738

17201739
def get(self, item):
@@ -2879,10 +2898,11 @@ def delete(self, item):
28792898
if not is_unique:
28802899
self._consolidate_inplace()
28812900

2882-
def set(self, item, value):
2901+
def set(self, item, value, check=False):
28832902
"""
28842903
Set new item in-place. Does not consolidate. Adds new Block if not
28852904
contained in the current set of items
2905+
if check, then validate that we are not setting the same data in-place
28862906
"""
28872907
if not isinstance(value, SparseArray):
28882908
if value.ndim == self.ndim - 1:
@@ -2898,7 +2918,7 @@ def _set_item(item, arr):
28982918
self._delete_from_block(i, item)
28992919
self._add_new_block(item, arr, loc=None)
29002920
else:
2901-
block.set(item, arr)
2921+
block.set(item, arr, check=check)
29022922

29032923
try:
29042924

pandas/tests/test_indexing.py

+23
Original file line numberDiff line numberDiff line change
@@ -1950,6 +1950,29 @@ def test_setitem_cache_updating(self):
19501950
self.assert_(df.ix[0,'c'] == 0.0)
19511951
self.assert_(df.ix[7,'c'] == 1.0)
19521952

1953+
def test_setitem_chained_setfault(self):
1954+
1955+
# GH6026
1956+
# segfaults under numpy 1.7.1 (ok on 1.8)
1957+
data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
1958+
mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']
1959+
1960+
df = DataFrame({'response': np.array(data)})
1961+
mask = df.response == 'timeout'
1962+
df.response[mask] = 'none'
1963+
assert_frame_equal(df, DataFrame({'response': mdata }))
1964+
1965+
recarray = np.rec.fromarrays([data], names=['response'])
1966+
df = DataFrame(recarray)
1967+
mask = df.response == 'timeout'
1968+
df.response[mask] = 'none'
1969+
assert_frame_equal(df, DataFrame({'response': mdata }))
1970+
1971+
df = DataFrame({'response': data, 'response1' : data })
1972+
mask = df.response == 'timeout'
1973+
df.response[mask] = 'none'
1974+
assert_frame_equal(df, DataFrame({'response': mdata, 'response1' : data }))
1975+
19531976
def test_detect_chained_assignment(self):
19541977

19551978
pd.set_option('chained_assignment','raise')

0 commit comments

Comments
 (0)