Skip to content

Commit ee6c0cd

Browse files
gfyoung authored and jreback committed
BUG: Fix segfault in lib.isnullobj
Weird segfault arises when you call `lib.isnullobj` with an array that uses 0-field values to mean `None`. Changed input to be a `Python` object (i.e. no typing), and the segfault went away. Discovered when there were segfaults in printing a `DataFrame` containing such an array.

Closes #13717.

Author: gfyoung <[email protected]>

Closes #13764 from gfyoung/isnullobj-segfault and squashes the following commits:

0338b5d [gfyoung] BUG: Fix segfault in lib.isnullobj
1 parent 6cae23d commit ee6c0cd

File tree

4 files changed

+79
-4
lines changed

4 files changed

+79
-4
lines changed

asv_bench/benchmarks/frame_methods.py

+16
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from .pandas_vb_common import *
2+
import string
23

34

45
class frame_apply_axis_1(object):
@@ -606,6 +607,21 @@ def time_frame_isnull(self):
606607
isnull(self.df)
607608

608609

610+
class frame_isnull_strings(object):
611+
goal_time = 0.2
612+
613+
def setup(self):
614+
np.random.seed(1234)
615+
self.sample = np.array(list(string.ascii_lowercase) +
616+
list(string.ascii_uppercase) +
617+
list(string.whitespace))
618+
self.data = np.random.choice(self.sample, (1000, 1000))
619+
self.df = DataFrame(self.data)
620+
621+
def time_frame_isnull(self):
622+
isnull(self.df)
623+
624+
609625
class frame_isnull_obj(object):
610626
goal_time = 0.2
611627

doc/source/whatsnew/v0.19.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -694,6 +694,7 @@ Bug Fixes
694694
- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`)
695695
- Bug in ``.rolling()`` that allowed a negative integer window in construction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`)
696696

697+
- Bug in printing ``pd.DataFrame`` where unusual elements with the ``object`` dtype were causing segfaults (:issue:`13717`)
697698
- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
698699
- Bug in ``DatetimeIndex``, which did not honour ``copy=True`` (:issue:`13205`)
699700
- Bug in ``DatetimeIndex.is_normalized`` where it returned incorrect results for a normalized ``date_range`` in the case of local timezones (:issue:`13459`)

pandas/lib.pyx

+12-4
Original file line number | Diff line number | Diff line change
@@ -342,11 +342,13 @@ def item_from_zerodim(object val):
342342

343343
@cython.wraparound(False)
344344
@cython.boundscheck(False)
345-
def isnullobj(ndarray[object] arr):
345+
def isnullobj(ndarray arr):
346346
cdef Py_ssize_t i, n
347347
cdef object val
348348
cdef ndarray[uint8_t] result
349349

350+
assert arr.ndim == 1, "'arr' must be 1-D."
351+
350352
n = len(arr)
351353
result = np.empty(n, dtype=np.uint8)
352354
for i from 0 <= i < n:
@@ -356,11 +358,13 @@ def isnullobj(ndarray[object] arr):
356358

357359
@cython.wraparound(False)
358360
@cython.boundscheck(False)
359-
def isnullobj_old(ndarray[object] arr):
361+
def isnullobj_old(ndarray arr):
360362
cdef Py_ssize_t i, n
361363
cdef object val
362364
cdef ndarray[uint8_t] result
363365

366+
assert arr.ndim == 1, "'arr' must be 1-D."
367+
364368
n = len(arr)
365369
result = np.zeros(n, dtype=np.uint8)
366370
for i from 0 <= i < n:
@@ -370,11 +374,13 @@ def isnullobj_old(ndarray[object] arr):
370374

371375
@cython.wraparound(False)
372376
@cython.boundscheck(False)
373-
def isnullobj2d(ndarray[object, ndim=2] arr):
377+
def isnullobj2d(ndarray arr):
374378
cdef Py_ssize_t i, j, n, m
375379
cdef object val
376380
cdef ndarray[uint8_t, ndim=2] result
377381

382+
assert arr.ndim == 2, "'arr' must be 2-D."
383+
378384
n, m = (<object> arr).shape
379385
result = np.zeros((n, m), dtype=np.uint8)
380386
for i from 0 <= i < n:
@@ -386,11 +392,13 @@ def isnullobj2d(ndarray[object, ndim=2] arr):
386392

387393
@cython.wraparound(False)
388394
@cython.boundscheck(False)
389-
def isnullobj2d_old(ndarray[object, ndim=2] arr):
395+
def isnullobj2d_old(ndarray arr):
390396
cdef Py_ssize_t i, j, n, m
391397
cdef object val
392398
cdef ndarray[uint8_t, ndim=2] result
393399

400+
assert arr.ndim == 2, "'arr' must be 2-D."
401+
394402
n, m = (<object> arr).shape
395403
result = np.zeros((n, m), dtype=np.uint8)
396404
for i from 0 <= i < n:

pandas/tests/test_lib.py

+50
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
import numpy as np
4+
import pandas as pd
45
import pandas.lib as lib
56
import pandas.util.testing as tm
67

@@ -184,6 +185,55 @@ def test_get_reverse_indexer(self):
184185
self.assertTrue(np.array_equal(result, expected))
185186

186187

188+
class TestNullObj(tm.TestCase):
189+
190+
_1d_methods = ['isnullobj', 'isnullobj_old']
191+
_2d_methods = ['isnullobj2d', 'isnullobj2d_old']
192+
193+
def _check_behavior(self, arr, expected):
194+
for method in TestNullObj._1d_methods:
195+
result = getattr(lib, method)(arr)
196+
tm.assert_numpy_array_equal(result, expected)
197+
198+
arr = np.atleast_2d(arr)
199+
expected = np.atleast_2d(expected)
200+
201+
for method in TestNullObj._2d_methods:
202+
result = getattr(lib, method)(arr)
203+
tm.assert_numpy_array_equal(result, expected)
204+
205+
def test_basic(self):
206+
arr = np.array([1, None, 'foo', -5.1, pd.NaT, np.nan])
207+
expected = np.array([False, True, False, False, True, True])
208+
209+
self._check_behavior(arr, expected)
210+
211+
def test_non_obj_dtype(self):
212+
arr = np.array([1, 3, np.nan, 5], dtype=float)
213+
expected = np.array([False, False, True, False])
214+
215+
self._check_behavior(arr, expected)
216+
217+
def test_empty_arr(self):
218+
arr = np.array([])
219+
expected = np.array([], dtype=bool)
220+
221+
self._check_behavior(arr, expected)
222+
223+
def test_empty_str_inp(self):
224+
arr = np.array([""]) # empty but not null
225+
expected = np.array([False])
226+
227+
self._check_behavior(arr, expected)
228+
229+
def test_empty_like(self):
230+
# see gh-13717: no segfaults!
231+
arr = np.empty_like([None])
232+
expected = np.array([True])
233+
234+
self._check_behavior(arr, expected)
235+
236+
187237
def test_duplicated_with_nas():
188238
keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)
189239

0 commit comments

Comments
 (0)