pandas-dev · gfyoung · Jul 23, 2016 · jreback · Jul 24, 2016 · gfyoung
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -1,4 +1,5 @@
 from .pandas_vb_common import *
+import string
 
 
 class frame_apply_axis_1(object):
@@ -606,6 +607,21 @@ def time_frame_isnull(self):
         isnull(self.df)
 
 
+class frame_isnull_strings(object):
+    """Time ``isnull`` on a 1000 x 1000 frame of one-character strings."""
+    goal_time = 0.2
+
+    def setup(self):
+        np.random.seed(1234)  # fixed seed: same data on every run
+        letters = string.ascii_letters + string.whitespace
+        self.sample = np.array(list(letters))
+        self.data = np.random.choice(self.sample, (1000, 1000))
+        self.df = DataFrame(self.data)
+
+    def time_frame_isnull(self):
+        isnull(self.df)
+
+
 class frame_isnull_obj(object):
     goal_time = 0.2
 

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -694,6 +694,7 @@ Bug Fixes
 - Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`)
 - Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`)
 
+- Bug in printing ``pd.DataFrame`` where unusual elements with the object dtype were causing segfaults (:issue:`13717`)
 - Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
 - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
 - Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`)

diff --git a/pandas/lib.pyx b/pandas/lib.pyx
@@ -342,11 +342,13 @@ def item_from_zerodim(object val):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj(ndarray[object] arr):
+def isnullobj(ndarray arr):
     cdef Py_ssize_t i, n
     cdef object val
     cdef ndarray[uint8_t] result
 
+    assert arr.ndim == 1, "'arr' must be 1-D."
+
     n = len(arr)
     result = np.empty(n, dtype=np.uint8)
     for i from 0 <= i < n:
@@ -356,11 +358,13 @@ def isnullobj(ndarray[object] arr):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj_old(ndarray[object] arr):
+def isnullobj_old(ndarray arr):
     cdef Py_ssize_t i, n
     cdef object val
     cdef ndarray[uint8_t] result
 
+    assert arr.ndim == 1, "'arr' must be 1-D."
+
     n = len(arr)
     result = np.zeros(n, dtype=np.uint8)
     for i from 0 <= i < n:
@@ -370,11 +374,13 @@ def isnullobj_old(ndarray[object] arr):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj2d(ndarray[object, ndim=2] arr):
+def isnullobj2d(ndarray arr):
     cdef Py_ssize_t i, j, n, m
     cdef object val
     cdef ndarray[uint8_t, ndim=2] result
 
+    assert arr.ndim == 2, "'arr' must be 2-D."
+
     n, m = (<object> arr).shape
     result = np.zeros((n, m), dtype=np.uint8)
     for i from 0 <= i < n:
@@ -386,11 +392,13 @@ def isnullobj2d(ndarray[object, ndim=2] arr):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def isnullobj2d_old(ndarray[object, ndim=2] arr):
+def isnullobj2d_old(ndarray arr):
     cdef Py_ssize_t i, j, n, m
     cdef object val
     cdef ndarray[uint8_t, ndim=2] result
 
+    assert arr.ndim == 2, "'arr' must be 2-D."
+
     n, m = (<object> arr).shape
     result = np.zeros((n, m), dtype=np.uint8)
     for i from 0 <= i < n:

diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import numpy as np
+import pandas as pd
 import pandas.lib as lib
 import pandas.util.testing as tm
 
@@ -184,6 +185,55 @@ def test_get_reverse_indexer(self):
         self.assertTrue(np.array_equal(result, expected))
 
 
+class TestNullObj(tm.TestCase):
+    """Check the ``lib`` object null checkers on 1-D and 2-D input."""
+
+    _1d_methods = ['isnullobj', 'isnullobj_old']
+    _2d_methods = ['isnullobj2d', 'isnullobj2d_old']
+
+    def _check_behavior(self, arr, expected):
+        """Assert every 1-D and 2-D checker maps `arr` to `expected`."""
+        for name in TestNullObj._1d_methods:
+            checker = getattr(lib, name)
+            tm.assert_numpy_array_equal(checker(arr), expected)
+
+        # the 2-D variants get the same data promoted with atleast_2d
+        arr_2d = np.atleast_2d(arr)
+        expected_2d = np.atleast_2d(expected)
+        for name in TestNullObj._2d_methods:
+            checker = getattr(lib, name)
+            tm.assert_numpy_array_equal(checker(arr_2d), expected_2d)
+
+    def test_basic(self):
+        # None, NaT and NaN are null; the int, str and float are not
+        values = np.array([1, None, 'foo', -5.1, pd.NaT, np.nan])
+        mask = np.array([False, True, False, False, True, True])
+        self._check_behavior(values, mask)
+
+    def test_non_obj_dtype(self):
+        # float input rather than an object array
+        values = np.array([1, 3, np.nan, 5], dtype=float)
+        mask = np.array([False, False, True, False])
+        self._check_behavior(values, mask)
+
+    def test_empty_arr(self):
+        # zero-length input is expected to give a zero-length bool result
+        values = np.array([])
+        mask = np.array([], dtype=bool)
+        self._check_behavior(values, mask)
+
+    def test_empty_str_inp(self):
+        values = np.array([""])  # empty but not null
+        mask = np.array([False])
+        self._check_behavior(values, mask)
+
+    def test_empty_like(self):
+        # see gh-13717: no segfaults!
+        values = np.empty_like([None])
+        mask = np.array([True])
+        self._check_behavior(values, mask)
+
+
 def test_duplicated_with_nas():
     keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)