pandas-dev · jorisvandenbossche · Mar 5, 2024 · WillAyd · Mar 5, 2024 · jorisvandenbossche
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -220,6 +220,51 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
     return result.view(np.bool_)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cpdef ndarray[uint8_t] isna_string(ndarray arr):
+    """
+    Return boolean mask denoting which elements of an array are pd.NA,
+    assuming it contains only strings or pd.NA (guaranteed by the StringDtype).
+
+    Special-case version of `isnaobj` above, specialized for that subset.
+
+    Parameters
+    ----------
+    arr : ndarray
+
+    Returns
+    -------
+    result : ndarray (dtype=np.bool_)
+
+    Raises
+    ------
+    ValueError
+        If an element is neither pd.NA nor a str.
+    """
+    cdef:
+        Py_ssize_t i, n = arr.size
+        object val
+        ndarray result = np.zeros((<object>arr).shape, dtype=np.uint8)
+        flatiter it = cnp.PyArray_IterNew(arr)
+        flatiter it2 = cnp.PyArray_IterNew(result)
+
+    for i in range(n):
+        # PyArray_GETITEM + PyArray_ITER_NEXT: faster equivalent of `arr[i]`
+        val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
+        cnp.PyArray_ITER_NEXT(it)
+        if val is C_NA:
+            # Write 1 through the output iterator's current data pointer
+            (<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>1
+        elif not isinstance(val, str):
+            # Should never be reached; kept as a cheap safety check
+            raise ValueError(
+                f"Array should only contain strings or pd.NA, got {type(val)}"
+            )
+        cnp.PyArray_ITER_NEXT(it2)
+    return result.view(np.bool_)
+
+
 def isposinf_scalar(val: object) -> bool:
     return util.is_float_object(val) and val == INF
 

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -439,6 +439,9 @@ def __arrow_array__(self, type=None):
         values[self.isna()] = None
         return pa.array(values, type=type, from_pandas=True)
 
+    def isna(self) -> np.ndarray:
+        return libmissing.isna_string(self._ndarray)  # str / pd.NA-only check
+
     def _values_for_factorize(self) -> tuple[np.ndarray, None]:
         arr = self._ndarray.copy()
         mask = self.isna()