Skip to content

Commit 6240b1f

Browse files
authored
PERF: lib.Validator iteration (#44495)
1 parent 0e8ff31 commit 6240b1f

File tree

4 files changed

+18
-8
lines changed

4 files changed

+18
-8
lines changed

pandas/_libs/lib.pyx

+14 -4
Original file line number | Diff line number | Diff line change
@@ -1704,10 +1704,15 @@ cdef class Validator:
17041704
cdef bint _validate(self, ndarray values) except -1:
17051705
cdef:
17061706
Py_ssize_t i
1707-
Py_ssize_t n = self.n
1707+
Py_ssize_t n = values.size
1708+
flatiter it = PyArray_IterNew(values)
17081709

17091710
for i in range(n):
1710-
if not self.is_valid(values[i]):
1711+
# The PyArray_GETITEM and PyArray_ITER_NEXT are faster
1712+
# equivalents to `val = values[i]`
1713+
val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
1714+
PyArray_ITER_NEXT(it)
1715+
if not self.is_valid(val):
17111716
return False
17121717

17131718
return True
@@ -1717,10 +1722,15 @@ cdef class Validator:
17171722
cdef bint _validate_skipna(self, ndarray values) except -1:
17181723
cdef:
17191724
Py_ssize_t i
1720-
Py_ssize_t n = self.n
1725+
Py_ssize_t n = values.size
1726+
flatiter it = PyArray_IterNew(values)
17211727

17221728
for i in range(n):
1723-
if not self.is_valid_skipna(values[i]):
1729+
# The PyArray_GETITEM and PyArray_ITER_NEXT are faster
1730+
# equivalents to `val = values[i]`
1731+
val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
1732+
PyArray_ITER_NEXT(it)
1733+
if not self.is_valid_skipna(val):
17241734
return False
17251735

17261736
return True

pandas/core/arrays/string_.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -318,9 +318,7 @@ def __init__(self, values, copy=False):
318318

319319
def _validate(self):
320320
"""Validate that we only store NA or strings."""
321-
if len(self._ndarray) and not lib.is_string_array(
322-
self._ndarray.ravel("K"), skipna=True
323-
):
321+
if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
324322
raise ValueError("StringArray requires a sequence of strings or pandas.NA")
325323
if self._ndarray.dtype != "object":
326324
raise ValueError(

pandas/core/dtypes/inference.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -447,5 +447,5 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
447447
if dtype == np.dtype(bool):
448448
return True
449449
elif dtype == np.dtype("object"):
450-
return lib.is_bool_array(arr.ravel("K"))
450+
return lib.is_bool_array(arr)
451451
return False

pandas/tests/dtypes/test_inference.py

+2
Original file line number | Diff line number | Diff line change
@@ -1429,9 +1429,11 @@ def test_other_dtypes_for_array(self, func):
14291429
func = getattr(lib, func)
14301430
arr = np.array(["foo", "bar"])
14311431
assert not func(arr)
1432+
assert not func(arr.reshape(2, 1))
14321433

14331434
arr = np.array([1, 2])
14341435
assert not func(arr)
1436+
assert not func(arr.reshape(2, 1))
14351437

14361438
def test_date(self):
14371439

0 commit comments

Comments
 (0)