Skip to content

Commit f1d7ac6

Browse files
jbrockmendel and jreback
authored and committed
PERF: optimize is_scalar, is_iterator (#31294)
1 parent 823bf6c commit f1d7ac6

File tree

3 files changed

+70
-39
lines changed

3 files changed

+70
-39
lines changed

pandas/_libs/lib.pyx

+48-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ from cython import Py_ssize_t
1111
from cpython.object cimport PyObject_RichCompareBool, Py_EQ
1212
from cpython.ref cimport Py_INCREF
1313
from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New
14+
from cpython.iterator cimport PyIter_Check
15+
from cpython.sequence cimport PySequence_Check
16+
from cpython.number cimport PyNumber_Check
1417

1518
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
1619
PyTime_Check, PyDelta_Check,
@@ -156,22 +159,63 @@ def is_scalar(val: object) -> bool:
156159
True
157160
"""
158161

159-
return (cnp.PyArray_IsAnyScalar(val)
162+
# Start with C-optimized checks
163+
if (cnp.PyArray_IsAnyScalar(val)
160164
# PyArray_IsAnyScalar is always False for bytearrays on Py3
161165
or PyDate_Check(val)
162166
or PyDelta_Check(val)
163167
or PyTime_Check(val)
164168
# We differ from numpy, which claims that None is not scalar;
165169
# see np.isscalar
166170
or val is C_NA
167-
or val is None
168-
or isinstance(val, (Fraction, Number))
171+
or val is None):
172+
return True
173+
174+
# Next use C-optimized checks to exclude common non-scalars before falling
175+
# back to non-optimized checks.
176+
if PySequence_Check(val):
177+
# e.g. list, tuple
178+
# includes np.ndarray and Series, for which PyNumber_Check can return True
179+
return False
180+
181+
# Note: PyNumber_Check includes Decimal, Fraction, and numbers.Number subclasses that implement a numeric conversion (e.g. __int__), but not a bare Number()
182+
return (PyNumber_Check(val)
169183
or util.is_period_object(val)
170-
or is_decimal(val)
171184
or is_interval(val)
172185
or util.is_offset_object(val))
173186

174187

188+
def is_iterator(obj: object) -> bool:
189+
"""
190+
Check if the object is an iterator.
191+
192+
This is intended for generators, not list-like objects.
193+
194+
Parameters
195+
----------
196+
obj : The object to check
197+
198+
Returns
199+
-------
200+
is_iter : bool
201+
Whether `obj` is an iterator.
202+
203+
Examples
204+
--------
205+
>>> is_iterator((x for x in []))
206+
True
207+
>>> is_iterator([1, 2, 3])
208+
False
209+
>>> is_iterator(datetime(2017, 1, 1))
210+
False
211+
>>> is_iterator("foo")
212+
False
213+
>>> is_iterator(1)
214+
False
215+
"""
216+
return PyIter_Check(obj)
217+
218+
175219
def item_from_zerodim(val: object) -> object:
176220
"""
177221
If the value is a zerodim array, return the item it contains.

pandas/core/dtypes/inference.py

+2-34
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
is_list_like = lib.is_list_like
2727

28+
is_iterator = lib.is_iterator
29+
2830

2931
def is_number(obj) -> bool:
3032
"""
@@ -93,40 +95,6 @@ def _iterable_not_string(obj) -> bool:
9395
return isinstance(obj, abc.Iterable) and not isinstance(obj, str)
9496

9597

96-
def is_iterator(obj) -> bool:
97-
"""
98-
Check if the object is an iterator.
99-
100-
For example, lists are considered iterators
101-
but not strings or datetime objects.
102-
103-
Parameters
104-
----------
105-
obj : The object to check
106-
107-
Returns
108-
-------
109-
is_iter : bool
110-
Whether `obj` is an iterator.
111-
112-
Examples
113-
--------
114-
>>> is_iterator([1, 2, 3])
115-
True
116-
>>> is_iterator(datetime(2017, 1, 1))
117-
False
118-
>>> is_iterator("foo")
119-
False
120-
>>> is_iterator(1)
121-
False
122-
"""
123-
124-
if not hasattr(obj, "__iter__"):
125-
return False
126-
127-
return hasattr(obj, "__next__")
128-
129-
13098
def is_file_like(obj) -> bool:
13199
"""
132100
Check if the object is a file-like object.

pandas/tests/dtypes/test_inference.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -1346,9 +1346,11 @@ def test_is_scalar_builtin_scalars(self):
13461346
assert is_scalar(None)
13471347
assert is_scalar(True)
13481348
assert is_scalar(False)
1349-
assert is_scalar(Number())
13501349
assert is_scalar(Fraction())
13511350
assert is_scalar(0.0)
1351+
assert is_scalar(1)
1352+
assert is_scalar(complex(2))
1353+
assert is_scalar(float("NaN"))
13521354
assert is_scalar(np.nan)
13531355
assert is_scalar("foobar")
13541356
assert is_scalar(b"foobar")
@@ -1357,6 +1359,7 @@ def test_is_scalar_builtin_scalars(self):
13571359
assert is_scalar(time(12, 0))
13581360
assert is_scalar(timedelta(hours=1))
13591361
assert is_scalar(pd.NaT)
1362+
assert is_scalar(pd.NA)
13601363

13611364
def test_is_scalar_builtin_nonscalars(self):
13621365
assert not is_scalar({})
@@ -1371,6 +1374,7 @@ def test_is_scalar_numpy_array_scalars(self):
13711374
assert is_scalar(np.int64(1))
13721375
assert is_scalar(np.float64(1.0))
13731376
assert is_scalar(np.int32(1))
1377+
assert is_scalar(np.complex64(2))
13741378
assert is_scalar(np.object_("foobar"))
13751379
assert is_scalar(np.str_("foobar"))
13761380
assert is_scalar(np.unicode_("foobar"))
@@ -1410,6 +1414,21 @@ def test_is_scalar_pandas_containers(self):
14101414
assert not is_scalar(Index([]))
14111415
assert not is_scalar(Index([1]))
14121416

1417+
def test_is_scalar_number(self):
1418+
# Number() is not recognized by PyNumber_Check, so by extension
1419+
# is not recognized by is_scalar, but instances of non-abstract
1420+
# subclasses are.
1421+
1422+
class Numeric(Number):
1423+
def __init__(self, value):
1424+
self.value = value
1425+
1426+
def __int__(self):
1427+
return self.value
1428+
1429+
num = Numeric(1)
1430+
assert is_scalar(num)
1431+
14131432

14141433
def test_datetimeindex_from_empty_datetime64_array():
14151434
for unit in ["ms", "us", "ns"]:

0 commit comments

Comments
 (0)