diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index acd74591134bc..9702eb4615909 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -11,6 +11,9 @@ from cython import Py_ssize_t from cpython.object cimport PyObject_RichCompareBool, Py_EQ from cpython.ref cimport Py_INCREF from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New +from cpython.iterator cimport PyIter_Check +from cpython.sequence cimport PySequence_Check +from cpython.number cimport PyNumber_Check from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, @@ -156,7 +159,8 @@ def is_scalar(val: object) -> bool: True """ - return (cnp.PyArray_IsAnyScalar(val) + # Start with C-optimized checks + if (cnp.PyArray_IsAnyScalar(val) # PyArray_IsAnyScalar is always False for bytearrays on Py3 or PyDate_Check(val) or PyDelta_Check(val) @@ -164,14 +168,54 @@ def is_scalar(val: object) -> bool: # We differ from numpy, which claims that None is not scalar; # see np.isscalar or val is C_NA - or val is None - or isinstance(val, (Fraction, Number)) + or val is None): + return True + + # Next use C-optimized checks to exclude common non-scalars before falling + # back to non-optimized checks. + if PySequence_Check(val): + # e.g. list, tuple + # includes np.ndarray, Series which PyNumber_Check can return True for + return False + + # Note: PyNumber_Check check includes Decimal, Fraction, numbers.Number + return (PyNumber_Check(val) or util.is_period_object(val) - or is_decimal(val) or is_interval(val) or util.is_offset_object(val)) +def is_iterator(obj: object) -> bool: + """ + Check if the object is an iterator. + + This is intended for generators, not list-like objects. + + Parameters + ---------- + obj : The object to check + + Returns + ------- + is_iter : bool + Whether `obj` is an iterator. 
+ + Examples + -------- + >>> is_iterator((x for x in [])) + True + >>> is_iterator([1, 2, 3]) + False + >>> is_iterator(datetime(2017, 1, 1)) + False + >>> is_iterator("foo") + False + >>> is_iterator(1) + False + """ + return PyIter_Check(obj) + + def item_from_zerodim(val: object) -> object: """ If the value is a zerodim array, return the item it contains. diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 9e9278052e35d..37bca76802843 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -25,6 +25,8 @@ is_list_like = lib.is_list_like +is_iterator = lib.is_iterator + def is_number(obj) -> bool: """ @@ -93,40 +95,6 @@ def _iterable_not_string(obj) -> bool: return isinstance(obj, abc.Iterable) and not isinstance(obj, str) -def is_iterator(obj) -> bool: - """ - Check if the object is an iterator. - - For example, lists are considered iterators - but not strings or datetime objects. - - Parameters - ---------- - obj : The object to check - - Returns - ------- - is_iter : bool - Whether `obj` is an iterator. - - Examples - -------- - >>> is_iterator([1, 2, 3]) - True - >>> is_iterator(datetime(2017, 1, 1)) - False - >>> is_iterator("foo") - False - >>> is_iterator(1) - False - """ - - if not hasattr(obj, "__iter__"): - return False - - return hasattr(obj, "__next__") - - def is_file_like(obj) -> bool: """ Check if the object is a file-like object. 
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 5eb85de2b90f5..48f9262ad3486 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1346,9 +1346,11 @@ def test_is_scalar_builtin_scalars(self): assert is_scalar(None) assert is_scalar(True) assert is_scalar(False) - assert is_scalar(Number()) assert is_scalar(Fraction()) assert is_scalar(0.0) + assert is_scalar(1) + assert is_scalar(complex(2)) + assert is_scalar(float("NaN")) assert is_scalar(np.nan) assert is_scalar("foobar") assert is_scalar(b"foobar") @@ -1357,6 +1359,7 @@ def test_is_scalar_builtin_scalars(self): assert is_scalar(time(12, 0)) assert is_scalar(timedelta(hours=1)) assert is_scalar(pd.NaT) + assert is_scalar(pd.NA) def test_is_scalar_builtin_nonscalars(self): assert not is_scalar({}) @@ -1371,6 +1374,7 @@ def test_is_scalar_numpy_array_scalars(self): assert is_scalar(np.int64(1)) assert is_scalar(np.float64(1.0)) assert is_scalar(np.int32(1)) + assert is_scalar(np.complex64(2)) assert is_scalar(np.object_("foobar")) assert is_scalar(np.str_("foobar")) assert is_scalar(np.unicode_("foobar")) @@ -1410,6 +1414,21 @@ def test_is_scalar_pandas_containers(self): assert not is_scalar(Index([])) assert not is_scalar(Index([1])) + def test_is_scalar_number(self): + # Number() is not recognized by PyNumber_Check, so by extension + # is not recognized by is_scalar, but instances of non-abstract + # subclasses are. + + class Numeric(Number): + def __init__(self, value): + self.value = value + + def __int__(self): + return self.value + + num = Numeric(1) + assert is_scalar(num) + def test_datetimeindex_from_empty_datetime64_array(): for unit in ["ms", "us", "ns"]: