Skip to content

ENH: [Draft] Fix issue #35131 Identify zero-dimensional duck arrays as non-iterable #44626

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ from cpython.datetime cimport (
PyTime_Check,
)
from cpython.iterator cimport PyIter_Check
from cpython.list cimport PyList_Check
from cpython.number cimport PyNumber_Check
from cpython.object cimport (
Py_EQ,
Expand Down Expand Up @@ -1098,13 +1099,19 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:


cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
    # Decide whether ``obj`` counts as list-like.
    #
    # ``obj`` is list-like when it exposes ``__iter__`` and is not a class,
    # not a str/bytes, not a zero-dimensional (duck) array and -- when
    # ``allow_sets`` is False -- not an ``abc.Set``.
    # ``except -1`` lets an exception raised by the attribute look-ups below
    # propagate to the caller instead of being swallowed by the C return type.

    # first, performance short-cuts for the most common cases
    if cnp.PyArray_Check(obj):
        # zero-dimensional numpy arrays are effectively scalars
        return not cnp.PyArray_IsZeroDim(obj)
    if PyList_Check(obj):
        return True
    # then the generic implementation; real ndarrays have already returned
    # above, so no further ``PyArray_IsZeroDim`` test is needed here
    return (
        # equiv: `isinstance(obj, abc.Iterable)`
        getattr(obj, "__iter__", None) is not None and not isinstance(obj, type)
        # we do not count strings/unicode/bytes as list-like
        and not isinstance(obj, (str, bytes))
        # avoid numpy-style scalars, i.e. zero-dimensional duck arrays
        and not (hasattr(obj, "ndim") and obj.ndim == 0)
        # exclude sets if allow_sets is False
        and not (allow_sets is False and isinstance(obj, abc.Set))
    )
Expand Down
122 changes: 101 additions & 21 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,56 @@ def coerce(request):
return request.param


class MockNumpyLikeArray:
    """
    A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy

    The key is that it is not actually a numpy array so
    ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other
    important properties are that the class defines a :meth:`__iter__` method
    (so that ``isinstance(obj, abc.Iterable)`` returns ``True``) and has an
    ``ndim`` property, as pandas special-cases 0-dimensional arrays in some
    cases.

    We expect pandas to behave with respect to such duck arrays exactly as
    with real numpy arrays. In particular, a 0-dimensional duck array is *NOT*
    a scalar (`is_scalar(np.array(1)) == False`), but it is not list-like either.

    Parameters
    ----------
    values : array-like
        The wrapped payload. It must provide ``ndim``, ``dtype``, ``size`` and
        ``shape`` attributes, which the matching properties forward to.
    """

    def __init__(self, values):
        self._values = values

    def __iter__(self):
        # Call ``iter`` eagerly so that any error it raises for the wrapped
        # payload surfaces on ``iter(mock)`` rather than on the first ``next``.
        iter_values = iter(self._values)

        def it_outer():
            yield from iter_values

        return it_outer()

    def __len__(self):
        return len(self._values)

    def __array__(self, t=None, copy=None):
        """
        Convert the wrapped values to a numpy array.

        Parameters
        ----------
        t : dtype, optional
            Requested dtype of the result.
        copy : bool, optional
            ``True`` always returns a fresh copy. ``None`` and ``False`` copy
            only when the conversion requires it.
        """
        if copy:
            # ``np.array`` copies by default.
            return np.array(self._values, dtype=t)
        return np.asarray(self._values, dtype=t)

    @property
    def ndim(self):
        """Number of dimensions of the wrapped values."""
        return self._values.ndim

    @property
    def dtype(self):
        """dtype of the wrapped values."""
        return self._values.dtype

    @property
    def size(self):
        """Total number of elements of the wrapped values."""
        return self._values.size

    @property
    def shape(self):
        """Shape tuple of the wrapped values."""
        return self._values.shape


# collect all objects to be tested for list-like-ness; use tuples of objects,
# whether they are list-like or not (special casing for sets), and their ID
ll_params = [
Expand Down Expand Up @@ -109,6 +159,15 @@ def coerce(request):
(np.ndarray((2,) * 4), True, "ndarray-4d"),
(np.array([[[[]]]]), True, "ndarray-4d-empty"),
(np.array(2), False, "ndarray-0d"),
(MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
(MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
(MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
(MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
(MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
(MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
(1, False, "int"),
(b"123", False, "bytes"),
(b"", False, "bytes-empty"),
Expand Down Expand Up @@ -181,6 +240,8 @@ def test_is_array_like():
assert inference.is_array_like(Series([1, 2]))
assert inference.is_array_like(np.array(["a", "b"]))
assert inference.is_array_like(Index(["2016-01-01"]))
assert inference.is_array_like(np.array([2, 3]))
assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3])))

class DtypeList(list):
dtype = "special"
Expand Down Expand Up @@ -1786,34 +1847,53 @@ def test_is_scalar_builtin_nonscalars(self):
assert not is_scalar(slice(None))
assert not is_scalar(Ellipsis)

@pytest.mark.parametrize(
    "start",
    (
        np.int64(1),
        np.float64(1.0),
        np.int32(1),
        np.complex64(2),
        np.object_("foobar"),
        np.str_("foobar"),
        np.unicode_("foobar"),
        np.bytes_(b"foobar"),
        np.datetime64("2014-01-01"),
        np.timedelta64(1, "h"),
    ),
)
def test_is_scalar_numpy_array_scalars(self, start):
    """Every numpy scalar instance is reported as a scalar."""
    assert is_scalar(start)

@pytest.mark.parametrize(
    "zerodim",
    (
        np.array(1),
        np.array("foobar"),
        np.array(np.datetime64("2014-01-01")),
        np.array(np.timedelta64(1, "h")),
        np.array(np.datetime64("NaT")),
    ),
)
def test_is_scalar_numpy_zerodim_arrays(self, zerodim):
    """A 0-d ndarray is not a scalar, but the item unwrapped from it is."""
    assert not is_scalar(zerodim)
    assert is_scalar(lib.item_from_zerodim(zerodim))

@pytest.mark.parametrize(
    "start",
    (
        np.array([]),
        np.array([[]]),
        np.matrix("1; 2"),
    ),
)
@pytest.mark.parametrize("numpy_like", (True, False))
@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scalar_numpy_arrays(self, start, numpy_like):
    """Arrays are never scalars, whether real ndarrays or duck-array wrappers."""
    if numpy_like:
        # Re-run the same check through the non-ndarray wrapper.
        start = MockNumpyLikeArray(start)

    assert not is_scalar(start)

def test_is_scalar_pandas_scalars(self):
assert is_scalar(Timestamp("2014-01-01"))
Expand Down