Skip to content

Commit d228a78

Browse files
authored
ENH: [Draft] Fix issue pandas-dev#35131 Identify zero-dimensional duck arrays as non-iterable (pandas-dev#44626)
1 parent 2ab1d1f commit d228a78

File tree

3 files changed

+78
-5
lines changed

3 files changed

+78
-5
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ Other enhancements
232232
- :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
233233
- :meth:`read_json` can now parse unsigned long long integers (:issue:`26068`)
234234
- :meth:`DataFrame.take` now raises a ``TypeError`` when passed a scalar for the indexer (:issue:`42875`)
235+
- :meth:`is_list_like` now identifies duck-arrays as list-like unless ``.ndim == 0`` (:issue:`35131`)
235236
- :class:`ExtensionDtype` and :class:`ExtensionArray` are now (de)serialized when exporting a :class:`DataFrame` with :meth:`DataFrame.to_json` using ``orient='table'`` (:issue:`20612`, :issue:`44705`).
236237
-
237238

pandas/_libs/lib.pyx

+9-2
Original file line numberDiff line numberDiff line change
@@ -1098,13 +1098,20 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:
10981098

10991099

11001100
cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
1101+
# first, performance short-cuts for the most common cases
1102+
if util.is_array(obj):
1103+
# exclude zero-dimensional numpy arrays, effectively scalars
1104+
return not cnp.PyArray_IsZeroDim(obj)
1105+
elif isinstance(obj, list):
1106+
return True
1107+
# then the generic implementation
11011108
return (
11021109
# equiv: `isinstance(obj, abc.Iterable)`
11031110
getattr(obj, "__iter__", None) is not None and not isinstance(obj, type)
11041111
# we do not count strings/unicode/bytes as list-like
11051112
and not isinstance(obj, (str, bytes))
1106-
# exclude zero-dimensional numpy arrays, effectively scalars
1107-
and not cnp.PyArray_IsZeroDim(obj)
1113+
# exclude zero-dimensional duck-arrays, effectively scalars
1114+
and not (hasattr(obj, "ndim") and obj.ndim == 0)
11081115
# exclude sets if allow_sets is False
11091116
and not (allow_sets is False and isinstance(obj, abc.Set))
11101117
)

pandas/tests/dtypes/test_inference.py

+68-3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,56 @@ def coerce(request):
7575
return request.param
7676

7777

78+
class MockNumpyLikeArray:
79+
"""
80+
A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy
81+
82+
The key is that it is not actually a numpy array so
83+
``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other
84+
important properties are that the class defines a :meth:`__iter__` method
85+
(so that ``isinstance(obj, abc.Iterable)`` returns ``True``) and has a
86+
:attr:`ndim` property, as pandas special-cases 0-dimensional arrays in some
87+
cases.
88+
89+
We expect pandas to behave with respect to such duck arrays exactly as
90+
with real numpy arrays. In particular, a 0-dimensional duck array is *NOT*
91+
a scalar (``is_scalar(np.array(1)) == False``), but it is not list-like either.
92+
"""
93+
94+
def __init__(self, values):
95+
self._values = values
96+
97+
def __iter__(self):
98+
iter_values = iter(self._values)
99+
100+
def it_outer():
101+
yield from iter_values
102+
103+
return it_outer()
104+
105+
def __len__(self):
106+
return len(self._values)
107+
108+
def __array__(self, t=None):
109+
return np.asarray(self._values, dtype=t)
110+
111+
@property
112+
def ndim(self):
113+
return self._values.ndim
114+
115+
@property
116+
def dtype(self):
117+
return self._values.dtype
118+
119+
@property
120+
def size(self):
121+
return self._values.size
122+
123+
@property
124+
def shape(self):
125+
return self._values.shape
126+
127+
78128
# collect all objects to be tested for list-like-ness; use tuples of objects,
79129
# whether they are list-like or not (special casing for sets), and their ID
80130
ll_params = [
@@ -109,6 +159,15 @@ def coerce(request):
109159
(np.ndarray((2,) * 4), True, "ndarray-4d"),
110160
(np.array([[[[]]]]), True, "ndarray-4d-empty"),
111161
(np.array(2), False, "ndarray-0d"),
162+
(MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
163+
(MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
164+
(MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
165+
(MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
166+
(MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
167+
(MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
168+
(MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
169+
(MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
170+
(MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
112171
(1, False, "int"),
113172
(b"123", False, "bytes"),
114173
(b"", False, "bytes-empty"),
@@ -181,6 +240,8 @@ def test_is_array_like():
181240
assert inference.is_array_like(Series([1, 2]))
182241
assert inference.is_array_like(np.array(["a", "b"]))
183242
assert inference.is_array_like(Index(["2016-01-01"]))
243+
assert inference.is_array_like(np.array([2, 3]))
244+
assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3])))
184245

185246
class DtypeList(list):
186247
dtype = "special"
@@ -1811,9 +1872,13 @@ def test_is_scalar_numpy_zerodim_arrays(self):
18111872

18121873
@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
18131874
def test_is_scalar_numpy_arrays(self):
1814-
assert not is_scalar(np.array([]))
1815-
assert not is_scalar(np.array([[]]))
1816-
assert not is_scalar(np.matrix("1; 2"))
1875+
for a in [
1876+
np.array([]),
1877+
np.array([[]]),
1878+
np.matrix("1; 2"),
1879+
]:
1880+
assert not is_scalar(a)
1881+
assert not is_scalar(MockNumpyLikeArray(a))
18171882

18181883
def test_is_scalar_pandas_scalars(self):
18191884
assert is_scalar(Timestamp("2014-01-01"))

0 commit comments

Comments
 (0)