Skip to content

Commit eef810e

Browse files
authored
COMPAT: followup to #17491 (#17503)
1 parent f11bbf2 commit eef810e

File tree

7 files changed

+119
-66
lines changed

7 files changed

+119
-66
lines changed

doc/source/whatsnew/v0.21.0.txt

+7-7
Original file line numberDiff line numberDiff line change
@@ -190,19 +190,19 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
190190

191191
.. _whatsnew_0210.api_breaking.iteration_scalars:
192192

193-
Iteration of Series/Index will now return python scalars
193+
Iteration of Series/Index will now return Python scalars
194194
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
195195

196-
Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`).
196+
Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a Python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`).
197197

198198
.. ipython:: python
199199

200-
s = Series([1, 2, 3])
200+
s = pd.Series([1, 2, 3])
201201
s
202202

203203
Previously:
204204

205-
.. code-block:: python
205+
.. code-block:: ipython
206206

207207
In [2]: type(list(s)[0])
208208
Out[2]: numpy.int64
@@ -215,14 +215,14 @@ New Behaviour:
215215

216216
Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well.
217217

218-
.. ipython:: python
218+
.. ipython:: python
219219

220220
d = {'a':[1], 'b':['b']}
221-
df = DataFrame(d)
221+
df = pd.DataFrame(d)
222222

223223
Previously:
224224

225-
.. code-block:: python
225+
.. code-block:: ipython
226226

227227
In [8]: type(df.to_dict()['a'][0])
228228
Out[8]: numpy.int64

pandas/core/base.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -892,18 +892,31 @@ def argmin(self, axis=None):
892892

893893
def tolist(self):
894894
"""
895-
return a list of the values; box to scalars
895+
Return a list of the values.
896+
897+
These are each a scalar type, which is a Python scalar
898+
(for str, int, float) or a pandas scalar
899+
(for Timestamp/Timedelta/Interval/Period)
900+
901+
See Also
902+
--------
903+
numpy.ndarray.tolist
896904
"""
897-
return list(self.__iter__())
905+
906+
if is_datetimelike(self):
907+
return [_maybe_box_datetimelike(x) for x in self._values]
908+
else:
909+
return self._values.tolist()
898910

899911
def __iter__(self):
900912
"""
901-
provide iteration over the values; box to scalars
913+
Return an iterator of the values.
914+
915+
These are each a scalar type, which is a Python scalar
916+
(for str, int, float) or a pandas scalar
917+
(for Timestamp/Timedelta/Interval/Period)
902918
"""
903-
if is_datetimelike(self):
904-
return (_maybe_box_datetimelike(x) for x in self._values)
905-
else:
906-
return iter(self._values.tolist())
919+
return iter(self.tolist())
907920

908921
@cache_readonly
909922
def hasnans(self):

pandas/core/categorical.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
is_integer_dtype, is_bool,
2727
is_list_like, is_sequence,
2828
is_scalar)
29-
from pandas.core.common import is_null_slice
29+
from pandas.core.common import is_null_slice, _maybe_box_datetimelike
3030

3131
from pandas.core.algorithms import factorize, take_1d, unique1d
3232
from pandas.core.base import (PandasObject, PandasDelegate,
@@ -401,8 +401,14 @@ def itemsize(self):
401401

402402
def tolist(self):
403403
"""
404-
return a list of my values
404+
Return a list of the values.
405+
406+
These are each a scalar type, which is a Python scalar
407+
(for str, int, float) or a pandas scalar
408+
(for Timestamp/Timedelta/Interval/Period)
405409
"""
410+
if is_datetimelike(self.categories):
411+
return [_maybe_box_datetimelike(x) for x in self]
406412
return np.array(self).tolist()
407413

408414
def reshape(self, new_shape, *args, **kwargs):

pandas/core/indexes/category.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,8 @@ def get_values(self):
251251
""" return the underlying data as an ndarray """
252252
return self._data.get_values()
253253

254-
def __iter__(self):
255-
""" iterate like Categorical """
256-
return self._data.__iter__()
254+
def tolist(self):
255+
return self._data.tolist()
257256

258257
@property
259258
def codes(self):

pandas/tests/indexes/test_category.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -576,12 +576,13 @@ def test_isin(self):
576576
ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6))
577577

578578
# mismatched categorical -> coerced to ndarray so doesn't matter
579-
tm.assert_numpy_array_equal(
580-
ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] *
581-
6))
582-
tm.assert_numpy_array_equal(
583-
ci.isin(ci.set_categories(list('defghi'))),
584-
np.array([False] * 5 + [True]))
579+
result = ci.isin(ci.set_categories(list('abcdefghi')))
580+
expected = np.array([True] * 6)
581+
tm.assert_numpy_array_equal(result, expected)
582+
583+
result = ci.isin(ci.set_categories(list('defghi')))
584+
expected = np.array([False] * 5 + [True])
585+
tm.assert_numpy_array_equal(result, expected)
585586

586587
def test_identical(self):
587588

pandas/tests/series/test_api.py

-37
Original file line numberDiff line numberDiff line change
@@ -245,43 +245,6 @@ def test_iter(self):
245245
for i, val in enumerate(self.ts):
246246
assert val == self.ts[i]
247247

248-
def test_iter_box(self):
249-
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
250-
s = pd.Series(vals)
251-
assert s.dtype == 'datetime64[ns]'
252-
for res, exp in zip(s, vals):
253-
assert isinstance(res, pd.Timestamp)
254-
assert res.tz is None
255-
assert res == exp
256-
257-
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
258-
pd.Timestamp('2011-01-02', tz='US/Eastern')]
259-
s = pd.Series(vals)
260-
261-
assert s.dtype == 'datetime64[ns, US/Eastern]'
262-
for res, exp in zip(s, vals):
263-
assert isinstance(res, pd.Timestamp)
264-
assert res.tz == exp.tz
265-
assert res == exp
266-
267-
# timedelta
268-
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
269-
s = pd.Series(vals)
270-
assert s.dtype == 'timedelta64[ns]'
271-
for res, exp in zip(s, vals):
272-
assert isinstance(res, pd.Timedelta)
273-
assert res == exp
274-
275-
# period (object dtype, not boxed)
276-
vals = [pd.Period('2011-01-01', freq='M'),
277-
pd.Period('2011-01-02', freq='M')]
278-
s = pd.Series(vals)
279-
assert s.dtype == 'object'
280-
for res, exp in zip(s, vals):
281-
assert isinstance(res, pd.Period)
282-
assert res.freq == 'M'
283-
assert res == exp
284-
285248
def test_keys(self):
286249
# HACK: By doing this in two stages, we avoid 2to3 wrapping the call
287250
# to .keys() in a list()

pandas/tests/test_base.py

+75-4
Original file line numberDiff line numberDiff line change
@@ -1054,10 +1054,7 @@ class TestToIterable(object):
10541054
('timedelta64[ns]', Timedelta)]
10551055

10561056
@pytest.mark.parametrize(
1057-
'dtype, rdtype',
1058-
dtypes + [
1059-
('object', object),
1060-
('category', object)])
1057+
'dtype, rdtype', dtypes)
10611058
@pytest.mark.parametrize(
10621059
'method',
10631060
[
@@ -1074,6 +1071,43 @@ def test_iterable(self, typ, method, dtype, rdtype):
10741071
result = method(s)[0]
10751072
assert isinstance(result, rdtype)
10761073

1074+
@pytest.mark.parametrize(
1075+
'dtype, rdtype, obj',
1076+
[
1077+
('object', object, 'a'),
1078+
('object', (int, long), 1),
1079+
('category', object, 'a'),
1080+
('category', (int, long), 1)])
1081+
@pytest.mark.parametrize(
1082+
'method',
1083+
[
1084+
lambda x: x.tolist(),
1085+
lambda x: list(x),
1086+
lambda x: list(x.__iter__()),
1087+
], ids=['tolist', 'list', 'iter'])
1088+
@pytest.mark.parametrize('typ', [Series, Index])
1089+
def test_iterable_object_and_category(self, typ, method,
1090+
dtype, rdtype, obj):
1091+
# gh-10904
1092+
# gh-13258
1093+
# coerce iteration to underlying python / pandas types
1094+
s = typ([obj], dtype=dtype)
1095+
result = method(s)[0]
1096+
assert isinstance(result, rdtype)
1097+
1098+
@pytest.mark.parametrize(
1099+
'dtype, rdtype', dtypes)
1100+
def test_iterable_items(self, dtype, rdtype):
1101+
# gh-13258
1102+
# test items / iteritems yields the correct boxed scalars
1103+
# this only applies to series
1104+
s = Series([1], dtype=dtype)
1105+
_, result = list(s.items())[0]
1106+
assert isinstance(result, rdtype)
1107+
1108+
_, result = list(s.iteritems())[0]
1109+
assert isinstance(result, rdtype)
1110+
10771111
@pytest.mark.parametrize(
10781112
'dtype, rdtype',
10791113
dtypes + [
@@ -1102,3 +1136,40 @@ def test_categorial_datetimelike(self, method):
11021136

11031137
result = method(i)[0]
11041138
assert isinstance(result, Timestamp)
1139+
1140+
def test_iter_box(self):
1141+
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
1142+
s = pd.Series(vals)
1143+
assert s.dtype == 'datetime64[ns]'
1144+
for res, exp in zip(s, vals):
1145+
assert isinstance(res, pd.Timestamp)
1146+
assert res.tz is None
1147+
assert res == exp
1148+
1149+
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
1150+
pd.Timestamp('2011-01-02', tz='US/Eastern')]
1151+
s = pd.Series(vals)
1152+
1153+
assert s.dtype == 'datetime64[ns, US/Eastern]'
1154+
for res, exp in zip(s, vals):
1155+
assert isinstance(res, pd.Timestamp)
1156+
assert res.tz == exp.tz
1157+
assert res == exp
1158+
1159+
# timedelta
1160+
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
1161+
s = pd.Series(vals)
1162+
assert s.dtype == 'timedelta64[ns]'
1163+
for res, exp in zip(s, vals):
1164+
assert isinstance(res, pd.Timedelta)
1165+
assert res == exp
1166+
1167+
# period (object dtype, not boxed)
1168+
vals = [pd.Period('2011-01-01', freq='M'),
1169+
pd.Period('2011-01-02', freq='M')]
1170+
s = pd.Series(vals)
1171+
assert s.dtype == 'object'
1172+
for res, exp in zip(s, vals):
1173+
assert isinstance(res, pd.Period)
1174+
assert res.freq == 'M'
1175+
assert res == exp

0 commit comments

Comments
 (0)