Skip to content

Commit c3fc62e

Browse files
committed
COMPAT: followup to pandas-dev#17491
1 parent 83436af commit c3fc62e

File tree

7 files changed

+95
-64
lines changed

7 files changed

+95
-64
lines changed

doc/source/whatsnew/v0.21.0.txt

+6-6
Original file line numberDiff line numberDiff line change
@@ -193,16 +193,16 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
193193
Iteration of Series/Index will now return python scalars
194194
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
195195

196-
Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`).
196+
Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`).
197197

198198
.. ipython:: python
199199

200-
s = Series([1, 2, 3])
200+
s = pd.Series([1, 2, 3])
201201
s
202202

203203
Previously:
204204

205-
.. code-block:: python
205+
.. code-block:: ipython
206206

207207
In [2]: type(list(s)[0])
208208
Out[2]: numpy.int64
@@ -215,14 +215,14 @@ New Behaviour:
215215

216216
Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well.
217217

218-
.. ipython:: python
218+
.. ipython:: python
219219

220220
d = {'a':[1], 'b':['b']}
221-
df = DataFrame(d)
221+
df = pd.DataFrame(d)
222222

223223
Previously:
224224

225-
.. code-block:: python
225+
.. code-block:: ipython
226226

227227
In [8]: type(df.to_dict()['a'][0])
228228
Out[8]: numpy.int64

pandas/core/base.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -892,18 +892,27 @@ def argmin(self, axis=None):
892892

893893
def tolist(self):
894894
"""
895-
return a list of the values; box to scalars
895+
return a list of the values. These are each a scalar type, which is
896+
a python scalar (for str, int, float) or a pandas scalar
897+
(for Timestamp/Timedelta/Interval/Period)
898+
899+
See Also
900+
--------
901+
numpy.ndarray.tolist
896902
"""
897-
return list(self.__iter__())
903+
904+
if is_datetimelike(self):
905+
return [_maybe_box_datetimelike(x) for x in self._values]
906+
else:
907+
return self._values.tolist()
898908

899909
def __iter__(self):
900910
"""
901-
provide iteration over the values; box to scalars
911+
return an iterator of the values. These are each a scalar type,
912+
which is a python scalar (for str, int, float) or a pandas scalar
913+
(for Timestamp/Timedelta/Interval/Period)
902914
"""
903-
if is_datetimelike(self):
904-
return (_maybe_box_datetimelike(x) for x in self._values)
905-
else:
906-
return iter(self._values.tolist())
915+
return iter(self.tolist())
907916

908917
@cache_readonly
909918
def hasnans(self):

pandas/core/categorical.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
is_integer_dtype, is_bool,
2727
is_list_like, is_sequence,
2828
is_scalar)
29-
from pandas.core.common import is_null_slice
29+
from pandas.core.common import is_null_slice, _maybe_box_datetimelike
3030

3131
from pandas.core.algorithms import factorize, take_1d, unique1d
3232
from pandas.core.base import (PandasObject, PandasDelegate,
@@ -403,6 +403,8 @@ def tolist(self):
403403
"""
404404
return a list of my values
405405
"""
406+
if is_datetimelike(self.categories):
407+
return [_maybe_box_datetimelike(x) for x in self]
406408
return np.array(self).tolist()
407409

408410
def reshape(self, new_shape, *args, **kwargs):

pandas/core/indexes/category.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,8 @@ def get_values(self):
253253
""" return the underlying data as an ndarray """
254254
return self._data.get_values()
255255

256-
def __iter__(self):
257-
""" iterate like Categorical """
258-
return self._data.__iter__()
256+
def tolist(self):
257+
return self._data.tolist()
259258

260259
@property
261260
def codes(self):

pandas/tests/indexes/test_category.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -576,12 +576,13 @@ def test_isin(self):
576576
ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6))
577577

578578
# mismatched categorical -> coerced to ndarray so doesn't matter
579-
tm.assert_numpy_array_equal(
580-
ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] *
581-
6))
582-
tm.assert_numpy_array_equal(
583-
ci.isin(ci.set_categories(list('defghi'))),
584-
np.array([False] * 5 + [True]))
579+
result = ci.isin(ci.set_categories(list('abcdefghi')))
580+
expected = np.array([True] * 6)
581+
tm.assert_numpy_array_equal(result, expected)
582+
583+
result = ci.isin(ci.set_categories(list('defghi')))
584+
expected = np.array([False] * 5 + [True])
585+
tm.assert_numpy_array_equal(result, expected)
585586

586587
def test_identical(self):
587588

pandas/tests/series/test_api.py

-37
Original file line numberDiff line numberDiff line change
@@ -245,43 +245,6 @@ def test_iter(self):
245245
for i, val in enumerate(self.ts):
246246
assert val == self.ts[i]
247247

248-
def test_iter_box(self):
249-
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
250-
s = pd.Series(vals)
251-
assert s.dtype == 'datetime64[ns]'
252-
for res, exp in zip(s, vals):
253-
assert isinstance(res, pd.Timestamp)
254-
assert res.tz is None
255-
assert res == exp
256-
257-
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
258-
pd.Timestamp('2011-01-02', tz='US/Eastern')]
259-
s = pd.Series(vals)
260-
261-
assert s.dtype == 'datetime64[ns, US/Eastern]'
262-
for res, exp in zip(s, vals):
263-
assert isinstance(res, pd.Timestamp)
264-
assert res.tz == exp.tz
265-
assert res == exp
266-
267-
# timedelta
268-
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
269-
s = pd.Series(vals)
270-
assert s.dtype == 'timedelta64[ns]'
271-
for res, exp in zip(s, vals):
272-
assert isinstance(res, pd.Timedelta)
273-
assert res == exp
274-
275-
# period (object dtype, not boxed)
276-
vals = [pd.Period('2011-01-01', freq='M'),
277-
pd.Period('2011-01-02', freq='M')]
278-
s = pd.Series(vals)
279-
assert s.dtype == 'object'
280-
for res, exp in zip(s, vals):
281-
assert isinstance(res, pd.Period)
282-
assert res.freq == 'M'
283-
assert res == exp
284-
285248
def test_keys(self):
286249
# HACK: By doing this in two stages, we avoid 2to3 wrapping the call
287250
# to .keys() in a list()

pandas/tests/test_base.py

+61-4
Original file line numberDiff line numberDiff line change
@@ -1054,10 +1054,7 @@ class TestToIterable(object):
10541054
('timedelta64[ns]', Timedelta)]
10551055

10561056
@pytest.mark.parametrize(
1057-
'dtype, rdtype',
1058-
dtypes + [
1059-
('object', object),
1060-
('category', object)])
1057+
'dtype, rdtype', dtypes)
10611058
@pytest.mark.parametrize(
10621059
'method',
10631060
[
@@ -1074,6 +1071,29 @@ def test_iterable(self, typ, method, dtype, rdtype):
10741071
result = method(s)[0]
10751072
assert isinstance(result, rdtype)
10761073

1074+
@pytest.mark.parametrize(
1075+
'dtype, obj',
1076+
[
1077+
('object', 'a'),
1078+
('object', 1),
1079+
('category', 'a'),
1080+
('category', 1)])
1081+
@pytest.mark.parametrize(
1082+
'method',
1083+
[
1084+
lambda x: x.tolist(),
1085+
lambda x: list(x),
1086+
lambda x: list(x.__iter__()),
1087+
], ids=['tolist', 'list', 'iter'])
1088+
@pytest.mark.parametrize('typ', [Series, Index])
1089+
def test_iterable_object_category(self, typ, method, dtype, obj):
1090+
# gh-10904
1091+
# gh-13258
1092+
# coerce iteration to underlying python / pandas types
1093+
s = typ([obj], dtype=dtype)
1094+
result = method(s)[0]
1095+
assert isinstance(result, type(obj))
1096+
10771097
@pytest.mark.parametrize(
10781098
'dtype, rdtype',
10791099
dtypes + [
@@ -1102,3 +1122,40 @@ def test_categorial_datetimelike(self, method):
11021122

11031123
result = method(i)[0]
11041124
assert isinstance(result, Timestamp)
1125+
1126+
def test_iter_box(self):
1127+
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
1128+
s = pd.Series(vals)
1129+
assert s.dtype == 'datetime64[ns]'
1130+
for res, exp in zip(s, vals):
1131+
assert isinstance(res, pd.Timestamp)
1132+
assert res.tz is None
1133+
assert res == exp
1134+
1135+
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
1136+
pd.Timestamp('2011-01-02', tz='US/Eastern')]
1137+
s = pd.Series(vals)
1138+
1139+
assert s.dtype == 'datetime64[ns, US/Eastern]'
1140+
for res, exp in zip(s, vals):
1141+
assert isinstance(res, pd.Timestamp)
1142+
assert res.tz == exp.tz
1143+
assert res == exp
1144+
1145+
# timedelta
1146+
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
1147+
s = pd.Series(vals)
1148+
assert s.dtype == 'timedelta64[ns]'
1149+
for res, exp in zip(s, vals):
1150+
assert isinstance(res, pd.Timedelta)
1151+
assert res == exp
1152+
1153+
# period (object dtype, not boxed)
1154+
vals = [pd.Period('2011-01-01', freq='M'),
1155+
pd.Period('2011-01-02', freq='M')]
1156+
s = pd.Series(vals)
1157+
assert s.dtype == 'object'
1158+
for res, exp in zip(s, vals):
1159+
assert isinstance(res, pd.Period)
1160+
assert res.freq == 'M'
1161+
assert res == exp

0 commit comments

Comments
 (0)