Skip to content

Commit c0fd989

Browse files
committed
COMPAT: Iteration should always yield a python scalar
xref #10904 closes #13236 closes #13256 xref #14216
1 parent 42ed4f1 commit c0fd989

File tree

11 files changed

+181
-64
lines changed

11 files changed

+181
-64
lines changed

doc/source/whatsnew/v0.21.0.txt

+47
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,53 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
188188
...
189189
ValueError: Cannot operate inplace if there is no assignment
190190

191+
.. _whatsnew_0210.api_breaking.iteration_scalars:
192+
193+
Iteration of Series/Index will now return python scalars
194+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
195+
196+
Previously, when using certain iteration methods on a ``Series`` or ``Index`` with an int or float dtype, you could receive a ``numpy`` scalar (e.g. ``np.int64``) rather than a python scalar (``int``). Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes the remaining iteration methods consistent, in particular ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`)
197+
198+
.. ipython:: python
199+
200+
s = Series([1, 2, 3])
201+
s
202+
203+
Previously:
204+
205+
.. code-block:: python
206+
207+
In [2]: type(list(s)[0])
208+
Out[2]: numpy.int64
209+
210+
New Behaviour:
211+
212+
.. ipython:: python
213+
214+
type(list(s)[0])
215+
216+
Furthermore, this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well.
217+
218+
.. ipython:: python
219+
220+
d = {'a':[1], 'b':['b']}
221+
df = DataFrame(d)
222+
223+
Previously:
224+
225+
.. code-block:: python
226+
227+
In [8]: type(df.to_dict()['a'][0])
228+
Out[8]: numpy.int64
229+
230+
New Behaviour:
231+
232+
.. ipython:: python
233+
234+
type(df.to_dict()['a'][0])
235+
236+
.. _whatsnew_0210.api_breaking.dtype_conversions:
237+
191238
Dtype Conversions
192239
^^^^^^^^^^^^^^^^^
193240

pandas/core/base.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@
88

99
from pandas.core.dtypes.missing import isna
1010
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
11-
from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar
11+
from pandas.core.dtypes.common import (
12+
is_object_dtype,
13+
is_list_like,
14+
is_scalar,
15+
is_datetimelike)
16+
1217
from pandas.util._validators import validate_bool_kwarg
1318

1419
from pandas.core import common as com
@@ -18,7 +23,8 @@
1823
from pandas.compat import PYPY
1924
from pandas.util._decorators import (Appender, cache_readonly,
2025
deprecate_kwarg, Substitution)
21-
from pandas.core.common import AbstractMethodError
26+
from pandas.core.common import AbstractMethodError, _maybe_box_datetimelike
27+
2228
from pandas.core.accessor import DirNamesMixin
2329

2430
_shared_docs = dict()
@@ -884,6 +890,21 @@ def argmin(self, axis=None):
884890
"""
885891
return nanops.nanargmin(self.values)
886892

893+
def tolist(self):
894+
"""
895+
return a list of the values; box to scalars
896+
"""
897+
return list(self.__iter__())
898+
899+
def __iter__(self):
900+
"""
901+
provide iteration over the values; box to scalars
902+
"""
903+
if is_datetimelike(self):
904+
return (_maybe_box_datetimelike(x) for x in self._values)
905+
else:
906+
return iter(self._values.tolist())
907+
887908
@cache_readonly
888909
def hasnans(self):
889910
""" return if I have any nans; enables various perf speedups """

pandas/core/categorical.py

+6
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,12 @@ def itemsize(self):
399399
""" return the size of a single category """
400400
return self.categories.itemsize
401401

402+
def tolist(self):
403+
"""
404+
return a list of my values
405+
"""
406+
return np.array(self).tolist()
407+
402408
def reshape(self, new_shape, *args, **kwargs):
403409
"""
404410
.. deprecated:: 0.19.0

pandas/core/indexes/base.py

-9
Original file line numberDiff line numberDiff line change
@@ -585,12 +585,6 @@ def memory_usage(self, deep=False):
585585
return result
586586

587587
# ops compat
588-
def tolist(self):
589-
"""
590-
return a list of the Index values
591-
"""
592-
return list(self.values)
593-
594588
@deprecate_kwarg(old_arg_name='n', new_arg_name='repeats')
595589
def repeat(self, repeats, *args, **kwargs):
596590
"""
@@ -1601,9 +1595,6 @@ def is_all_dates(self):
16011595
return False
16021596
return is_datetime_array(_ensure_object(self.values))
16031597

1604-
def __iter__(self):
1605-
return iter(self.values)
1606-
16071598
def __reduce__(self):
16081599
d = dict(data=self._data)
16091600
d.update(self._get_attributes_dict())

pandas/core/indexes/category.py

+4
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ def get_values(self):
253253
""" return the underlying data as an ndarray """
254254
return self._data.get_values()
255255

256+
def __iter__(self):
257+
""" iterate like Categorical """
258+
return self._data.__iter__()
259+
256260
@property
257261
def codes(self):
258262
return self._data.codes

pandas/core/series.py

-12
Original file line numberDiff line numberDiff line change
@@ -1095,14 +1095,6 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
10951095
with open(buf, 'w') as f:
10961096
f.write(result)
10971097

1098-
def __iter__(self):
1099-
""" provide iteration over the values of the Series
1100-
box values if necessary """
1101-
if is_datetimelike(self):
1102-
return (_maybe_box_datetimelike(x) for x in self._values)
1103-
else:
1104-
return iter(self._values)
1105-
11061098
def iteritems(self):
11071099
"""
11081100
Lazily iterate over (index, value) tuples
@@ -1118,10 +1110,6 @@ def keys(self):
11181110
"""Alias for index"""
11191111
return self.index
11201112

1121-
def tolist(self):
1122-
""" Convert Series to a nested list """
1123-
return list(self.asobject)
1124-
11251113
def to_dict(self, into=dict):
11261114
"""
11271115
Convert Series to {label -> value} dict or dict-like object.

pandas/core/sparse/array.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,18 @@ def to_dense(self, fill=None):
407407
return self.values
408408

409409
def __iter__(self):
410+
if np.issubdtype(self.dtype, np.floating):
411+
boxer = float
412+
elif np.issubdtype(self.dtype, np.integer):
413+
boxer = int
414+
else:
415+
boxer = lambda x: x
416+
410417
for i in range(len(self)):
411-
yield self._get_val_at(i)
418+
r = self._get_val_at(i)
419+
420+
# box em
421+
yield boxer(r)
412422

413423
def __getitem__(self, key):
414424
"""

pandas/tests/frame/test_api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import sys
1010
from distutils.version import LooseVersion
1111

12-
from pandas.compat import range, lrange
12+
from pandas.compat import range, lrange, long
1313
from pandas import compat
1414

1515
from numpy.random import randn
@@ -205,7 +205,7 @@ def test_itertuples(self):
205205
'ints': lrange(5)}, columns=['floats', 'ints'])
206206

207207
for tup in df.itertuples(index=False):
208-
assert isinstance(tup[1], np.integer)
208+
assert isinstance(tup[1], (int, long))
209209

210210
df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
211211
dfaa = df[['a', 'a']]

pandas/tests/frame/test_convert_to.py

+13
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66

77
from pandas import compat
8+
from pandas.compat import long
89
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
910
date_range)
1011

@@ -236,3 +237,15 @@ def test_to_records_datetimeindex_with_tz(self, tz):
236237

237238
# both converted to UTC, so they are equal
238239
tm.assert_numpy_array_equal(result, expected)
240+
241+
def test_to_dict_box_scalars(self):
242+
# 14216
243+
# make sure that we are boxing properly
244+
d = {'a': [1], 'b': ['b']}
245+
246+
result = DataFrame(d).to_dict()
247+
assert isinstance(list(result['a'])[0], (int, long))
248+
assert isinstance(list(result['b'])[0], (int, long))
249+
250+
result = DataFrame(d).to_dict(orient='records')
251+
assert isinstance(result[0]['a'], (int, long))

pandas/tests/series/test_io.py

-34
Original file line numberDiff line numberDiff line change
@@ -178,37 +178,3 @@ def test_to_dict(self, mapping):
178178
from_method = Series(ts.to_dict(collections.Counter))
179179
from_constructor = Series(collections.Counter(ts.iteritems()))
180180
tm.assert_series_equal(from_method, from_constructor)
181-
182-
183-
class TestSeriesToList(TestData):
184-
185-
def test_tolist(self):
186-
rs = self.ts.tolist()
187-
xp = self.ts.values.tolist()
188-
assert_almost_equal(rs, xp)
189-
190-
# datetime64
191-
s = Series(self.ts.index)
192-
rs = s.tolist()
193-
assert self.ts.index[0] == rs[0]
194-
195-
def test_tolist_np_int(self):
196-
# GH10904
197-
for t in ['int8', 'int16', 'int32', 'int64']:
198-
s = pd.Series([1], dtype=t)
199-
assert isinstance(s.tolist()[0], (int, long))
200-
201-
def test_tolist_np_uint(self):
202-
# GH10904
203-
for t in ['uint8', 'uint16']:
204-
s = pd.Series([1], dtype=t)
205-
assert isinstance(s.tolist()[0], int)
206-
for t in ['uint32', 'uint64']:
207-
s = pd.Series([1], dtype=t)
208-
assert isinstance(s.tolist()[0], long)
209-
210-
def test_tolist_np_float(self):
211-
# GH10904
212-
for t in ['float16', 'float32', 'float64']:
213-
s = pd.Series([1], dtype=t)
214-
assert isinstance(s.tolist()[0], float)

pandas/tests/test_base.py

+75-4
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
is_object_dtype, is_datetimetz,
1414
needs_i8_conversion)
1515
import pandas.util.testing as tm
16-
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
17-
Timedelta, IntervalIndex, Interval)
18-
from pandas.compat import StringIO, PYPY
16+
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex,
17+
PeriodIndex, Timedelta, IntervalIndex, Interval,
18+
CategoricalIndex, Timestamp)
19+
from pandas.compat import StringIO, PYPY, long
1920
from pandas.compat.numpy import np_array_datetime64_compat
2021
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
2122
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
@@ -433,7 +434,7 @@ def test_value_counts_unique_nunique(self):
433434
# datetimetz Series returns array of Timestamp
434435
assert result[0] == orig[0]
435436
for r in result:
436-
assert isinstance(r, pd.Timestamp)
437+
assert isinstance(r, Timestamp)
437438
tm.assert_numpy_array_equal(result,
438439
orig._values.asobject.values)
439440
else:
@@ -1031,3 +1032,73 @@ def f():
10311032

10321033
pytest.raises(AttributeError, f)
10331034
assert not hasattr(t, "b")
1035+
1036+
1037+
class TestToIterable(object):
1038+
# test that we convert an iterable to python types
1039+
1040+
dtypes = [
1041+
('int8', (int, long)),
1042+
('int16', (int, long)),
1043+
('int32', (int, long)),
1044+
('int64', (int, long)),
1045+
('uint8', (int, long)),
1046+
('uint16', (int, long)),
1047+
('uint32', (int, long)),
1048+
('uint64', (int, long)),
1049+
('float16', float),
1050+
('float32', float),
1051+
('float64', float),
1052+
('datetime64[ns]', Timestamp),
1053+
('datetime64[ns, US/Eastern]', Timestamp),
1054+
('timedelta64[ns]', Timedelta)]
1055+
1056+
@pytest.mark.parametrize(
1057+
'dtype, rdtype',
1058+
dtypes + [
1059+
('object', object),
1060+
('category', object)])
1061+
@pytest.mark.parametrize(
1062+
'method',
1063+
[
1064+
lambda x: x.tolist(),
1065+
lambda x: list(x),
1066+
lambda x: list(x.__iter__()),
1067+
], ids=['tolist', 'list', 'iter'])
1068+
@pytest.mark.parametrize('typ', [Series, Index])
1069+
def test_iterable(self, typ, method, dtype, rdtype):
1070+
# gh-10904
1071+
# gh-13258
1072+
# coerce iteration to underlying python / pandas types
1073+
s = typ([1], dtype=dtype)
1074+
result = method(s)[0]
1075+
assert isinstance(result, rdtype)
1076+
1077+
@pytest.mark.parametrize(
1078+
'dtype, rdtype',
1079+
dtypes + [
1080+
('object', (int, long)),
1081+
('category', (int, long))])
1082+
@pytest.mark.parametrize('typ', [Series, Index])
1083+
def test_iterable_map(self, typ, dtype, rdtype):
1084+
# gh-13236
1085+
# coerce iteration to underlying python / pandas types
1086+
s = typ([1], dtype=dtype)
1087+
result = s.map(type)[0]
1088+
if not isinstance(rdtype, tuple):
1089+
rdtype = tuple([rdtype])
1090+
assert result in rdtype
1091+
1092+
@pytest.mark.parametrize(
1093+
'method',
1094+
[
1095+
lambda x: x.tolist(),
1096+
lambda x: list(x),
1097+
lambda x: list(x.__iter__()),
1098+
], ids=['tolist', 'list', 'iter'])
1099+
def test_categorial_datetimelike(self, method):
1100+
i = CategoricalIndex([Timestamp('1999-12-31'),
1101+
Timestamp('2000-12-31')])
1102+
1103+
result = method(i)[0]
1104+
assert isinstance(result, Timestamp)

0 commit comments

Comments
 (0)