Skip to content

Commit d37cf9e

Browse files
committed
COMPAT: Iteration should always yield a python scalar
xref pandas-dev#10904 closes pandas-dev#13236 closes pandas-dev#13256 xref pandas-dev#14216
1 parent e6aed2e commit d37cf9e

File tree

11 files changed

+191
-64
lines changed

11 files changed

+191
-64
lines changed

doc/source/whatsnew/v0.21.0.txt

+47
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,53 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in
187187
...
188188
ValueError: Cannot operate inplace if there is no assignment
189189

190+
.. _whatsnew_0210.api_breaking.iteration_scalars:
191+
192+
Iteration of Series/Index will now return python scalars
193+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
194+
195+
Previously, when using certain iteration methods on a ``Series`` or ``Index`` with an int or float dtype, you could receive a ``numpy`` scalar (e.g. ``np.int64``) rather than a Python ``int``. :issue:`10904` corrected this for ``Series.tolist()`` and ``list(Series)``; this change makes the remaining iteration methods consistent. This is only a change for int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`)
196+
197+
.. ipython:: python
198+
199+
s = Series([1, 2, 3])
200+
s
201+
202+
Previously:
203+
204+
.. code-block:: python
205+
206+
In [2]: type(list(s)[0])
207+
Out[2]: numpy.int64
208+
209+
New Behaviour:
210+
211+
.. ipython:: python
212+
213+
type(list(s)[0])
214+
215+
Furthermore, this will now correctly box the results of iteration for ``DataFrame.to_dict()`` as well.
216+
217+
.. ipython:: python
218+
219+
d = {'a':[1], 'b':['b']}
220+
df = DataFrame(d)
221+
222+
Previously:
223+
224+
.. code-block:: python
225+
226+
In [8]: type(df.to_dict()['a'][0])
227+
Out[8]: numpy.int64
228+
229+
New Behaviour:
230+
231+
.. ipython:: python
232+
233+
type(df.to_dict()['a'][0])
234+
235+
.. _whatsnew_0210.api_breaking.dtype_conversions:
236+
190237
Dtype Conversions
191238
^^^^^^^^^^^^^^^^^
192239

pandas/core/base.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@
88

99
from pandas.core.dtypes.missing import isna
1010
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
11-
from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar
11+
from pandas.core.dtypes.common import (
12+
is_object_dtype,
13+
is_list_like,
14+
is_scalar,
15+
is_datetimelike)
16+
1217
from pandas.util._validators import validate_bool_kwarg
1318

1419
from pandas.core import common as com
@@ -18,7 +23,8 @@
1823
from pandas.compat import PYPY
1924
from pandas.util._decorators import (Appender, cache_readonly,
2025
deprecate_kwarg, Substitution)
21-
from pandas.core.common import AbstractMethodError
26+
from pandas.core.common import AbstractMethodError, _maybe_box_datetimelike
27+
2228
from pandas.core.accessor import DirNamesMixin
2329

2430
_shared_docs = dict()
@@ -884,6 +890,21 @@ def argmin(self, axis=None):
884890
"""
885891
return nanops.nanargmin(self.values)
886892

893+
def tolist(self):
    """
    Return the values as a plain ``list``.

    The elements are exactly what ``__iter__`` yields, so they are boxed
    the same way as during iteration.
    """
    boxed = self.__iter__()
    return list(boxed)
898+
899+
def __iter__(self):
    """
    Iterate over the values, boxing each one to a scalar.

    Datetimelike data is passed element-wise through
    ``_maybe_box_datetimelike``; anything else is converted with the
    underlying ``tolist()`` and iterated from that list.
    """
    if not is_datetimelike(self):
        as_list = self._values.tolist()
        return iter(as_list)

    return (_maybe_box_datetimelike(x) for x in self._values)
907+
887908
@cache_readonly
888909
def hasnans(self):
889910
""" return if I have any nans; enables various perf speedups """

pandas/core/categorical.py

+6
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,12 @@ def itemsize(self):
399399
""" return the size of a single category """
400400
return self.categories.itemsize
401401

402+
def tolist(self):
    """
    Return my values as a list.

    The values are first materialised with ``np.array`` and then
    converted with the array's own ``tolist()``.
    """
    dense = np.array(self)
    return dense.tolist()
407+
402408
def reshape(self, new_shape, *args, **kwargs):
403409
"""
404410
.. deprecated:: 0.19.0

pandas/core/indexes/base.py

-9
Original file line numberDiff line numberDiff line change
@@ -585,12 +585,6 @@ def memory_usage(self, deep=False):
585585
return result
586586

587587
# ops compat
588-
def tolist(self):
589-
"""
590-
return a list of the Index values
591-
"""
592-
return list(self.values)
593-
594588
@deprecate_kwarg(old_arg_name='n', new_arg_name='repeats')
595589
def repeat(self, repeats, *args, **kwargs):
596590
"""
@@ -1601,9 +1595,6 @@ def is_all_dates(self):
16011595
return False
16021596
return is_datetime_array(_ensure_object(self.values))
16031597

1604-
def __iter__(self):
1605-
return iter(self.values)
1606-
16071598
def __reduce__(self):
16081599
d = dict(data=self._data)
16091600
d.update(self._get_attributes_dict())

pandas/core/indexes/category.py

+4
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ def get_values(self):
253253
""" return the underlying data as an ndarray """
254254
return self._data.get_values()
255255

256+
def __iter__(self):
    """ iterate exactly as the wrapped ``_data`` object does """
    wrapped = self._data
    return wrapped.__iter__()
259+
256260
@property
257261
def codes(self):
258262
return self._data.codes

pandas/core/series.py

-12
Original file line numberDiff line numberDiff line change
@@ -1095,14 +1095,6 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
10951095
with open(buf, 'w') as f:
10961096
f.write(result)
10971097

1098-
def __iter__(self):
1099-
""" provide iteration over the values of the Series
1100-
box values if necessary """
1101-
if is_datetimelike(self):
1102-
return (_maybe_box_datetimelike(x) for x in self._values)
1103-
else:
1104-
return iter(self._values)
1105-
11061098
def iteritems(self):
11071099
"""
11081100
Lazily iterate over (index, value) tuples
@@ -1118,10 +1110,6 @@ def keys(self):
11181110
"""Alias for index"""
11191111
return self.index
11201112

1121-
def tolist(self):
1122-
""" Convert Series to a nested list """
1123-
return list(self.asobject)
1124-
11251113
def to_dict(self, into=dict):
11261114
"""
11271115
Convert Series to {label -> value} dict or dict-like object.

pandas/core/sparse/array.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,18 @@ def to_dense(self, fill=None):
407407
return self.values
408408

409409
def __iter__(self):
    """
    Iterate over the values, yielding python scalars.

    Values of a floating dtype are yielded as ``float`` and values of an
    integer dtype (signed or unsigned) as ``int``. Values of any other
    dtype are yielded exactly as ``_get_val_at`` returns them.
    """
    # Compare against NumPy's abstract scalar types. ``np.float`` and
    # ``np.int`` are only aliases of the builtins: they are deprecated in
    # newer NumPy releases and do not reliably match every float width or
    # the unsigned integer dtypes.
    if np.issubdtype(self.dtype, np.floating):
        boxer = float
    elif np.issubdtype(self.dtype, np.integer):
        boxer = int
    else:
        boxer = None

    for i in range(len(self)):
        r = self._get_val_at(i)

        # box em
        yield r if boxer is None else boxer(r)
412422

413423
def __getitem__(self, key):
414424
"""

pandas/tests/frame/test_api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import sys
1010
from distutils.version import LooseVersion
1111

12-
from pandas.compat import range, lrange
12+
from pandas.compat import range, lrange, long
1313
from pandas import compat
1414

1515
from numpy.random import randn
@@ -205,7 +205,7 @@ def test_itertuples(self):
205205
'ints': lrange(5)}, columns=['floats', 'ints'])
206206

207207
for tup in df.itertuples(index=False):
208-
assert isinstance(tup[1], np.integer)
208+
assert isinstance(tup[1], (int, long))
209209

210210
df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
211211
dfaa = df[['a', 'a']]

pandas/tests/frame/test_convert_to.py

+13
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66

77
from pandas import compat
8+
from pandas.compat import long
89
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
910
date_range)
1011

@@ -236,3 +237,15 @@ def test_to_records_datetimeindex_with_tz(self, tz):
236237

237238
# both converted to UTC, so they are equal
238239
tm.assert_numpy_array_equal(result, expected)
240+
241+
def test_to_dict_box_scalars(self):
    # 14216
    # make sure that we are boxing properly
    d = {'a': [1], 'b': ['b']}

    # the default orient gives {column -> {index label -> value}}; index
    # into the inner dict so that the *values* are checked (iterating the
    # inner dict would only yield the index labels)
    result = DataFrame(d).to_dict()
    assert isinstance(result['a'][0], (int, long))
    assert isinstance(result['b'][0], compat.string_types)

    result = DataFrame(d).to_dict(orient='records')
    assert isinstance(result[0]['a'], (int, long))
    assert isinstance(result[0]['b'], compat.string_types)

pandas/tests/series/test_io.py

-34
Original file line numberDiff line numberDiff line change
@@ -178,37 +178,3 @@ def test_to_dict(self, mapping):
178178
from_method = Series(ts.to_dict(collections.Counter))
179179
from_constructor = Series(collections.Counter(ts.iteritems()))
180180
tm.assert_series_equal(from_method, from_constructor)
181-
182-
183-
class TestSeriesToList(TestData):
184-
185-
def test_tolist(self):
186-
rs = self.ts.tolist()
187-
xp = self.ts.values.tolist()
188-
assert_almost_equal(rs, xp)
189-
190-
# datetime64
191-
s = Series(self.ts.index)
192-
rs = s.tolist()
193-
assert self.ts.index[0] == rs[0]
194-
195-
def test_tolist_np_int(self):
196-
# GH10904
197-
for t in ['int8', 'int16', 'int32', 'int64']:
198-
s = pd.Series([1], dtype=t)
199-
assert isinstance(s.tolist()[0], (int, long))
200-
201-
def test_tolist_np_uint(self):
202-
# GH10904
203-
for t in ['uint8', 'uint16']:
204-
s = pd.Series([1], dtype=t)
205-
assert isinstance(s.tolist()[0], int)
206-
for t in ['uint32', 'uint64']:
207-
s = pd.Series([1], dtype=t)
208-
assert isinstance(s.tolist()[0], long)
209-
210-
def test_tolist_np_float(self):
211-
# GH10904
212-
for t in ['float16', 'float32', 'float64']:
213-
s = pd.Series([1], dtype=t)
214-
assert isinstance(s.tolist()[0], float)

pandas/tests/test_base.py

+85-4
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
is_object_dtype, is_datetimetz,
1414
needs_i8_conversion)
1515
import pandas.util.testing as tm
16-
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
17-
Timedelta, IntervalIndex, Interval)
18-
from pandas.compat import StringIO, PYPY
16+
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex,
17+
PeriodIndex, Timedelta, IntervalIndex, Interval,
18+
CategoricalIndex, Timestamp)
19+
from pandas.compat import StringIO, PYPY, long
1920
from pandas.compat.numpy import np_array_datetime64_compat
2021
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
2122
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
@@ -433,7 +434,7 @@ def test_value_counts_unique_nunique(self):
433434
# datetimetz Series returns array of Timestamp
434435
assert result[0] == orig[0]
435436
for r in result:
436-
assert isinstance(r, pd.Timestamp)
437+
assert isinstance(r, Timestamp)
437438
tm.assert_numpy_array_equal(result,
438439
orig._values.asobject.values)
439440
else:
@@ -1031,3 +1032,83 @@ def f():
10311032

10321033
pytest.raises(AttributeError, f)
10331034
assert not hasattr(t, "b")
1035+
1036+
1037+
class TestToIterable(object):
    # test that we convert an iterable to python types

    @pytest.mark.parametrize(
        'dtype, rdtype',
        [
            ('int8', (int, long)),
            ('int16', (int, long)),
            ('int32', (int, long)),
            ('int64', (int, long)),
            ('uint8', int),
            ('uint16', int),
            ('uint32', long),
            ('uint64', long),
            ('float16', float),
            ('float32', float),
            ('float64', float),
            ('datetime64[ns]', Timestamp),
            ('datetime64[ns, US/Eastern]', Timestamp),
            ('timedelta64[ns]', Timedelta),
            # the input below is ``[1]``, so an object container must hand
            # back a python integer; ``object`` here would accept anything
            ('object', (int, long)),
            # NOTE(review): left loose because categorical iteration is
            # not visible in this change -- tighten once confirmed to box
            ('category', object)])
    @pytest.mark.parametrize(
        'method',
        [
            lambda x: x.tolist(),
            lambda x: list(x),
            lambda x: list(x.__iter__()),
        ], ids=['tolist', 'list', 'iter'])
    @pytest.mark.parametrize('typ', [Series, Index])
    def test_iterable(self, typ, method, dtype, rdtype):
        # gh-10904
        # gh-13258
        # coerce iteration to underlying python / pandas types
        s = typ([1], dtype=dtype)
        result = method(s)[0]
        assert isinstance(result, rdtype)

    @pytest.mark.parametrize(
        'dtype, rdtype',
        [
            ('int8', int),
            ('int16', int),
            ('int32', int),
            ('int64', int),
            ('uint8', int),
            ('uint16', int),
            ('uint32', int),
            ('uint64', int),
            ('float16', float),
            ('float32', float),
            ('float64', float),
            ('datetime64[ns]', Timestamp),
            ('datetime64[ns, US/Eastern]', Timestamp),
            ('timedelta64[ns]', Timedelta),
            ('object', int),
            ('category', int)])
    @pytest.mark.parametrize('typ', [Series, Index])
    def test_iterable_map(self, typ, dtype, rdtype):
        # gh-13236
        # coerce iteration to underlying python / pandas types
        s = typ([1], dtype=dtype)
        result = s.map(type)[0]
        assert result is rdtype

    @pytest.mark.parametrize(
        'method',
        [
            lambda x: x.tolist(),
            lambda x: list(x),
            lambda x: list(x.__iter__()),
        ], ids=['tolist', 'list', 'iter'])
    def test_categorial_datetimelike(self, method):
        # datetimelike categories must come back boxed as Timestamp
        i = CategoricalIndex([Timestamp('1999-12-31'),
                              Timestamp('2000-12-31')])

        result = method(i)[0]
        assert isinstance(result, Timestamp)

0 commit comments

Comments
 (0)