Skip to content

Commit 44d5498

Browse files
topper-123 authored and jreback committed
BUG: Exception when frame constructed from dict of iterators (#26356)
1 parent 2b32e41 commit 44d5498

File tree

5 files changed: +43 -12 lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@ Reshaping
460460
- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`)
461461
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
462462
- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`)
463+
- Bug in :class:`DataFrame` where instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`).
463464
- Bug in :class:`DataFrame` where instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`).
464465
- Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`)
465466
- Bug in :func:`Series.apply` where it failed when the series is a timezone-aware :class:`DatetimeIndex` (:issue:`25959`)

pandas/core/common.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from datetime import datetime, timedelta
1010
from functools import partial
1111
import inspect
12-
from typing import Any
12+
from typing import Any, Iterable, Union
1313

1414
import numpy as np
1515

@@ -289,6 +289,15 @@ def maybe_make_list(obj):
289289
return obj
290290

291291

292+
def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]:
293+
"""
294+
If obj is Iterable but not list-like, consume into list.
295+
"""
296+
if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized):
297+
return list(obj)
298+
return obj
299+
300+
292301
def is_null_slice(obj):
293302
"""
294303
We have a null slice.

pandas/core/internals/construction.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,11 @@ def init_dict(data, index, columns, dtype=None):
197197
else:
198198
keys = com.dict_keys_to_ordered_list(data)
199199
columns = data_names = Index(keys)
200+
arrays = (com.maybe_iterable_to_list(data[k]) for k in keys)
200201
# GH#24096 need copy to be deep for datetime64tz case
201202
# TODO: See if we can avoid these copies
202-
arrays = [data[k] if not is_datetime64tz_dtype(data[k]) else
203-
data[k].copy(deep=True) for k in keys]
203+
arrays = [arr if not is_datetime64tz_dtype(arr) else
204+
arr.copy(deep=True) for arr in arrays]
204205
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
205206

206207

pandas/core/series.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Data structure for 1-dimensional cross-sectional and time series data
33
"""
4-
from collections import OrderedDict, abc
4+
from collections import OrderedDict
55
from io import StringIO
66
from shutil import get_terminal_size
77
from textwrap import dedent
@@ -220,15 +220,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
220220
elif isinstance(data, (set, frozenset)):
221221
raise TypeError("{0!r} type is unordered"
222222
"".format(data.__class__.__name__))
223-
# If data is Iterable but not list-like, consume into list.
224-
elif (isinstance(data, abc.Iterable) and
225-
not isinstance(data, abc.Sized)):
226-
data = list(data)
227-
else:
228-
223+
elif isinstance(data, ABCSparseArray):
229224
# handle sparse passed here (and force conversion)
230-
if isinstance(data, ABCSparseArray):
231-
data = data.to_dense()
225+
data = data.to_dense()
226+
else:
227+
data = com.maybe_iterable_to_list(data)
232228

233229
if index is None:
234230
if not is_list_like(data):

pandas/tests/frame/test_constructors.py

+24
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,30 @@ def test_constructor_dict_of_tuples(self):
533533
expected = DataFrame({k: list(v) for k, v in data.items()})
534534
tm.assert_frame_equal(result, expected, check_dtype=False)
535535

536+
def test_constructor_dict_of_ranges(self):
537+
# GH 26356
538+
data = {'a': range(3), 'b': range(3, 6)}
539+
540+
result = DataFrame(data)
541+
expected = DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
542+
tm.assert_frame_equal(result, expected)
543+
544+
def test_constructor_dict_of_iterators(self):
545+
# GH 26349
546+
data = {'a': iter(range(3)), 'b': reversed(range(3))}
547+
548+
result = DataFrame(data)
549+
expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]})
550+
tm.assert_frame_equal(result, expected)
551+
552+
def test_constructor_dict_of_generators(self):
553+
# GH 26349
554+
data = {'a': (i for i in (range(3))),
555+
'b': (i for i in reversed(range(3)))}
556+
result = DataFrame(data)
557+
expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]})
558+
tm.assert_frame_equal(result, expected)
559+
536560
def test_constructor_dict_multiindex(self):
537561
def check(result, expected):
538562
return tm.assert_frame_equal(result, expected, check_dtype=True,

0 commit comments

Comments
 (0)