Skip to content

BUG: Exception when frame constructed from dict of iterators #26356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ Reshaping
- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`)
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`)
- Bug in :class:`DataFrame` where instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`).
- Bug in :class:`DataFrame` where instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`).
- Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`)
- Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datetime import datetime, timedelta
from functools import partial
import inspect
from typing import Any
from typing import Any, Iterable, Union

import numpy as np

Expand Down Expand Up @@ -289,6 +289,15 @@ def maybe_make_list(obj):
return obj


def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]:
    """
    If obj is Iterable but not list-like, consume into list.

    Parameters
    ----------
    obj : Iterable or any object
        The value to inspect.

    Returns
    -------
    list or object
        ``list(obj)`` when ``obj`` is an ``abc.Iterable`` that is not
        ``abc.Sized`` (for example an iterator or a generator); otherwise
        ``obj`` itself, returned unchanged.

    Notes
    -----
    Building the list iterates ``obj`` to completion, so a one-shot
    iterator passed in is exhausted afterwards.
    """
    # Sized iterables (list, tuple, range, str, ...) already report a
    # length and are passed through untouched; only length-less iterables
    # need to be materialized.
    if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized):
        return list(obj)
    return obj


def is_null_slice(obj):
"""
We have a null slice.
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,11 @@ def init_dict(data, index, columns, dtype=None):
else:
keys = com.dict_keys_to_ordered_list(data)
columns = data_names = Index(keys)
arrays = (com.maybe_iterable_to_list(data[k]) for k in keys)
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
arrays = [data[k] if not is_datetime64tz_dtype(data[k]) else
data[k].copy(deep=True) for k in keys]
arrays = [arr if not is_datetime64tz_dtype(arr) else
arr.copy(deep=True) for arr in arrays]
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)


Expand Down
14 changes: 5 additions & 9 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Data structure for 1-dimensional cross-sectional and time series data
"""
from collections import OrderedDict, abc
from collections import OrderedDict
from io import StringIO
from shutil import get_terminal_size
from textwrap import dedent
Expand Down Expand Up @@ -220,15 +220,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
elif isinstance(data, (set, frozenset)):
raise TypeError("{0!r} type is unordered"
"".format(data.__class__.__name__))
# If data is Iterable but not list-like, consume into list.
elif (isinstance(data, abc.Iterable) and
not isinstance(data, abc.Sized)):
data = list(data)
else:

elif isinstance(data, ABCSparseArray):
# handle sparse passed here (and force conversion)
if isinstance(data, ABCSparseArray):
data = data.to_dense()
data = data.to_dense()
else:
data = com.maybe_iterable_to_list(data)

if index is None:
if not is_list_like(data):
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,30 @@ def test_constructor_dict_of_tuples(self):
expected = DataFrame({k: list(v) for k, v in data.items()})
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_dict_of_ranges(self):
    """A dict of ``range`` objects builds the same frame as a dict of lists."""
    # GH 26356
    data = {'a': range(3), 'b': range(3, 6)}

    result = DataFrame(data)
    expected = DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
    tm.assert_frame_equal(result, expected)

def test_constructor_dict_of_iterators(self):
    """A dict of one-shot iterators is consumed into columns, in order."""
    # GH 26349
    data = {'a': iter(range(3)), 'b': reversed(range(3))}

    result = DataFrame(data)
    expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]})
    tm.assert_frame_equal(result, expected)

def test_constructor_dict_of_generators(self):
    """A dict of generator expressions is consumed into columns, in order."""
    # GH 26349
    data = {'a': (i for i in range(3)),
            'b': (i for i in reversed(range(3)))}

    result = DataFrame(data)
    expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]})
    tm.assert_frame_equal(result, expected)

def test_constructor_dict_multiindex(self):
def check(result, expected):
return tm.assert_frame_equal(result, expected, check_dtype=True,
Expand Down