BUG: Exception when frame constructed from dict of iterators (#26356)

topper-123 · jreback · commit 44d5498d8697 · 2019-05-19T20:24:35.000-04:00
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -460,6 +460,7 @@ Reshaping
 - Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`)
 - Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
 - Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`)
+- Bug in :class:`DataFrame` constructor where instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`).
 - bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`).
 - Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`)
 - Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`)
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -9,7 +9,7 @@
 from datetime import datetime, timedelta
 from functools import partial
 import inspect
-from typing import Any
+from typing import Any, Iterable, Union
 
 import numpy as np
 
@@ -289,6 +289,15 @@ def maybe_make_list(obj):
     return obj
 
 
+def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]:
+    """
+    If obj is Iterable but not list-like, consume into list.
+    """
+    if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized):
+        return list(obj)
+    return obj
+
+
 def is_null_slice(obj):
     """
     We have a null slice.
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -197,10 +197,11 @@ def init_dict(data, index, columns, dtype=None):
     else:
         keys = com.dict_keys_to_ordered_list(data)
         columns = data_names = Index(keys)
+        arrays = (com.maybe_iterable_to_list(data[k]) for k in keys)
         # GH#24096 need copy to be deep for datetime64tz case
         # TODO: See if we can avoid these copies
-        arrays = [data[k] if not is_datetime64tz_dtype(data[k]) else
-                  data[k].copy(deep=True) for k in keys]
+        arrays = [arr if not is_datetime64tz_dtype(arr) else
+                  arr.copy(deep=True) for arr in arrays]
     return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
 
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1,7 +1,7 @@
 """
 Data structure for 1-dimensional cross-sectional and time series data
 """
-from collections import OrderedDict, abc
+from collections import OrderedDict
 from io import StringIO
 from shutil import get_terminal_size
 from textwrap import dedent
@@ -220,15 +220,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
             elif isinstance(data, (set, frozenset)):
                 raise TypeError("{0!r} type is unordered"
                                 "".format(data.__class__.__name__))
-            # If data is Iterable but not list-like, consume into list.
-            elif (isinstance(data, abc.Iterable) and
-                  not isinstance(data, abc.Sized)):
-                data = list(data)
-            else:
-
+            elif isinstance(data, ABCSparseArray):
                 # handle sparse passed here (and force conversion)
-                if isinstance(data, ABCSparseArray):
-                    data = data.to_dense()
+                data = data.to_dense()
+            else:
+                data = com.maybe_iterable_to_list(data)
 
             if index is None:
                 if not is_list_like(data):
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -533,6 +533,30 @@ def test_constructor_dict_of_tuples(self):
         expected = DataFrame({k: list(v) for k, v in data.items()})
         tm.assert_frame_equal(result, expected, check_dtype=False)
 
+    def test_constructor_dict_of_ranges(self):
+        # GH 26356
+        data = {'a': range(3), 'b': range(3, 6)}
+
+        result = DataFrame(data)
+        expected = DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_constructor_dict_of_iterators(self):
+        # GH 26349
+        data = {'a': iter(range(3)), 'b': reversed(range(3))}
+
+        result = DataFrame(data)
+        expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_constructor_dict_of_generators(self):
+        # GH 26349
+        data = {'a': (i for i in (range(3))),
+                'b': (i for i in reversed(range(3)))}
+        result = DataFrame(data)
+        expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]})
+        tm.assert_frame_equal(result, expected)
+
     def test_constructor_dict_multiindex(self):
         def check(result, expected):
             return tm.assert_frame_equal(result, expected, check_dtype=True,