From 062cd20f67eb1f717f0b1286b96e4e491df17bce Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 12 May 2019 17:25:40 +0200 Subject: [PATCH 1/3] BUG: Exception when frame constructed from dict of iterators --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/common.py | 9 +++++++++ pandas/core/internals/construction.py | 2 ++ pandas/core/series.py | 11 +++++------ pandas/tests/frame/test_constructors.py | 23 +++++++++++++++++++++++ 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a829b72c53bc4..88e0f722d943a 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -459,6 +459,7 @@ Reshaping - Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`) - Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). - Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) +- Bug in :class:`DataFrame` where instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`). - bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). - Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`) - Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`) diff --git a/pandas/core/common.py b/pandas/core/common.py index e62a2119df820..7ac32a864427b 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -289,6 +289,15 @@ def maybe_make_list(obj): return obj +def maybe_itarable_to_list(obj: Any) -> Any: + """ + If obj is Iterable but not list-like, consume into list. 
+ """ + if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized): + return list(obj) + return obj + + def is_null_slice(obj): """ We have a null slice. diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 2cd53d2ce9cee..b7db80736ecc3 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -195,6 +195,8 @@ def init_dict(data, index, columns, dtype=None): arrays.loc[missing] = [val] * missing.sum() else: + data = OrderedDict((col_name, com.maybe_itarable_to_list(col)) + for col_name, col in data.items()) keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) # GH#24096 need copy to be deep for datetime64tz case diff --git a/pandas/core/series.py b/pandas/core/series.py index 08eb6d99b1909..839c982fa2716 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -220,15 +220,14 @@ def __init__(self, data=None, index=None, dtype=None, name=None, elif isinstance(data, (set, frozenset)): raise TypeError("{0!r} type is unordered" "".format(data.__class__.__name__)) - # If data is Iterable but not list-like, consume into list. 
elif (isinstance(data, abc.Iterable) and not isinstance(data, abc.Sized)): - data = list(data) - else: - + data = com.maybe_itarable_to_list(data) + elif isinstance(data, ABCSparseArray): # handle sparse passed here (and force conversion) - if isinstance(data, ABCSparseArray): - data = data.to_dense() + data = data.to_dense() + else: + pass if index is None: if not is_list_like(data): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b5b389b6323b2..ebae3f36ae74e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -533,6 +533,29 @@ def test_constructor_dict_of_tuples(self): expected = DataFrame({k: list(v) for k, v in data.items()}) tm.assert_frame_equal(result, expected, check_dtype=False) + def test_constructor_dict_of_ranges(self): + data = {'a': range(3), 'b': range(3, 6)} + + result = DataFrame(data) + expected = DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_iterators(self): + # GH 26349 + data = {'a': iter(range(3)), 'b': reversed(range(3))} + + result = DataFrame(data) + expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_generators(self): + # GH 26349 + data = {'a': (i for i in (range(3))), + 'b': (i for i in reversed(range(3)))} + result = DataFrame(data) + expected = DataFrame({'a': [0, 1, 2], 'b': [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + def test_constructor_dict_multiindex(self): def check(result, expected): return tm.assert_frame_equal(result, expected, check_dtype=True, From 38ac629ecbecd64c8bc35fa669feb7c6a8adc5e4 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 13 May 2019 12:19:21 +0200 Subject: [PATCH 2/3] CLN: consume iterables while building arrays in init_dict; simplify Series.__init__ --- pandas/core/internals/construction.py | 7 +++---- pandas/core/series.py | 7 ++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git 
a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index b7db80736ecc3..bbd290513bd01 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -195,14 +195,13 @@ def init_dict(data, index, columns, dtype=None): arrays.loc[missing] = [val] * missing.sum() else: - data = OrderedDict((col_name, com.maybe_itarable_to_list(col)) - for col_name, col in data.items()) keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) + arrays = (com.maybe_itarable_to_list(data[k]) for k in keys) # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies - arrays = [data[k] if not is_datetime64tz_dtype(data[k]) else - data[k].copy(deep=True) for k in keys] + arrays = [arr if not is_datetime64tz_dtype(arr) else + arr.copy(deep=True) for arr in arrays] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index 839c982fa2716..f1913e8290f94 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1,7 +1,7 @@ """ Data structure for 1-dimensional cross-sectional and time series data """ -from collections import OrderedDict, abc +from collections import OrderedDict from io import StringIO from shutil import get_terminal_size from textwrap import dedent @@ -220,14 +220,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None, elif isinstance(data, (set, frozenset)): raise TypeError("{0!r} type is unordered" "".format(data.__class__.__name__)) - elif (isinstance(data, abc.Iterable) and - not isinstance(data, abc.Sized)): - data = com.maybe_itarable_to_list(data) elif isinstance(data, ABCSparseArray): # handle sparse passed here (and force conversion) data = data.to_dense() else: - pass + data = com.maybe_itarable_to_list(data) if index is None: if not is_list_like(data): From 29834d7bd71761a379684ce2715993e9a962b67f Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 18 May 
2019 21:32:30 +0200 Subject: [PATCH 3/3] CLN: rename maybe_itarable_to_list to maybe_iterable_to_list and refine its type hints --- pandas/core/common.py | 4 ++-- pandas/core/internals/construction.py | 2 +- pandas/core/series.py | 2 +- pandas/tests/frame/test_constructors.py | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 7ac32a864427b..771ded04f461d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -9,7 +9,7 @@ from datetime import datetime, timedelta from functools import partial import inspect -from typing import Any +from typing import Any, Iterable, Union import numpy as np @@ -289,7 +289,7 @@ def maybe_make_list(obj): return obj -def maybe_itarable_to_list(obj: Any) -> Any: +def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]: """ If obj is Iterable but not list-like, consume into list. """ diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index bbd290513bd01..863b9f7fb16d7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -197,7 +197,7 @@ def init_dict(data, index, columns, dtype=None): else: keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) - arrays = (com.maybe_itarable_to_list(data[k]) for k in keys) + arrays = (com.maybe_iterable_to_list(data[k]) for k in keys) # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies arrays = [arr if not is_datetime64tz_dtype(arr) else diff --git a/pandas/core/series.py b/pandas/core/series.py index f1913e8290f94..5b59fd6e7b38d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -224,7 +224,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # handle sparse passed here (and force conversion) data = data.to_dense() else: - data = com.maybe_itarable_to_list(data) + data = com.maybe_iterable_to_list(data) if index is None: if not is_list_like(data): diff --git a/pandas/tests/frame/test_constructors.py 
b/pandas/tests/frame/test_constructors.py index ebae3f36ae74e..68017786eb6a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -534,6 +534,7 @@ def test_constructor_dict_of_tuples(self): tm.assert_frame_equal(result, expected, check_dtype=False) def test_constructor_dict_of_ranges(self): + # GH 26356 data = {'a': range(3), 'b': range(3, 6)} result = DataFrame(data)