diff --git a/doc/source/release.rst b/doc/source/release.rst index c80ddd01cdf07..140c3bc836fdb 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -113,6 +113,8 @@ Improvements to existing features ``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`) - ``Panel.to_excel()`` now accepts keyword arguments that will be passed to its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`) + - allow DataFrame constructor to accept more list-like objects, e.g. list of + ``collections.Sequence`` and ``array.array`` objects (:issue:`3783`, :issue:`4297`) API Changes ~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bd601c5c8408e..fb08c5eaa4822 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -16,6 +16,7 @@ import sys import collections import warnings +import types from numpy import nan as NA import numpy as np @@ -24,7 +25,7 @@ from pandas.core.common import (isnull, notnull, PandasError, _try_sort, _default_index, _maybe_upcast, _is_sequence, _infer_dtype_from_scalar, _values_from_object, - _coerce_to_dtypes, _DATELIKE_DTYPES) + _coerce_to_dtypes, _DATELIKE_DTYPES, is_list_like) from pandas.core.generic import NDFrame from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels, @@ -413,12 +414,14 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, else: mgr = self._init_ndarray(data, index, columns, dtype=dtype, copy=copy) - elif isinstance(data, list): + elif isinstance(data, (list, types.GeneratorType)): + if isinstance(data, types.GeneratorType): + data = list(data) if len(data) > 0: if index is None and isinstance(data[0], Series): index = _get_names_from_index(data) - if isinstance(data[0], (list, tuple, collections.Mapping, Series)): + if is_list_like(data[0]) and getattr(data[0],'ndim',0) <= 1: arrays, columns = _to_arrays(data, columns, dtype=dtype) columns = _ensure_index(columns) @@ -4545,7 +4548,7 @@ def 
isin(self, values, iloc=False): else: - if not com.is_list_like(values): + if not is_list_like(values): raise TypeError("only list-like or dict-like objects are" " allowed to be passed to DataFrame.isin(), " "you passed a " @@ -4705,7 +4708,7 @@ def extract_index(data): elif isinstance(v, dict): have_dicts = True indexes.append(list(v.keys())) - elif isinstance(v, (list, tuple, np.ndarray)): + elif is_list_like(v) and getattr(v,'ndim',0) <= 1: have_raw_arrays = True raw_lengths.append(len(v)) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c5af0b0d4d5c8..507c2055e1b68 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2606,6 +2606,57 @@ def test_constructor_list_of_lists(self): self.assert_(com.is_integer_dtype(df['num'])) self.assert_(df['str'].dtype == np.object_) + def test_constructor_sequence_like(self): + # GH 3783 + # collections.Sequence like + import collections + + class DummyContainer(collections.Sequence): + def __init__(self, lst): + self._lst = lst + def __getitem__(self, n): + return self._lst.__getitem__(n) + def __len__(self, n): + return self._lst.__len__() + + l = [DummyContainer([1, 'a']), DummyContainer([2, 'b'])] + columns = ["num", "str"] + result = DataFrame(l, columns=columns) + expected = DataFrame([[1,'a'],[2,'b']],columns=columns) + assert_frame_equal(result, expected, check_dtype=False) + + # GH 4297 + # support Array + import array + result = DataFrame.from_items([('A', array.array('i', range(10)))]) + expected = DataFrame({ 'A' : list(range(10)) }) + assert_frame_equal(result, expected, check_dtype=False) + + expected = DataFrame([ list(range(10)), list(range(10)) ]) + result = DataFrame([ array.array('i', range(10)), array.array('i',range(10)) ]) + assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_iterator(self): + + expected = DataFrame([ list(range(10)), list(range(10)) ]) + result = DataFrame([ range(10), range(10) ]) + assert_frame_equal(result, 
expected) + + def test_constructor_generator(self): + #related #2305 + + gen1 = (i for i in range(10)) + gen2 = (i for i in range(10)) + + expected = DataFrame([ list(range(10)), list(range(10)) ]) + result = DataFrame([ gen1, gen2 ]) + assert_frame_equal(result, expected) + + gen = ([ i, 'a'] for i in range(10)) + result = DataFrame(gen) + expected = DataFrame({ 0 : range(10), 1 : 'a' }) + assert_frame_equal(result, expected) + def test_constructor_list_of_dicts(self): data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]), OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]), diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 7a993cbcf07f4..d2d0bc39fbfc9 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -353,6 +353,12 @@ def test_constructor_series(self): assert_series_equal(s2, s1.sort_index()) + def test_constructor_iterator(self): + + expected = Series(list(range(10))) + result = Series(range(10)) + assert_series_equal(result, expected) + def test_constructor_generator(self): gen = (i for i in range(10))