Skip to content

Commit 8b69209

Browse files
committed
Merge pull request #4829 from jreback/seq
ENH/API: allow DataFrame constructor to better accept list-like collections (GH3783,GH4297)
2 parents d20d9bf + 3a9c177 commit 8b69209

File tree

4 files changed

+67
-5
lines changed

4 files changed

+67
-5
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ Improvements to existing features
113113
``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`)
114114
- ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
115115
its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)
116+
- allow DataFrame constructor to accept more list-like objects, e.g. list of
117+
``collections.Sequence`` and ``array.array`` objects (:issue:`3783`, :issue:`4297`)
116118

117119
API Changes
118120
~~~~~~~~~~~

pandas/core/frame.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import sys
1717
import collections
1818
import warnings
19+
import types
1920

2021
from numpy import nan as NA
2122
import numpy as np
@@ -24,7 +25,7 @@
2425
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
2526
_default_index, _maybe_upcast, _is_sequence,
2627
_infer_dtype_from_scalar, _values_from_object,
27-
_coerce_to_dtypes, _DATELIKE_DTYPES)
28+
_coerce_to_dtypes, _DATELIKE_DTYPES, is_list_like)
2829
from pandas.core.generic import NDFrame
2930
from pandas.core.index import Index, MultiIndex, _ensure_index
3031
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
@@ -413,12 +414,14 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
413414
else:
414415
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
415416
copy=copy)
416-
elif isinstance(data, list):
417+
elif isinstance(data, (list, types.GeneratorType)):
418+
if isinstance(data, types.GeneratorType):
419+
data = list(data)
417420
if len(data) > 0:
418421
if index is None and isinstance(data[0], Series):
419422
index = _get_names_from_index(data)
420423

421-
if isinstance(data[0], (list, tuple, collections.Mapping, Series)):
424+
if is_list_like(data[0]) and getattr(data[0],'ndim',0) <= 1:
422425
arrays, columns = _to_arrays(data, columns, dtype=dtype)
423426
columns = _ensure_index(columns)
424427

@@ -4545,7 +4548,7 @@ def isin(self, values, iloc=False):
45454548

45464549

45474550
else:
4548-
if not com.is_list_like(values):
4551+
if not is_list_like(values):
45494552
raise TypeError("only list-like or dict-like objects are"
45504553
" allowed to be passed to DataFrame.isin(), "
45514554
"you passed a "
@@ -4705,7 +4708,7 @@ def extract_index(data):
47054708
elif isinstance(v, dict):
47064709
have_dicts = True
47074710
indexes.append(list(v.keys()))
4708-
elif isinstance(v, (list, tuple, np.ndarray)):
4711+
elif is_list_like(v) and getattr(v,'ndim',0) <= 1:
47094712
have_raw_arrays = True
47104713
raw_lengths.append(len(v))
47114714

pandas/tests/test_frame.py

+51
Original file line numberDiff line numberDiff line change
@@ -2606,6 +2606,57 @@ def test_constructor_list_of_lists(self):
26062606
self.assert_(com.is_integer_dtype(df['num']))
26072607
self.assert_(df['str'].dtype == np.object_)
26082608

2609+
def test_constructor_sequence_like(self):
2610+
# GH 3783
2611+
# collections.Sequence like
2612+
import collections
2613+
2614+
class DummyContainer(collections.Sequence):
2615+
def __init__(self, lst):
2616+
self._lst = lst
2617+
def __getitem__(self, n):
2618+
return self._lst.__getitem__(n)
2619+
def __len__(self):
2620+
return self._lst.__len__()
2621+
2622+
l = [DummyContainer([1, 'a']), DummyContainer([2, 'b'])]
2623+
columns = ["num", "str"]
2624+
result = DataFrame(l, columns=columns)
2625+
expected = DataFrame([[1,'a'],[2,'b']],columns=columns)
2626+
assert_frame_equal(result, expected, check_dtype=False)
2627+
2628+
# GH 4297
2629+
# support Array
2630+
import array
2631+
result = DataFrame.from_items([('A', array.array('i', range(10)))])
2632+
expected = DataFrame({ 'A' : list(range(10)) })
2633+
assert_frame_equal(result, expected, check_dtype=False)
2634+
2635+
expected = DataFrame([ list(range(10)), list(range(10)) ])
2636+
result = DataFrame([ array.array('i', range(10)), array.array('i',range(10)) ])
2637+
assert_frame_equal(result, expected, check_dtype=False)
2638+
2639+
def test_constructor_iterator(self):
2640+
2641+
expected = DataFrame([ list(range(10)), list(range(10)) ])
2642+
result = DataFrame([ range(10), range(10) ])
2643+
assert_frame_equal(result, expected)
2644+
2645+
def test_constructor_generator(self):
2646+
#related #2305
2647+
2648+
gen1 = (i for i in range(10))
2649+
gen2 = (i for i in range(10))
2650+
2651+
expected = DataFrame([ list(range(10)), list(range(10)) ])
2652+
result = DataFrame([ gen1, gen2 ])
2653+
assert_frame_equal(result, expected)
2654+
2655+
gen = ([ i, 'a'] for i in range(10))
2656+
result = DataFrame(gen)
2657+
expected = DataFrame({ 0 : range(10), 1 : 'a' })
2658+
assert_frame_equal(result, expected)
2659+
26092660
def test_constructor_list_of_dicts(self):
26102661
data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
26112662
OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),

pandas/tests/test_series.py

+6
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,12 @@ def test_constructor_series(self):
353353

354354
assert_series_equal(s2, s1.sort_index())
355355

356+
def test_constructor_iterator(self):
357+
358+
expected = Series(list(range(10)))
359+
result = Series(range(10))
360+
assert_series_equal(result, expected)
361+
356362
def test_constructor_generator(self):
357363
gen = (i for i in range(10))
358364

0 commit comments

Comments
 (0)