Skip to content

ENH/API: allow DataFrame constructor to better accept list-like collections (GH3783,GH4297) #4829

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 13, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ Improvements to existing features
``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`)
- ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)
- allow DataFrame constructor to accept more list-like objects, e.g. list of
``collections.Sequence`` and ``array.array`` objects (:issue:`3783`, :issue:`4297`)

API Changes
~~~~~~~~~~~
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import sys
import collections
import warnings
import types

from numpy import nan as NA
import numpy as np
Expand All @@ -24,7 +25,7 @@
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
_default_index, _maybe_upcast, _is_sequence,
_infer_dtype_from_scalar, _values_from_object,
_coerce_to_dtypes, _DATELIKE_DTYPES)
_coerce_to_dtypes, _DATELIKE_DTYPES, is_list_like)
from pandas.core.generic import NDFrame
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
Expand Down Expand Up @@ -413,12 +414,14 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
else:
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
elif isinstance(data, list):
elif isinstance(data, (list, types.GeneratorType)):
if isinstance(data, types.GeneratorType):
data = list(data)
if len(data) > 0:
if index is None and isinstance(data[0], Series):
index = _get_names_from_index(data)

if isinstance(data[0], (list, tuple, collections.Mapping, Series)):
if is_list_like(data[0]) and getattr(data[0],'ndim',0) <= 1:
arrays, columns = _to_arrays(data, columns, dtype=dtype)
columns = _ensure_index(columns)

Expand Down Expand Up @@ -4545,7 +4548,7 @@ def isin(self, values, iloc=False):


else:
if not com.is_list_like(values):
if not is_list_like(values):
raise TypeError("only list-like or dict-like objects are"
" allowed to be passed to DataFrame.isin(), "
"you passed a "
Expand Down Expand Up @@ -4705,7 +4708,7 @@ def extract_index(data):
elif isinstance(v, dict):
have_dicts = True
indexes.append(list(v.keys()))
elif isinstance(v, (list, tuple, np.ndarray)):
elif is_list_like(v) and getattr(v,'ndim',0) <= 1:
have_raw_arrays = True
raw_lengths.append(len(v))

Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2606,6 +2606,57 @@ def test_constructor_list_of_lists(self):
self.assert_(com.is_integer_dtype(df['num']))
self.assert_(df['str'].dtype == np.object_)

def test_constructor_sequence_like(self):
    """DataFrame accepts rows given as non-list sequence objects."""
    # GH 3783
    # collections.Sequence like
    try:
        # the ABC aliases in ``collections`` were removed in Python 3.10
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    class DummyContainer(Sequence):
        """Minimal read-only sequence that delegates to a wrapped list."""

        def __init__(self, lst):
            self._lst = lst

        def __getitem__(self, n):
            return self._lst[n]

        # __len__ takes no argument besides self; an extra parameter
        # makes len(container) raise TypeError
        def __len__(self):
            return len(self._lst)

    rows = [DummyContainer([1, 'a']), DummyContainer([2, 'b'])]
    columns = ["num", "str"]
    result = DataFrame(rows, columns=columns)
    expected = DataFrame([[1, 'a'], [2, 'b']], columns=columns)
    assert_frame_equal(result, expected, check_dtype=False)

    # GH 4297
    # support array.array
    import array
    result = DataFrame.from_items([('A', array.array('i', range(10)))])
    expected = DataFrame({'A': list(range(10))})
    assert_frame_equal(result, expected, check_dtype=False)

    expected = DataFrame([list(range(10)), list(range(10))])
    result = DataFrame([array.array('i', range(10)),
                        array.array('i', range(10))])
    assert_frame_equal(result, expected, check_dtype=False)

def test_constructor_iterator(self):
    """A list of ``range`` objects builds the same frame as nested lists."""
    lazy_rows = [range(10), range(10)]
    list_rows = [list(range(10)) for _ in (0, 1)]

    result = DataFrame(lazy_rows)
    expected = DataFrame(list_rows)
    assert_frame_equal(result, expected)

def test_constructor_generator(self):
    """Generators are accepted as rows and as the data argument itself."""
    # related #2305

    def ten_ints():
        # fresh one-shot generator yielding the values of range(10)
        return (i for i in range(10))

    # generators used as the individual rows of a list
    row_generators = [ten_ints(), ten_ints()]
    result = DataFrame(row_generators)
    expected = DataFrame([list(range(10)), list(range(10))])
    assert_frame_equal(result, expected)

    # a single generator of rows passed as the data itself
    pair_generator = ([i, 'a'] for i in range(10))
    result = DataFrame(pair_generator)
    expected = DataFrame({0: range(10), 1: 'a'})
    assert_frame_equal(result, expected)

def test_constructor_list_of_dicts(self):
data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ def test_constructor_series(self):

assert_series_equal(s2, s1.sort_index())

def test_constructor_iterator(self):
    """``Series(range(10))`` equals a Series built from the same list."""
    values = list(range(10))

    result = Series(range(10))
    expected = Series(values)
    assert_series_equal(result, expected)

def test_constructor_generator(self):
gen = (i for i in range(10))

Expand Down