Skip to content

Commit 4a6e6d2

Browse files
Chang She, wesm
Chang She authored and wesm committed
ENH: DataFrame.from_records takes iterator #1794
1 parent b8dae94 commit 4a6e6d2

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

pandas/core/frame.py

+28 -1
Original file line number | Diff line number | Diff line change
@@ -872,7 +872,7 @@ def to_dict(self, outtype='dict'):
872872

873873
@classmethod
874874
def from_records(cls, data, index=None, exclude=None, columns=None,
875-
coerce_float=False):
875+
coerce_float=False, nrows=None):
876876
"""
877877
Convert structured or record ndarray to DataFrame
878878
@@ -906,6 +906,33 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
906906
raise ValueError('Non-unique columns not yet supported in '
907907
'from_records')
908908

909+
if com.is_iterator(data):
910+
if nrows == 0:
911+
return DataFrame()
912+
913+
try:
914+
first_row = data.next()
915+
except StopIteration:
916+
return DataFrame(index=index, columns=columns)
917+
918+
dtype = None
919+
if hasattr(first_row, 'dtype') and first_row.dtype.names:
920+
dtype = first_row.dtype
921+
922+
values = [first_row]
923+
924+
i = 1
925+
for row in data:
926+
values.append(row)
927+
i += 1
928+
if i >= nrows:
929+
break
930+
931+
if dtype is not None:
932+
data = np.array(values, dtype=dtype)
933+
else:
934+
data = values
935+
909936
if isinstance(data, (np.ndarray, DataFrame, dict)):
910937
keys, sdict = _rec_to_dict(data)
911938
if columns is None:

pandas/tests/test_frame.py

+13
Original file line number | Diff line number | Diff line change
@@ -2639,6 +2639,19 @@ def test_from_records_nones(self):
26392639
df = DataFrame.from_records(tuples, columns=['a', 'b', 'c', 'd'])
26402640
self.assert_(np.isnan(df['c'][0]))
26412641

2642+
def test_from_records_iterator(self):
2643+
arr = np.array([(1.0, 2), (3.0, 4), (5., 6), (7., 8)],
2644+
dtype=[('x', float), ('y', int)])
2645+
df = DataFrame.from_records(iter(arr), nrows=2)
2646+
xp = DataFrame({'x' : np.array([1.0, 3.0], dtype=float),
2647+
'y' : np.array([2, 4], dtype=int)})
2648+
assert_frame_equal(df, xp)
2649+
2650+
arr = [(1.0, 2), (3.0, 4), (5., 6), (7., 8)]
2651+
df = DataFrame.from_records(iter(arr), columns=['x', 'y'],
2652+
nrows=2)
2653+
assert_frame_equal(df, xp)
2654+
26422655
def test_from_records_columns_not_modified(self):
26432656
tuples = [(1, 2, 3),
26442657
(1, 2, 3),

0 commit comments

Comments
 (0)