Skip to content

Commit af2864f

Browse files
committed
ENH: pass list of tuples/lists to DataFrame.from_records, GH #357
1 parent 30e539c commit af2864f

File tree

4 files changed

+50
-4
lines changed

4 files changed

+50
-4
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ pandas 0.5.1
5050
- Add `orient` option to `Panel.from_dict` to ease creation of mixed-type
5151
Panels (GH #359)
5252
- Add `DataFrame.from_dict` with similar `orient` option
53+
- Can pass list of tuples or list of lists to `DataFrame.from_records` for
54+
fast conversion to DataFrame (GH #357)
5355
5456
**Improvements to existing features**
5557

pandas/core/frame.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -446,22 +446,43 @@ def to_dict(self):
446446
return dict((k, v.to_dict()) for k, v in self.iteritems())
447447

448448
@classmethod
449-
def from_records(cls, data, index=None, exclude=None):
449+
def from_records(cls, data, index=None, exclude=None, names=None):
450450
"""
451451
Convert structured or record ndarray to DataFrame
452452
453453
Parameters
454454
----------
455-
data : NumPy structured array
455+
data : ndarray (structured dtype), list of tuples, or list of lists
456456
index : string, list of fields, array-like
457457
Field of array to use as the index, alternately a specific set of
458458
input labels to use
459+
exclude : sequence, default None
460+
Columns or fields to exclude
461+
names : sequence, default None
462+
Column names to use, replacing any found in passed data
459463
460464
Returns
461465
-------
462466
df : DataFrame
463467
"""
464-
columns, sdict = _rec_to_dict(data)
468+
if isinstance(data, (np.ndarray, DataFrame, dict)):
469+
columns, sdict = _rec_to_dict(data)
470+
else:
471+
if isinstance(data[0], tuple):
472+
content = list(lib.to_object_array_tuples(data).T)
473+
else:
474+
# list of lists
475+
content = list(lib.to_object_array(data).T)
476+
477+
if names is None:
478+
columns = range(len(content))
479+
else:
480+
assert(len(names) == len(content))
481+
columns = names
482+
483+
sdict = dict((c, lib.convert_sql_column(vals))
484+
for c, vals in zip(columns, content))
485+
del content
465486

466487
if exclude is None:
467488
exclude = set()

pandas/src/parsing.pyx

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ def to_object_array_tuples(list rows):
5252
return result
5353

5454
def maybe_convert_numeric(ndarray[object] values, set na_values):
55+
'''
56+
Type inference function: convert strings to numeric (where possible) and
57+
convert to proper dtype array
58+
'''
5559
cdef:
5660
Py_ssize_t i, n
5761
ndarray[float64_t] floats
@@ -94,7 +98,10 @@ def maybe_convert_numeric(ndarray[object] values, set na_values):
9498
else:
9599
return ints
96100

97-
def convert_sql_column(ndarray[object] objects):
101+
def maybe_convert_objects(ndarray[object] objects):
102+
'''
103+
Type inference function: convert object array to proper dtype
104+
'''
98105
cdef:
99106
Py_ssize_t i, n
100107
ndarray[float64_t] floats
@@ -157,6 +164,8 @@ def convert_sql_column(ndarray[object] objects):
157164
else:
158165
return objects
159166

167+
convert_sql_column = maybe_convert_objects
168+
160169
def try_parse_dates(ndarray[object] values, parser=None):
161170
cdef:
162171
Py_ssize_t i, n

pandas/tests/test_frame.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,6 +1196,20 @@ def test_from_records_to_records(self):
11961196
self.assertEqual(len(records.dtype.names), 2)
11971197
self.assert_('index' not in records.dtype.names)
11981198

1199+
def test_from_records_tuples(self):
1200+
df = DataFrame({'A' : np.random.randn(6),
1201+
'B' : np.arange(6),
1202+
'C' : ['foo'] * 6,
1203+
'D' : np.array([True, False] * 3, dtype=bool)})
1204+
1205+
tuples = [tuple(x) for x in df.values]
1206+
lists = [list(x) for x in tuples]
1207+
1208+
result = DataFrame.from_records(tuples, names=df.columns)
1209+
result2 = DataFrame.from_records(lists, names=df.columns)
1210+
assert_frame_equal(result, df)
1211+
assert_frame_equal(result2, df)
1212+
11991213
def test_get_agg_axis(self):
12001214
cols = self.frame._get_agg_axis(0)
12011215
self.assert_(cols is self.frame.columns)

0 commit comments

Comments
 (0)