ENH: pass list of tuples/lists to DataFrame.from_records, GH #357

wesm · wesm · commit af2864f20bc5 · 2011-11-11T15:43:12.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -50,6 +50,8 @@ pandas 0.5.1
   - Add `orient` option to `Panel.from_dict` to ease creation of mixed-type
     Panels (GH #359)
   - Add `DataFrame.from_dict` with similar `orient` option
+  - Can pass list of tuples or list of lists to `DataFrame.from_records` for
+    fast conversion to DataFrame (GH #357)
 
 **Improvements to existing features**
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -446,22 +446,43 @@ def to_dict(self):
         return dict((k, v.to_dict()) for k, v in self.iteritems())
 
     @classmethod
-    def from_records(cls, data, index=None, exclude=None):
+    def from_records(cls, data, index=None, exclude=None, names=None):
         """
         Convert structured or record ndarray to DataFrame
 
         Parameters
         ----------
-        data : NumPy structured array
+        data : ndarray (structured dtype), list of tuples or list of lists
         index : string, list of fields, array-like
             Field of array to use as the index, alternately a specific set of
             input labels to use
+        exclude : sequence, default None
+            Columns or fields to exclude
+        names : sequence, default None
+            Column names to use, replacing any found in passed data
 
         Returns
         -------
         df : DataFrame
         """
-        columns, sdict = _rec_to_dict(data)
+        if isinstance(data, (np.ndarray, DataFrame, dict)):
+            columns, sdict = _rec_to_dict(data)
+        else:
+            if isinstance(data[0], tuple):
+                content = list(lib.to_object_array_tuples(data).T)
+            else:
+                # list of lists
+                content = list(lib.to_object_array(data).T)
+
+            if names is None:
+                columns = range(len(content))
+            else:
+                assert(len(names) == len(content))
+                columns = names
+
+            sdict = dict((c, lib.convert_sql_column(vals))
+                         for c, vals in zip(columns, content))
+            del content
 
         if exclude is None:
             exclude = set()
diff --git a/pandas/src/parsing.pyx b/pandas/src/parsing.pyx
@@ -52,6 +52,10 @@ def to_object_array_tuples(list rows):
     return result
 
 def maybe_convert_numeric(ndarray[object] values, set na_values):
+    '''
+    Type inference function: convert strings to numeric values where
+    possible and return an array of the appropriate dtype
+    '''
     cdef:
         Py_ssize_t i, n
         ndarray[float64_t] floats
@@ -94,7 +98,10 @@ def maybe_convert_numeric(ndarray[object] values, set na_values):
     else:
         return ints
 
-def convert_sql_column(ndarray[object] objects):
+def maybe_convert_objects(ndarray[object] objects):
+    '''
+    Type inference function: convert an object array to the proper dtype
+    '''
     cdef:
         Py_ssize_t i, n
         ndarray[float64_t] floats
@@ -157,6 +164,8 @@ def convert_sql_column(ndarray[object] objects):
         else:
             return objects
 
+convert_sql_column = maybe_convert_objects
+
 def try_parse_dates(ndarray[object] values, parser=None):
     cdef:
         Py_ssize_t i, n
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1196,6 +1196,20 @@ def test_from_records_to_records(self):
         self.assertEqual(len(records.dtype.names), 2)
         self.assert_('index' not in records.dtype.names)
 
+    def test_from_records_tuples(self):
+        df = DataFrame({'A' : np.random.randn(6),
+                        'B' : np.arange(6),
+                        'C' : ['foo'] * 6,
+                        'D' : np.array([True, False] * 3, dtype=bool)})
+
+        tuples = [tuple(x) for x in df.values]
+        lists = [list(x) for x in tuples]
+
+        result = DataFrame.from_records(tuples, names=df.columns)
+        result2 = DataFrame.from_records(lists, names=df.columns)
+        assert_frame_equal(result, df)
+        assert_frame_equal(result2, df)
+
     def test_get_agg_axis(self):
         cols = self.frame._get_agg_axis(0)
         self.assert_(cols is self.frame.columns)