ENH: DataFrame.__init__ now accepts structured arrays; general performance tweak in the constructor too

wesm · wesm · commit 955b727f5029 · 2011-07-20T14:15:03.000-04:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -147,9 +147,18 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 mgr = mgr.cast(dtype)
         elif isinstance(data, dict):
             mgr = self._init_dict(data, index, columns, dtype=dtype)
-        elif isinstance(data, (np.ndarray, list)):
-            mgr = self._init_matrix(data, index, columns, dtype=dtype,
-                                    copy=copy)
+        elif isinstance(data, np.ndarray):
+            if data.dtype.names:
+                data_columns, data = _rec_to_dict(data)
+                if columns is None:
+                    columns = data_columns
+                mgr = self._init_dict(data, index, columns, dtype=dtype)
+            else:
+                mgr = self._init_ndarray(data, index, columns, dtype=dtype,
+                                         copy=copy)
+        elif isinstance(data, list):
+            mgr = self._init_ndarray(data, index, columns, dtype=dtype,
+                                     copy=copy)
         else:
             raise PandasError('DataFrame constructor not properly called!')
 
@@ -183,8 +192,8 @@ def _init_dict(self, data, index, columns, dtype=None):
         mgr = BlockManager(blocks, [columns, index])
         return mgr.consolidate()
 
-    def _init_matrix(self, values, index, columns, dtype=None,
-                     copy=False):
+    def _init_ndarray(self, values, index, columns, dtype=None,
+                      copy=False):
         values = _prep_ndarray(values, copy=copy)
 
         if dtype is not None:
@@ -347,16 +356,13 @@ def from_records(cls, data, indexField=None):
         -------
         DataFrame
         """
-        # Dtype when you have records
-        if not issubclass(data.dtype.type, np.void):
+        if not data.dtype.names:
             raise Exception('Input was not a structured array!')
 
-        columns = data.dtype.names
-        sdict = dict((k, data[k]) for k in columns)
-
+        columns, sdict = _rec_to_dict(data)
         if indexField is not None:
             index = sdict.pop(indexField)
-            columns = [c for c in columns if c != indexField]
+            columns.remove(indexField)
         else:
             index = np.arange(len(data))
 
@@ -2484,6 +2490,12 @@ def _prep_ndarray(values, copy=True):
 
     return values
 
+
+def _rec_to_dict(arr):  # arr must have named fields (dtype.names)
+    columns = list(arr.dtype.names)  # list, so callers may mutate it
+    sdict = dict((k, arr[k]) for k in columns)  # field name -> arr[name]
+    return columns, sdict
+
 def _homogenize_series(data, index, dtype=None):
     homogenized = {}
 
@@ -2507,9 +2519,9 @@ def _homogenize_series(data, index, dtype=None):
 
             # only *attempt* to cast to dtype
             try:
-                v = Series(v, dtype=dtype, index=index)
+                v = np.asarray(v, dtype=dtype)
             except Exception:
-                v = Series(v, index=index)
+                v = np.asarray(v)
 
         homogenized[k] = v
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -731,7 +731,7 @@ def _simple_blockify(dct, ref_items, dtype):
 
 def _stack_dict(dct, ref_items):
     items = [x for x in ref_items if x in dct]
-    stacked = np.vstack([dct[k].values for k in items])
+    stacked = np.vstack([np.asarray(dct[k]) for k in items])
     return items, stacked
 
 def _blocks_to_series_dict(blocks, index=None):
diff --git a/pandas/core/sparse.py b/pandas/core/sparse.py
@@ -783,7 +783,7 @@ def density(self):
     #----------------------------------------------------------------------
     # Support different internal rep'n of SparseDataFrame
 
-    def _insert_item(self, key, value):
+    def _set_item(self, key, value):
         sp_maker = lambda x: SparseSeries(x, index=self.index,
                                           fill_value=self.default_fill_value,
                                           kind=self.default_kind)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -626,6 +626,20 @@ def test_constructor_mixed(self):
 
         self.assertEqual(self.mixed_frame['foo'].dtype, np.object_)
 
+    def test_constructor_rec(self):
+        rec = self.frame.to_records(index=False)
+
+        rec.dtype.names = list(rec.dtype.names)[::-1]  # reverse field names
+
+        index = self.frame.index
+
+        df = DataFrame(rec)  # columns should come from rec.dtype.names
+        self.assert_(np.array_equal(df.columns, rec.dtype.names))
+
+        df2 = DataFrame(rec, index=index)  # explicit index is kept
+        self.assert_(np.array_equal(df2.columns, rec.dtype.names))
+        self.assert_(df2.index.equals(index))
+
     def test_constructor_bool(self):
         df = DataFrame({0 : np.ones(10, dtype=bool),
                         1 : np.zeros(10, dtype=bool)})