ENH: add ability to pass list of dicts to DataFrame.append (GH #464)

wesm · wesm · commit 81a4853cb459 · 2012-01-14T15:31:36.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -68,6 +68,8 @@ pandas 0.7.0
     values given a sequence of row and column labels (GH #338)
   - Add ``verbose`` option to ``read_csv`` and ``read_table`` to show number of
     NA values inserted in non-numeric columns (GH #614)
+  - Can pass a list of dicts or Series to ``DataFrame.append`` to concatenate
+    multiple rows (GH #464)
 
 **API Changes**
 
diff --git a/TODO.rst b/TODO.rst
@@ -54,3 +54,5 @@ Performance blog
 - Groupby
 - joining
 - Take
+
+git log v0.6.1..master --pretty=format:%aN | sort | uniq -c | sort -rn
diff --git a/doc/source/merging.rst b/doc/source/merging.rst
@@ -262,12 +262,12 @@ like GroupBy where the order of a categorical variable is meaningful.
 
 .. _merging.append.row:
 
-Appending single rows to a DataFrame
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Appending rows to a DataFrame
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 While not especially efficient (since a new object must be created), you can
-append a row to a DataFrame by passing a Series to ``append``, which returns a
-new DataFrame as above:
+append a single row to a DataFrame by passing a Series or dict to ``append``,
+which returns a new DataFrame as above.
 
 .. ipython:: python
 
@@ -276,6 +276,20 @@ new DataFrame as above:
    s = df.xs(3)
    df.append(s, ignore_index=True)
 
+You should use ``ignore_index=True`` with this method to instruct DataFrame
+to discard its index. If you wish to preserve the index, you should construct
+an appropriately-indexed DataFrame and append or concatenate those objects.
+
+You can also pass a list of dicts or Series:
+
+.. ipython:: python
+
+   df = DataFrame(np.random.randn(5, 4),
+                  columns=['foo', 'bar', 'baz', 'qux'])
+   dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
+            {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
+   result = df.append(dicts, ignore_index=True)
+   result
 
 .. _merging.join:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -42,7 +42,6 @@
 import pandas.core.datetools as datetools
 import pandas._tseries as lib
 
-
 #----------------------------------------------------------------------
 # Docstring templates
 
@@ -71,6 +70,7 @@
 result : DataFrame
 """
 
+
 _stat_doc = """
 Return %(name)s over requested axis.
 %(na_action)s
@@ -2825,26 +2825,41 @@ def applymap(self, func):
 
     def append(self, other, ignore_index=False, verify_integrity=True):
         """
-        Append columns of other to end of this frame's columns and index.
-        Columns not in this frame are added as new columns.
+        Append columns of other to end of this frame's columns and index,
+        returning a new object.  Columns not in this frame are added as new
+        columns.
 
         Parameters
         ----------
-        other : DataFrame
+        other : DataFrame or list of Series/dict-like objects
         ignore_index : boolean, default False
             If True do not use the index labels. Useful for gluing together
             record arrays
 
+        Notes
+        -----
+        If a list of dicts is passed and the keys are all contained in the
+        DataFrame's columns, the order of the columns in the resulting
+        DataFrame will be unchanged.
+
         Returns
         -------
         appended : DataFrame
         """
-        if isinstance(other, Series):
+        if isinstance(other, (Series, dict)):
+            if isinstance(other, dict):
+                other = Series(other)
+            if other.name is None and not ignore_index:
+                raise Exception('Can only append a Series if ignore_index=True')
+
+            index = None if other.name is None else [other.name]
             other = other.reindex(self.columns, copy=False)
             other = DataFrame(other.values.reshape((1, len(other))),
-                              columns=self.columns)
-            if not ignore_index:
-                raise Exception('Can only append a Series if ignore_index=True')
+                              index=index, columns=self.columns)
+        elif isinstance(other, list):
+            other = DataFrame(other)
+            if (self.columns.get_indexer(other.columns) >= 0).all():
+                other = other.ix[:, self.columns]
 
         if not other:
             return self.copy()
@@ -3932,7 +3947,6 @@ def _lexsort_indexer(keys):
     indexer, _ = lib.groupsort_indexer(comp_ids.astype('i4'), max_group)
     return indexer
 
-
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -2316,7 +2316,7 @@ def test_convert_objects_no_conversion(self):
         mixed2 = mixed1.convert_objects()
         assert_frame_equal(mixed1, mixed2)
 
-    def test_append_series(self):
+    def test_append_series_dict(self):
         df = DataFrame(np.random.randn(5, 4),
                        columns=['foo', 'bar', 'baz', 'qux'])
 
@@ -2329,12 +2329,38 @@ def test_append_series(self):
                              ignore_index=True)
         assert_frame_equal(result, expected)
 
+        # dict
+        result = df.append(series.to_dict(), ignore_index=True)
+        assert_frame_equal(result, expected)
+
         result = df.append(series[::-1][:3], ignore_index=True)
         expected = df.append(DataFrame({0 : series[::-1][:3]}).T,
                              ignore_index=True)
         assert_frame_equal(result, expected.ix[:, result.columns])
 
         # can append when name set
+        row = df.ix[4]
+        row.name = 5
+        result = df.append(row)
+        expected = df.append(df[-1:], ignore_index=True)
+        assert_frame_equal(result, expected)
+
+    def test_append_list_of_series_dicts(self):
+        df = DataFrame(np.random.randn(5, 4),
+                       columns=['foo', 'bar', 'baz', 'qux'])
+
+        dicts = [x.to_dict() for idx, x in df.iterrows()]
+
+        result = df.append(dicts, ignore_index=True)
+        expected = df.append(df, ignore_index=True)
+        assert_frame_equal(result, expected)
+
+        # different columns
+        dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
+                 {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
+        result = df.append(dicts, ignore_index=True)
+        expected = df.append(DataFrame(dicts), ignore_index=True)
+        assert_frame_equal(result, expected)
 
     def test_asfreq(self):
         offset_monthly = self.tsframe.asfreq(datetools.bmonthEnd)
diff --git a/vb_suite/frame_ctor.py b/vb_suite/frame_ctor.py
@@ -12,7 +12,12 @@
 index = [rands(10) for _ in xrange(N)]
 columns = [rands(10) for _ in xrange(K)]
 frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)
-data = frame.to_dict()
+
+try:
+    data = frame.to_dict()
+except:
+    data = frame.toDict()
+
 some_dict = data.values()[0]
 dict_list = [dict(zip(columns, row)) for row in frame.values]
 """

-Original file line number
+Diff line change
 - Groupby
 - joining
 - Take
++
 +git log v0.6.1..master --pretty=format:%aN | sort | uniq -c | sort -rn