Skip to content

Commit 81a4853

Browse files
committed
ENH: add ability to pass list of dicts to DataFrame.append (GH #464)
1 parent bd275d7 commit 81a4853

File tree

6 files changed

+78
-15
lines changed

6 files changed

+78
-15
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ pandas 0.7.0
6868
values given a sequence of row and column labels (GH #338)
6969
- Add ``verbose`` option to ``read_csv`` and ``read_table`` to show number of
7070
NA values inserted in non-numeric columns (GH #614)
71+
- Can pass a list of dicts or Series to ``DataFrame.append`` to concatenate
72+
multiple rows (GH #464)
7173

7274
**API Changes**
7375

TODO.rst

+2
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,5 @@ Performance blog
5454
- Groupby
5555
- joining
5656
- Take
57+
58+
git log v0.6.1..master --pretty=format:%aN | sort | uniq -c | sort -rn

doc/source/merging.rst

+18-4
Original file line numberDiff line numberDiff line change
@@ -262,12 +262,12 @@ like GroupBy where the order of a categorical variable is meaningful.
262262

263263
.. _merging.append.row:
264264

265-
Appending single rows to a DataFrame
266-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
265+
Appending rows to a DataFrame
266+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
267267

268268
While not especially efficient (since a new object must be created), you can
269-
append a row to a DataFrame by passing a Series to ``append``, which returns a
270-
new DataFrame as above:
269+
append a single row to a DataFrame by passing a Series or dict to ``append``,
270+
which returns a new DataFrame as above.
271271

272272
.. ipython:: python
273273
@@ -276,6 +276,20 @@ new DataFrame as above:
276276
s = df.xs(3)
277277
df.append(s, ignore_index=True)
278278
279+
You should use ``ignore_index`` with this method to instruct DataFrame to
280+
discard its index. If you wish to preserve the index, you should construct an
281+
appropriately-indexed DataFrame and append or concatenate those objects.
282+
283+
You can also pass a list of dicts or Series:
284+
285+
.. ipython:: python
286+
287+
df = DataFrame(np.random.randn(5, 4),
288+
columns=['foo', 'bar', 'baz', 'qux'])
289+
dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
290+
{'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
291+
result = df.append(dicts, ignore_index=True)
292+
result
279293
280294
.. _merging.join:
281295

pandas/core/frame.py

+23-9
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
import pandas.core.datetools as datetools
4343
import pandas._tseries as lib
4444

45-
4645
#----------------------------------------------------------------------
4746
# Docstring templates
4847

@@ -71,6 +70,7 @@
7170
result : DataFrame
7271
"""
7372

73+
7474
_stat_doc = """
7575
Return %(name)s over requested axis.
7676
%(na_action)s
@@ -2825,26 +2825,41 @@ def applymap(self, func):
28252825

28262826
def append(self, other, ignore_index=False, verify_integrity=True):
28272827
"""
2828-
Append columns of other to end of this frame's columns and index.
2829-
Columns not in this frame are added as new columns.
2828+
Append rows of other to the end of this frame,
2829+
returning a new object. Columns not in this frame are added as new
2830+
columns.
28302831
28312832
Parameters
28322833
----------
2833-
other : DataFrame
2834+
other : DataFrame, Series/dict-like object, or list of Series/dict-like objects
28342835
ignore_index : boolean, default False
28352836
If True do not use the index labels. Useful for gluing together
28362837
record arrays
28372838
2839+
Notes
2840+
-----
2841+
If a list of dicts is passed and the keys are all contained in the
2842+
DataFrame's columns, the order of the columns in the resulting DataFrame
2843+
will be unchanged.
2844+
28382845
Returns
28392846
-------
28402847
appended : DataFrame
28412848
"""
2842-
if isinstance(other, Series):
2849+
if isinstance(other, (Series, dict)):
2850+
if isinstance(other, dict):
2851+
other = Series(other)
2852+
if other.name is None and not ignore_index:
2853+
raise Exception('Can only append a Series if ignore_index=True')
2854+
2855+
index = None if other.name is None else [other.name]
28432856
other = other.reindex(self.columns, copy=False)
28442857
other = DataFrame(other.values.reshape((1, len(other))),
2845-
columns=self.columns)
2846-
if not ignore_index:
2847-
raise Exception('Can only append a Series if ignore_index=True')
2858+
index=index, columns=self.columns)
2859+
elif isinstance(other, list):
2860+
other = DataFrame(other)
2861+
if (self.columns.get_indexer(other.columns) >= 0).all():
2862+
other = other.ix[:, self.columns]
28482863

28492864
if not other:
28502865
return self.copy()
@@ -3932,7 +3947,6 @@ def _lexsort_indexer(keys):
39323947
indexer, _ = lib.groupsort_indexer(comp_ids.astype('i4'), max_group)
39333948
return indexer
39343949

3935-
39363950
if __name__ == '__main__':
39373951
import nose
39383952
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

pandas/tests/test_frame.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -2316,7 +2316,7 @@ def test_convert_objects_no_conversion(self):
23162316
mixed2 = mixed1.convert_objects()
23172317
assert_frame_equal(mixed1, mixed2)
23182318

2319-
def test_append_series(self):
2319+
def test_append_series_dict(self):
23202320
df = DataFrame(np.random.randn(5, 4),
23212321
columns=['foo', 'bar', 'baz', 'qux'])
23222322

@@ -2329,12 +2329,38 @@ def test_append_series(self):
23292329
ignore_index=True)
23302330
assert_frame_equal(result, expected)
23312331

2332+
# dict
2333+
result = df.append(series.to_dict(), ignore_index=True)
2334+
assert_frame_equal(result, expected)
2335+
23322336
result = df.append(series[::-1][:3], ignore_index=True)
23332337
expected = df.append(DataFrame({0 : series[::-1][:3]}).T,
23342338
ignore_index=True)
23352339
assert_frame_equal(result, expected.ix[:, result.columns])
23362340

23372341
# can append when name set
2342+
row = df.ix[4]
2343+
row.name = 5
2344+
result = df.append(row)
2345+
expected = df.append(df[-1:], ignore_index=True)
2346+
assert_frame_equal(result, expected)
2347+
2348+
def test_append_list_of_series_dicts(self):
2349+
df = DataFrame(np.random.randn(5, 4),
2350+
columns=['foo', 'bar', 'baz', 'qux'])
2351+
2352+
dicts = [x.to_dict() for idx, x in df.iterrows()]
2353+
2354+
result = df.append(dicts, ignore_index=True)
2355+
expected = df.append(df, ignore_index=True)
2356+
assert_frame_equal(result, expected)
2357+
2358+
# different columns
2359+
dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
2360+
{'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
2361+
result = df.append(dicts, ignore_index=True)
2362+
expected = df.append(DataFrame(dicts), ignore_index=True)
2363+
assert_frame_equal(result, expected)
23382364

23392365
def test_asfreq(self):
23402366
offset_monthly = self.tsframe.asfreq(datetools.bmonthEnd)

vb_suite/frame_ctor.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
index = [rands(10) for _ in xrange(N)]
1313
columns = [rands(10) for _ in xrange(K)]
1414
frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)
15-
data = frame.to_dict()
15+
16+
try:
17+
data = frame.to_dict()
18+
except:
19+
data = frame.toDict()
20+
1621
some_dict = data.values()[0]
1722
dict_list = [dict(zip(columns, row)) for row in frame.values]
1823
"""

0 commit comments

Comments
 (0)