From 758a905a082d621c54050af86cf014b332d10b66 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 5 Apr 2015 19:17:04 -0500 Subject: [PATCH] API: Sort keys for DataFrame.assign Previously the order was arbitrary. For predictability, we'll sort before inserting. --- doc/source/dsintro.rst | 6 ++++-- doc/source/whatsnew/v0.16.1.txt | 4 ++++ pandas/core/frame.py | 11 ++++++----- pandas/tests/test_frame.py | 19 ++++++++++++++----- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index e1c14029f1cf9..adcf2fca9b4c5 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -461,7 +461,7 @@ Inspired by `dplyr's `__ ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` method that allows you to easily create new columns that are potentially -derived from existing columns. +derived from existing columns. .. ipython:: python @@ -511,7 +511,9 @@ DataFrame is returned, with the new values inserted. .. warning:: Since the function signature of ``assign`` is ``**kwargs``, a dictionary, - the order of the new columns in the resulting DataFrame cannot be guaranteed. + the order of the new columns in the resulting DataFrame cannot be guaranteed + to match the order you pass in. To make things predictable, items are inserted + alphabetically (by key) at the end of the DataFrame. All expressions are computed first, and then assigned. So you can't refer to another column being assigned in the same call to ``assign``. For example: diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index f691b0842f071..653c296023c4e 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -45,6 +45,10 @@ API changes - Add support for separating years and quarters using dashes, for example 2014-Q1. (:issue:`9688`) +- :meth:`~pandas.DataFrame.assign` now inserts new columns in alphabetical order. Previously + the order was arbitrary. (:issue:`9777`) + + .. 
_whatsnew_0161.performance: Performance Improvements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f700d4316842c..8b683ad89558a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2244,10 +2244,11 @@ def assign(self, **kwargs): Notes ----- Since ``kwargs`` is a dictionary, the order of your - arguments may not be preserved, and so the order of the - new columns is not well defined. Assigning multiple - columns within the same ``assign`` is possible, but you cannot - reference other columns created within the same ``assign`` call. + arguments may not be preserved. To make things predictable, + the columns are inserted in alphabetical order, at the end of + your DataFrame. Assigning multiple columns within the same + ``assign`` is possible, but you cannot reference other columns + created within the same ``assign`` call. Examples -------- @@ -2296,7 +2297,7 @@ def assign(self, **kwargs): results[k] = v # ... and then assign - for k, v in results.items(): + for k, v in sorted(results.items()): data[k] = v return data diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3e4c16f63035f..e4abe15dee493 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -14073,12 +14073,21 @@ def test_assign(self): assert_frame_equal(result, expected) def test_assign_multiple(self): - df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=['A', 'B']) result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) - expected = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9], - 'D': [1, 2, 3], 'E': [4, 5, 6]}) - # column order isn't preserved - assert_frame_equal(result.reindex_like(expected), expected) + expected = DataFrame([[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], + [3, 6, 9, 3, 6]], columns=list('ABCDE')) + assert_frame_equal(result, expected) + + def test_assign_alphabetical(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + result = 
df.assign(D=df.A + df.B, C=df.A - df.B) + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + assert_frame_equal(result, expected) def test_assign_bad(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})