diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5003aa0d97c1c..49f831b95a71f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -117,6 +117,7 @@ Other Enhancements - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) +- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dd5d490ea66a8..3b85c864d877c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,6 +82,7 @@ from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat +from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_bool_kwarg @@ -2575,12 +2576,12 @@ def assign(self, **kwargs): Notes ----- - Since ``kwargs`` is a dictionary, the order of your - arguments may not be preserved. To make things predicatable, - the columns are inserted in alphabetical order, at the end of - your DataFrame. Assigning multiple columns within the same - ``assign`` is possible, but you cannot reference other columns - created within the same ``assign`` call. + For python 3.6 and above, the columns are inserted in the order of + **kwargs. For python 3.5 and earlier, since **kwargs is unordered, + the columns are inserted in alphabetical order at the end of your + DataFrame. Assigning multiple columns within the same ``assign`` + is possible, but you cannot reference other columns created within + the same ``assign`` call. Examples -------- @@ -2620,14 +2621,18 @@ def assign(self, **kwargs): data = self.copy() # do all calculations first... - results = {} + results = OrderedDict() for k, v in kwargs.items(): results[k] = com._apply_if_callable(v, data) + # preserve order for 3.6 and later, but sort by key for 3.5 and earlier + if PY36: + results = results.items() + else: + results = sorted(results.items()) # ... and then assign - for k, v in sorted(results.items()): + for k, v in results: data[k] = v - return data def _sanitize_column(self, key, value, broadcast=True): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 4462260a290d9..0043475702f94 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -4,6 +4,7 @@ import pytest from pandas.compat import range, lrange import numpy as np +from pandas.compat import PY36 from pandas import DataFrame, Series, Index, MultiIndex @@ -61,14 +62,23 @@ def test_assign_multiple(self): [3, 6, 9, 3, 6]], columns=list('ABCDE')) assert_frame_equal(result, expected) - def test_assign_alphabetical(self): + def test_assign_order(self): # GH 9818 df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) result = df.assign(D=df.A + df.B, C=df.A - df.B) - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], - columns=list('ABCD')) + + if PY36: + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], + columns=list('ABDC')) + else: + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) assert_frame_equal(result, expected) result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + assert_frame_equal(result, expected) def test_assign_bad(self):