From ebaff833622dd2fa49ed39bd9a0492e5acbfa80a Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Fri, 22 Sep 2017 15:29:50 -0500 Subject: [PATCH 1/2] preserve kwargs order on assign func for py36plus --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 18 +++++++++++------- pandas/tests/frame/test_mutate_columns.py | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5003aa0d97c1c..d06cd5280ef35 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -117,6 +117,7 @@ Other Enhancements - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) +- :func:`assign` will preserve the original order of **kwargs for Python 3.6+ users .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dd5d490ea66a8..57502ebe10db9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2576,10 +2576,10 @@ def assign(self, **kwargs): Notes ----- Since ``kwargs`` is a dictionary, the order of your - arguments may not be preserved. To make things predicatable, - the columns are inserted in alphabetical order, at the end of - your DataFrame. Assigning multiple columns within the same - ``assign`` is possible, but you cannot reference other columns + arguments may not be preserved if you are using Python 3.5 and earlier. + To make things predicatable, the columns are inserted in alphabetical + order, at the end of your DataFrame. 
Assigning multiple columns within + the same ``assign`` is possible, but you cannot reference other columns created within the same ``assign`` call. Examples @@ -2620,14 +2620,18 @@ def assign(self, **kwargs): data = self.copy() # do all calculations first... - results = {} + results = OrderedDict() for k, v in kwargs.items(): results[k] = com._apply_if_callable(v, data) + # sort by key for 3.5 and earlier, but preserve order for 3.6 and later + if sys.version_info <= (3, 5): + results = sorted(results.items()) + else: + results = results.items() # ... and then assign - for k, v in sorted(results.items()): + for k, v in results: data[k] = v - return data def _sanitize_column(self, key, value, broadcast=True): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 4462260a290d9..e3f1bbaf46b97 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -4,6 +4,7 @@ import pytest from pandas.compat import range, lrange import numpy as np +import sys from pandas import DataFrame, Series, Index, MultiIndex @@ -61,14 +62,23 @@ def test_assign_multiple(self): [3, 6, 9, 3, 6]], columns=list('ABCDE')) assert_frame_equal(result, expected) - def test_assign_alphabetical(self): + def test_assign_order(self): # GH 9818 df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) result = df.assign(D=df.A + df.B, C=df.A - df.B) - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], - columns=list('ABCD')) + + if sys.version_info <= (3, 5): + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + else: + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], + columns=list('ABDC')) assert_frame_equal(result, expected) result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + assert_frame_equal(result, expected) def test_assign_bad(self): From eff8af74116c34936696b267bb7828578942ded3 Mon Sep 17 
00:00:00 2001 From: Bob Haffner Date: Fri, 22 Sep 2017 23:01:48 -0500 Subject: [PATCH 2/2] changed version checking if and docstring --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/frame.py | 21 +++++++++++---------- pandas/tests/frame/test_mutate_columns.py | 10 +++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d06cd5280ef35..49f831b95a71f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -117,7 +117,7 @@ Other Enhancements - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) -- :func:`assign` will preserve the original order of **kwargs for Python 3.6+ users +- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 57502ebe10db9..3b85c864d877c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,6 +82,7 @@ from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat +from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_bool_kwarg @@ -2575,12 +2576,12 @@ def assign(self, **kwargs): Notes ----- - Since ``kwargs`` is a dictionary, the order of your - arguments may not be preserved if you are using Python 3.5 and earlier. 
- To make things predicatable, the columns are inserted in alphabetical - order, at the end of your DataFrame. Assigning multiple columns within - the same ``assign`` is possible, but you cannot reference other columns - created within the same ``assign`` call. + For Python 3.6 and above, the columns are inserted in the order of + ``**kwargs``. For Python 3.5 and earlier, since ``**kwargs`` is unordered, + the columns are inserted in alphabetical order at the end of your + DataFrame. Assigning multiple columns within the same ``assign`` + is possible, but you cannot reference other columns created within + the same ``assign`` call. Examples -------- @@ -2624,11 +2625,11 @@ def assign(self, **kwargs): for k, v in kwargs.items(): results[k] = com._apply_if_callable(v, data) - # sort by key for 3.5 and earlier, but preserve order for 3.6 and later - if sys.version_info <= (3, 5): - results = sorted(results.items()) - else: + # preserve order for 3.6 and later, but sort by key for 3.5 and earlier + if PY36: results = results.items() + else: + results = sorted(results.items()) # ...
and then assign for k, v in results: data[k] = v diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index e3f1bbaf46b97..0043475702f94 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -4,7 +4,7 @@ import pytest from pandas.compat import range, lrange import numpy as np -import sys +from pandas.compat import PY36 from pandas import DataFrame, Series, Index, MultiIndex @@ -67,12 +67,12 @@ def test_assign_order(self): df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) result = df.assign(D=df.A + df.B, C=df.A - df.B) - if sys.version_info <= (3, 5): - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], - columns=list('ABCD')) - else: + if PY36: expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list('ABDC')) + else: + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) assert_frame_equal(result, expected) result = df.assign(C=df.A - df.B, D=df.A + df.B)