Skip to content

Commit df11d07

Browse files
bobhaffner authored and No-Stream committed
preserve kwargs order on assign func for py36plus - pandas-dev#14207 (pandas-dev#17632)
1 parent e74d452 commit df11d07

File tree

3 files changed

+28
-12
lines changed

3 files changed

+28
-12
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ Other Enhancements
162162
- :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`)
163163
- :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`)
164164
- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
165+
- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`)
165166

166167

167168
.. _whatsnew_0210.api_breaking:

pandas/core/frame.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
8383
OrderedDict, raise_with_traceback)
8484
from pandas import compat
85+
from pandas.compat import PY36
8586
from pandas.compat.numpy import function as nv
8687
from pandas.util._decorators import Appender, Substitution
8788
from pandas.util._validators import validate_bool_kwarg
@@ -2575,12 +2576,12 @@ def assign(self, **kwargs):
25752576
25762577
Notes
25772578
-----
2578-
Since ``kwargs`` is a dictionary, the order of your
2579-
arguments may not be preserved. To make things predicatable,
2580-
the columns are inserted in alphabetical order, at the end of
2581-
your DataFrame. Assigning multiple columns within the same
2582-
``assign`` is possible, but you cannot reference other columns
2583-
created within the same ``assign`` call.
2579+
For Python 3.6 and above, the columns are inserted in the order of
2580+
``**kwargs``. For Python 3.5 and earlier, since ``**kwargs`` is
2581+
unordered, the columns are inserted in alphabetical order at the
2582+
end of your DataFrame. Assigning multiple columns within the same
2583+
``assign`` is possible, but you cannot reference other columns
2584+
created within the same ``assign`` call.
25842585
25852586
Examples
25862587
--------
@@ -2620,14 +2621,18 @@ def assign(self, **kwargs):
26202621
data = self.copy()
26212622

26222623
# do all calculations first...
2623-
results = {}
2624+
results = OrderedDict()
26242625
for k, v in kwargs.items():
26252626
results[k] = com._apply_if_callable(v, data)
26262627

2628+
# preserve order for 3.6 and later, but sort by key for 3.5 and earlier
2629+
if PY36:
2630+
results = results.items()
2631+
else:
2632+
results = sorted(results.items())
26272633
# ... and then assign
2628-
for k, v in sorted(results.items()):
2634+
for k, v in results:
26292635
data[k] = v
2630-
26312636
return data
26322637

26332638
def _sanitize_column(self, key, value, broadcast=True):

pandas/tests/frame/test_mutate_columns.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55
from pandas.compat import range, lrange
66
import numpy as np
7+
from pandas.compat import PY36
78

89
from pandas import DataFrame, Series, Index, MultiIndex
910

@@ -61,14 +62,23 @@ def test_assign_multiple(self):
6162
[3, 6, 9, 3, 6]], columns=list('ABCDE'))
6263
assert_frame_equal(result, expected)
6364

64-
def test_assign_alphabetical(self):
65+
def test_assign_order(self):
6566
# GH 9818
6667
df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
6768
result = df.assign(D=df.A + df.B, C=df.A - df.B)
68-
expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]],
69-
columns=list('ABCD'))
69+
70+
if PY36:
71+
expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]],
72+
columns=list('ABDC'))
73+
else:
74+
expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]],
75+
columns=list('ABCD'))
7076
assert_frame_equal(result, expected)
7177
result = df.assign(C=df.A - df.B, D=df.A + df.B)
78+
79+
expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]],
80+
columns=list('ABCD'))
81+
7282
assert_frame_equal(result, expected)
7383

7484
def test_assign_bad(self):

0 commit comments

Comments
 (0)