Skip to content

Commit 0a27f5d

Browse files
author
datajanko
committed
ENH: df.assign accepting dependent **kwargs (pandas-dev#14207)
Specifically, 'df.assign(b=1, c=lambda x:x['b'])' does not throw an exception in python 3.6 and above. Further details are discussed in Issues pandas-dev#14207 and pandas-dev#18797.
1 parent 23fb339 commit 0a27f5d

File tree

3 files changed

+93
-13
lines changed

3 files changed

+93
-13
lines changed

doc/source/whatsnew/v0.23.0.txt

+51
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,57 @@ Please note that the string `index` is not supported with the round trip format,
176176
new_df
177177
print(new_df.index.name)
178178

179+
.. _whatsnew_0230.enhancements.assign_dependent:
180+
181+
``.assign()`` accepts dependent arguments
182+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
183+
184+
The :func:`DataFrame.assign()` now accepts dependent kwargs on Python 3.6 and above. In earlier versions this raised a ``KeyError`` exception; it no longer does. (:issue:`14207`)
185+
186+
Specifically, a new column defined inside ``assign`` may be referenced later in the same ``assign`` call if a callable is used. For example:
187+
188+
.. code-block:: ipython
189+
190+
In [3]: df = pd.DataFrame({'A': [1, 2, 3]})
191+
192+
In [4]: df.assign(B=df.A, C=lambda x:x['A']+ x['B'])
193+
Out[4]:
194+
A B C
195+
0 1 1 2
196+
1 2 2 4
197+
2 3 3 6
198+
199+
.. warning::
200+
201+
This may subtly change the behavior of your code when you're
202+
using ``assign`` to update an existing column. Previously, callables
203+
referring to other variables being updated would get the "old" values.
204+
205+
.. code-block:: ipython
206+
207+
In [2]: df = pd.DataFrame({"A": [1, 2, 3]})
208+
209+
In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1)
210+
Out[3]:
211+
A C
212+
0 2 -1
213+
1 3 -2
214+
2 4 -3
215+
216+
Now, callables will get the "new" value
217+
218+
.. code-block:: ipython
219+
220+
221+
In [4]: df = pd.DataFrame({"A": [1, 2, 3]})
222+
223+
In [5]: df.assign(A=df.A+1, C= lambda df: df.A* -1)
224+
Out[5]:
225+
A C
226+
0 2 -2
227+
1 3 -3
228+
2 4 -4
229+
179230
.. _whatsnew_0230.enhancements.other:
180231

181232
Other Enhancements

pandas/core/frame.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -2675,8 +2675,11 @@ def assign(self, **kwargs):
26752675
\*\*kwargs. For python 3.5 and earlier, since \*\*kwargs is unordered,
26762676
the columns are inserted in alphabetical order at the end of your
26772677
DataFrame. Assigning multiple columns within the same ``assign``
2678-
is possible, but you cannot reference other columns created within
2679-
the same ``assign`` call.
2678+
is possible, but for python 3.5 and earlier, you cannot reference
2679+
other columns created within the same ``assign`` call.
2680+
For python 3.6 and above it is possible to reference columns created
2681+
in an assignment. To this end you have to respect the order of kwargs
2682+
and use callables referencing the assigned columns.
26802683
26812684
Examples
26822685
--------
@@ -2715,19 +2718,21 @@ def assign(self, **kwargs):
27152718
"""
27162719
data = self.copy()
27172720

2718-
# do all calculations first...
2719-
results = OrderedDict()
2720-
for k, v in kwargs.items():
2721-
results[k] = com._apply_if_callable(v, data)
2722-
2723-
# preserve order for 3.6 and later, but sort by key for 3.5 and earlier
2721+
# for 3.6 preserve order of kwargs
27242722
if PY36:
2725-
results = results.items()
2723+
for k, v in kwargs.items():
2724+
data[k] = com._apply_if_callable(v, data)
27262725
else:
2726+
# for 3.5 or earlier: do all calculations first...
2727+
results = OrderedDict()
2728+
for k, v in kwargs.items():
2729+
results[k] = com._apply_if_callable(v, data)
2730+
2731+
# sort by key for 3.5 and earlier
27272732
results = sorted(results.items())
2728-
# ... and then assign
2729-
for k, v in results:
2730-
data[k] = v
2733+
# ... and then assign
2734+
for k, v in results:
2735+
data[k] = v
27312736
return data
27322737

27332738
def _sanitize_column(self, key, value, broadcast=True):

pandas/tests/frame/test_mutate_columns.py

+25-1
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,35 @@ def test_assign_bad(self):
8989
df.assign(lambda x: x.A)
9090
with pytest.raises(AttributeError):
9191
df.assign(C=df.A, D=df.A + df.C)
92+
93+
@pytest.mark.skipif(PY36, reason="""Issue #14207: valid for python
94+
3.6 and above""")
95+
def test_assign_bad_old_version(self):
96+
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
97+
98+
# Key C does not exist at defition time of df
9299
with pytest.raises(KeyError):
93-
df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C'])
100+
df.assign(C=lambda df: df.A,
101+
D=lambda df: df['A'] + df['C'])
94102
with pytest.raises(KeyError):
95103
df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
96104

105+
@pytest.mark.skipif(not PY36, reason="""Issue #14207: not valid for
106+
python 3.5 and below""")
107+
def test_assign_dependent(self):
108+
df = DataFrame({'A': [1, 2], 'B': [3, 4]})
109+
110+
result = df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
111+
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
112+
columns=list('ABCD'))
113+
assert_frame_equal(result, expected)
114+
115+
result = df.assign(C=lambda df: df.A,
116+
D=lambda df: df['A'] + df['C'])
117+
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
118+
columns=list('ABCD'))
119+
assert_frame_equal(result, expected)
120+
97121
def test_insert_error_msmgs(self):
98122

99123
# GH 7432

0 commit comments

Comments
 (0)