Skip to content

Commit 76d072f

Browse files
author
datajanko
committed
ENH: df.assign accepting dependent **kwargs (pandas-dev#14207)
Specifically, 'df.assign(b=1, c=lambda x:x['b'])' does not throw an exception in python 3.6 and above. Further details are discussed in Issues pandas-dev#14207 and pandas-dev#18797.
1 parent b6a7cc9 commit 76d072f

File tree

3 files changed

+39
-17
lines changed

3 files changed

+39
-17
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ Other Enhancements
139139
- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
140140
- :func:`DataFrame.to_json` and :func:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
141141
- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`)
142+
- :func:`DataFrame.assign` now accepts dependent kwargs on Python 3.6 and above, e.g. ``df.assign(b=1, c=lambda x:x['b'])`` no longer raises an exception. (:issue:`14207`)
142143

143144
.. _whatsnew_0220.api_breaking:
144145

pandas/core/frame.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -2659,8 +2659,11 @@ def assign(self, **kwargs):
26592659
\*\*kwargs. For python 3.5 and earlier, since \*\*kwargs is unordered,
26602660
the columns are inserted in alphabetical order at the end of your
26612661
DataFrame. Assigning multiple columns within the same ``assign``
2662-
is possible, but you cannot reference other columns created within
2663-
the same ``assign`` call.
2662+
is possible, but for python 3.5 and earlier you cannot reference other
2663+
columns created within the same ``assign`` call. For python 3.6 and
2664+
above it is possible to reference columns created in an assignment.
2665+
To this end you have to respect the order of \*\*kwargs and use
2666+
callables referencing the assigned columns.
26642667
26652668
Examples
26662669
--------
@@ -2699,19 +2702,21 @@ def assign(self, **kwargs):
26992702
"""
27002703
data = self.copy()
27012704

2702-
# do all calculations first...
2703-
results = OrderedDict()
2704-
for k, v in kwargs.items():
2705-
results[k] = com._apply_if_callable(v, data)
2706-
2707-
# preserve order for 3.6 and later, but sort by key for 3.5 and earlier
2705+
# for 3.6 and later: assign in kwargs order so callables can reference earlier columns
27082706
if PY36:
2709-
results = results.items()
2707+
for k, v in kwargs.items():
2708+
data[k] = com._apply_if_callable(v, data)
27102709
else:
2711-
results = sorted(results.items())
2712-
# ... and then assign
2713-
for k, v in results:
2714-
data[k] = v
2710+
# for 3.5 or earlier: do all calculations first...
2711+
results = OrderedDict()
2712+
for k, v in kwargs.items():
2713+
results[k] = com._apply_if_callable(v, data)
2714+
2715+
# sort by key for 3.5 and earlier
2716+
results = sorted(results.items())
2717+
# ... and then assign
2718+
for k, v in results:
2719+
data[k] = v
27152720
return data
27162721

27172722
def _sanitize_column(self, key, value, broadcast=True):

pandas/tests/frame/test_mutate_columns.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,26 @@ def test_assign_bad(self):
8989
df.assign(lambda x: x.A)
9090
with pytest.raises(AttributeError):
9191
df.assign(C=df.A, D=df.A + df.C)
92-
with pytest.raises(KeyError):
93-
df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C'])
94-
with pytest.raises(KeyError):
95-
df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
92+
if not PY36:
93+
with pytest.raises(KeyError):
94+
df.assign(C=lambda df: df.A,
95+
D=lambda df: df['A'] + df['C'])
96+
with pytest.raises(KeyError):
97+
df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
98+
99+
def test_assign_dependent(self):
100+
df = DataFrame({'A': [1, 2], 'B': [3, 4]})
101+
if PY36:
102+
result = df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
103+
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
104+
columns=list('ABCD'))
105+
assert_frame_equal(result, expected)
106+
107+
result = df.assign(C=lambda df: df.A,
108+
D=lambda df: df['A'] + df['C'])
109+
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
110+
columns=list('ABCD'))
111+
assert_frame_equal(result, expected)
96112

97113
def test_insert_error_msmgs(self):
98114

0 commit comments

Comments
 (0)