Skip to content

Commit fa9d1ee

Browse files
author
datajanko
committed
ENH: df.assign accepting dependent **kwargs (pandas-dev#14207)
Specifically, 'df.assign(b=1, c=lambda x:x['b'])' does not throw an exception in python 3.6 and above. Further details are discussed in Issues pandas-dev#14207 and pandas-dev#18797.
1 parent b6a7cc9 commit fa9d1ee

File tree

2 files changed

+39
-18
lines changed

2 files changed

+39
-18
lines changed

pandas/core/frame.py

+19-14
Original file line numberDiff line numberDiff line change
@@ -2659,8 +2659,11 @@ def assign(self, **kwargs):
26592659
\*\*kwargs. For python 3.5 and earlier, since \*\*kwargs is unordered,
26602660
the columns are inserted in alphabetical order at the end of your
26612661
DataFrame. Assigning multiple columns within the same ``assign``
2662-
is possible, but you cannot reference other columns created within
2663-
the same ``assign`` call.
2662+
is possible, but for python 3.5 and earlier you cannot reference other
2663+
columns created within the same ``assign`` call. For python 3.6 and
2664+
above it is possible to reference columns created in an assignment.
2665+
To this end you have to respect the order of \*\*kwargs and use
2666+
callables referencing the assigned columns.
26642667
26652668
Examples
26662669
--------
@@ -2683,7 +2686,7 @@ def assign(self, **kwargs):
26832686
26842687
Where the value already exists and is inserted:
26852688
2686-
>>> newcol = np.log(df['A'])
2689+
>>> newcol = np.log(df['A'])
26872690
>>> df.assign(ln_A=newcol)
26882691
A B ln_A
26892692
0 1 0.426905 0.000000
@@ -2699,19 +2702,21 @@ def assign(self, **kwargs):
26992702
"""
27002703
data = self.copy()
27012704

2702-
# do all calculations first...
2703-
results = OrderedDict()
2704-
for k, v in kwargs.items():
2705-
results[k] = com._apply_if_callable(v, data)
2706-
2707-
# preserve order for 3.6 and later, but sort by key for 3.5 and earlier
2705+
# for 3.6 and later: preserve the order of kwargs
27082706
if PY36:
2709-
results = results.items()
2707+
for k, v in kwargs.items():
2708+
data[k] = com._apply_if_callable(v, data)
27102709
else:
2711-
results = sorted(results.items())
2712-
# ... and then assign
2713-
for k, v in results:
2714-
data[k] = v
2710+
# for 3.5 or earlier: do all calculations first...
2711+
results = OrderedDict()
2712+
for k, v in kwargs.items():
2713+
results[k] = com._apply_if_callable(v, data)
2714+
2715+
# sort by key for 3.5 and earlier
2716+
results = sorted(results.items())
2717+
# ... and then assign
2718+
for k, v in results:
2719+
data[k] = v
27152720
return data
27162721

27172722
def _sanitize_column(self, key, value, broadcast=True):

pandas/tests/frame/test_mutate_columns.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,26 @@ def test_assign_bad(self):
8989
df.assign(lambda x: x.A)
9090
with pytest.raises(AttributeError):
9191
df.assign(C=df.A, D=df.A + df.C)
92-
with pytest.raises(KeyError):
93-
df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C'])
94-
with pytest.raises(KeyError):
95-
df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
92+
if not PY36:
93+
with pytest.raises(KeyError):
94+
df.assign(C=lambda df: df.A,
95+
D=lambda df: df['A'] + df['C'])
96+
with pytest.raises(KeyError):
97+
df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
98+
99+
def test_assign_dependent(self):
100+
df = DataFrame({'A': [1, 2], 'B': [3, 4]})
101+
if PY36:
102+
result = df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
103+
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
104+
columns=list('ABCD'))
105+
assert_frame_equal(result, expected)
106+
107+
result = df.assign(C=lambda df: df.A,
108+
D=lambda df: df['A'] + df['C'])
109+
expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
110+
columns=list('ABCD'))
111+
assert_frame_equal(result, expected)
96112

97113
def test_insert_error_msmgs(self):
98114

0 commit comments

Comments
 (0)