fixes issues from comments

datajanko · datajanko · commit f958371fd422 · 2017-12-20T22:56:51.000+01:00
- fixes docstring
- fixes wrong indentation
- restructuring tests
- modified content in whatsnew
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -119,6 +119,56 @@ Current Behavior
 
     s.rank(na_option='top')
 
+.. _whatsnew_0220.enhancements.assign_dependent:
+
+
+``.assign()`` accepts dependent arguments
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :func:`DataFrame.assign()` now accepts dependent kwargs. In earlier versions this raised a ``KeyError`` exception. (:issue:`14207`)
+
+Specifically, a new column defined inside ``assign`` may be referenced in the same ``assign`` call if a callable is used. For example:
+
+.. code-block:: ipython
+
+    In [3]: df = pd.DataFrame({'A': [1, 2, 3]})
+
+    In [4]: df.assign(B=df.A, C=lambda x:x['A']+ x['B'])
+    Out[4]:
+       A  B  C
+    0  1  1  2
+    1  2  2  4
+    2  3  3  6
+
+.. warning::
+
+   This may subtly change the behavior of your code when you're
+   using ``assign`` to update an existing column. Previously, callables
+   referring to other variables being updated would get the "old" values.
+
+.. code-block:: ipython
+
+    In [2]: df = pd.DataFrame({"A": [1, 2, 3]})
+
+    In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1)
+    Out[3]:
+       A  C
+    0  2 -1
+    1  3 -2
+    2  4 -3
+
+Now, callables will get the "new" value
+
+.. code-block:: ipython
+
+    In [6]: df.assign(A=df.A+1, C= lambda df: df.A* -1)
+    Out[6]:
+       A  C
+    0  2 -2
+    1  3 -3
+    2  4 -4
+
+
 .. _whatsnew_0220.enhancements.other:
 
 Other Enhancements
@@ -139,7 +189,6 @@ Other Enhancements
 - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
 - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
 - ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`)
-- :func:``DataFrame.assign()`` now acceepts dependent kwargs, e.g. `df.assign(b=1, c=lambda x:x['b'])` does not throw an exception anymore. (:issue: `14207)
 
 .. _whatsnew_0220.api_breaking:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2659,11 +2659,11 @@ def assign(self, **kwargs):
         \*\*kwargs. For python 3.5 and earlier, since \*\*kwargs is unordered,
         the columns are inserted in alphabetical order at the end of your
         DataFrame.  Assigning multiple columns within the same ``assign``
-        is possible, but for python 3.5 and eralier you cannot reference other
-        columns created within the same ``assign`` call. For python 3.6 and
-        above it is possible to reference columns created in an assignment.
-        To this end you have to respect the order of |*|*kwargs and use
-        callables referencing the assigned columns.
+        is possible, but for python 3.5 and earlier, you cannot reference
+        other columns created within the same ``assign`` call.
+        For python 3.6 and above it is possible to reference columns created
+        in an assignment. To this end you have to respect the order of kwargs
+        and use callables referencing the assigned columns.
 
         Examples
         --------
@@ -2713,7 +2713,7 @@ def assign(self, **kwargs):
                 results[k] = com._apply_if_callable(v, data)
 
             # sort by key for 3.5 and earlier
-                results = sorted(results.items())
+            results = sorted(results.items())
             # ... and then assign
             for k, v in results:
                 data[k] = v
diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
@@ -89,26 +89,34 @@ def test_assign_bad(self):
             df.assign(lambda x: x.A)
         with pytest.raises(AttributeError):
             df.assign(C=df.A, D=df.A + df.C)
-        if not PY36:
-            with pytest.raises(KeyError):
-                df.assign(C=lambda df: df.A,
-                          D=lambda df: df['A'] + df['C'])
-            with pytest.raises(KeyError):
-                df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
 
+    @pytest.mark.skipif(PY36, reason="""Issue #14207: valid for python
+                        3.6 and above""")
+    def test_assign_bad_old_version(self):
+        df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
+
+        # Key C does not exist at definition time of df
+        with pytest.raises(KeyError):
+            df.assign(C=lambda df: df.A,
+                      D=lambda df: df['A'] + df['C'])
+        with pytest.raises(KeyError):
+            df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
+
+    @pytest.mark.skipif(not PY36, reason="""Issue #14207: not valid for
+                        python 3.5 and below""")
     def test_assign_dependent(self):
         df = DataFrame({'A': [1, 2], 'B': [3, 4]})
-        if PY36:
-            result = df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
-            expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
-                                 columns=list('ABCD'))
-            assert_frame_equal(result, expected)
 
-            result = df.assign(C=lambda df: df.A,
-                               D=lambda df: df['A'] + df['C'])
-            expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
-                                 columns=list('ABCD'))
-            assert_frame_equal(result, expected)
+        result = df.assign(C=df.A, D=lambda x: x['A'] + x['C'])
+        expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
+                             columns=list('ABCD'))
+        assert_frame_equal(result, expected)
+
+        result = df.assign(C=lambda df: df.A,
+                           D=lambda df: df['A'] + df['C'])
+        expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]],
+                             columns=list('ABCD'))
+        assert_frame_equal(result, expected)
 
     def test_insert_error_msmgs(self):