Add more tests for apply first row

fjetter · fjetter · commit 9afa23464bc4 · 2019-01-26T14:29:53.000+01:00
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
@@ -568,6 +568,37 @@ def test_apply_dup_names_multi_agg(self):
 
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("axis, expected", [
+        (0, ['a', 'b']),
+        (1, [0, 1, 2, 3, 4, 5]),
+    ])
+    def test_apply_first_row_once(self, axis, expected):
+        df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
+
+        rows = []
+
+        def f_fast(row):
+            rows.append(row.name)
+            return 0
+        df.apply(f_fast, axis=axis)
+        # gh-2936
+        # each row/column label appears once: one call per row/column
+        assert rows == expected
+
+        rows_slow = []
+
+        def f_slow(row):
+            """
+            This function triggers a `function does not reduce`
+            exception and uses the slow path
+            """
+            rows_slow.append(row.name)
+            return row.copy()
+        # the slow path is expected to call f_slow twice on the first label
+        df.apply(f_slow, axis=axis)
+        expected_first_row_twice = [expected[0]] + expected
+        assert rows_slow == expected_first_row_twice
+
 
 class TestInferOutputShape(object):
     # the user has supplied an opaque UDF where
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
@@ -105,6 +105,27 @@ def f(g):
     assert not mutated
 
 
+def test_group_apply_once_per_group():
+    # record the group names in the order the applied function sees them
+    df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
+    names = []
+
+    def f_copy(group):
+        names.append(group.name)
+        return group.copy()
+    df.groupby("a").apply(f_copy)
+    assert names == [0, 1, 2]
+
+    def f_nocopy(group):
+        names.append(group.name)
+        return group
+    names.clear()
+    # returning the group uncopied is expected to take the slow apply
+    # path, i.e. the function is applied to the first group twice
+    df.groupby("a").apply(f_nocopy)
+    assert names == [0, 0, 1, 2]
+
+
 def test_apply_with_mixed_dtype():
     # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
     df = DataFrame({'foo1': np.random.randn(6),
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1390,16 +1390,25 @@ def test_group_name_available_in_inference_pass():
 
     names = []
 
-    def f(group):
+    def f_fast(group):
         names.append(group.name)
         return group.copy()
 
-    df.groupby('a', sort=False, group_keys=False).apply(f)
-    # we expect 2 zeros because we call ``f`` once to see if a faster route
-    # can be used.
+    df.groupby('a', sort=False, group_keys=False).apply(f_fast)
+    # gh-2936
+    # every group should appear once, i.e. apply is called once per group
     expected_names = [0, 1, 2]
     assert names == expected_names
 
+    names_slow = []
+
+    def f_slow(group):
+        names_slow.append(group.name)
+        return group
+
+    df.groupby('a', sort=False, group_keys=False).apply(f_slow)
+    assert names_slow == [0, 0, 1, 2]
+
 
 def test_no_dummy_key_names(df):
     # see gh-1291
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
@@ -665,3 +665,27 @@ def test_map_missing_mixed(self, vals, mapping, exp):
         result = s.map(mapping)
 
         tm.assert_series_equal(result, pd.Series(exp))
+
+    def test_apply_only_once(self):
+        ser = pd.Series([0, 0, 1, 1, 2, 2], name="series")
+        rows = []
+
+        def f(row):
+            rows.append(row)
+            return row
+        ser.apply(f)
+        # gh-2936
+        # every element should appear once, i.e. f is called once per element
+        expected_names = [0, 0, 1, 1, 2, 2]
+        assert rows == expected_names
+
+        # Elements should also only be passed once if the return
+        # shape is different
+        rows = []
+
+        def g(row):
+            rows.append(row)
+            return (row, row)
+        ser.apply(g)
+        expected_names = [0, 0, 1, 1, 2, 2]
+        assert rows == expected_names