Skip to content

Commit 90d5c5c

Browse files
committed
Add more tests for apply first row
1 parent c5560a5 commit 90d5c5c

File tree

4 files changed

+93
-4
lines changed

4 files changed

+93
-4
lines changed

pandas/tests/frame/test_apply.py

+33
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,39 @@ def test_apply_dup_names_multi_agg(self):
568568

569569
tm.assert_frame_equal(result, expected)
570570

571+
@pytest.mark.parametrize("axis, expected", [
    (0, ['a', 'b']),
    (1, [0, 1, 2, 3, 4, 5]),
])
def test_apply_first_row_once(self, axis, expected):
    """
    Record the ``name`` of every Series that ``DataFrame.apply`` hands
    to the UDF and compare the sequence against the expected labels.

    Parameters
    ----------
    axis : int
        Axis passed through to ``DataFrame.apply``.
    expected : list
        Labels the UDF is expected to see, in order: the column labels
        for ``axis=0`` and the index labels for ``axis=1``.
    """
    # GH2936, GH2656, GH7739, GH10519, GH12155, GH20084, GH21417
    df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})

    rows = []

    def f_fast(row):
        # no debugger hooks here: a breakpoint would halt every call
        # made by ``apply`` and stall or abort the test run
        rows.append(row.name)
        return 0

    df.apply(f_fast, axis=axis)

    # every label should appear once, i.e. apply is called once per
    # row (axis=1) or column (axis=0)
    assert rows == expected

    rows_slow = []

    def f_slow(row):
        """
        This function triggers a `function does not reduce`
        exception and uses the slow path
        """
        rows_slow.append(row.name)
        return row.copy()

    df.apply(f_slow, axis=axis)

    # on the slow path the first label is expected to be seen twice
    expected_first_row_twice = [expected[0]] + expected
    assert rows_slow == expected_first_row_twice
603+
571604

572605
class TestInferOutputShape(object):
573606
# the user has supplied an opaque UDF where

pandas/tests/groupby/test_apply.py

+22
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,28 @@ def f(g):
105105
assert not mutated
106106

107107

108+
def test_group_apply_once_per_group():
    """
    Record ``group.name`` on every UDF call made by ``groupby.apply``.

    A UDF returning ``group.copy()`` is expected to be called once per
    group; a UDF returning the group itself is expected to see the
    first group twice.
    """
    # GH2936, GH2656, GH7739, GH10519, GH12155, GH20084, GH21417
    df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})

    names = []

    def f_copy(group):
        names.append(group.name)
        return group.copy()

    df.groupby("a").apply(f_copy)
    assert names == [0, 1, 2]

    # a separate list for the second pass, so nothing recorded above can
    # leak into the assertion below (and no ``list.clear`` is needed,
    # which Python 2 lists do not provide)
    names_nocopy = []

    def f_nocopy(group):
        names_nocopy.append(group.name)
        return group

    # this takes the slow apply path, i.e. we need to apply the
    # function to the first group twice
    df.groupby("a").apply(f_nocopy)
    assert names_nocopy == [0, 0, 1, 2]
128+
129+
108130
def test_apply_with_mixed_dtype():
109131
# GH3480, apply with mixed dtype on axis=1 breaks in 0.11
110132
df = DataFrame({'foo1': np.random.randn(6),

pandas/tests/groupby/test_groupby.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -1420,20 +1420,30 @@ def foo(x):
14201420

14211421
def test_group_name_available_in_inference_pass():
14221422
# gh-15062
1423+
# GH2936, GH2656, GH7739, GH10519, GH12155, GH20084, GH21417
14231424
df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
14241425

14251426
names = []
14261427

1427-
def f(group):
1428+
def f_fast(group):
14281429
names.append(group.name)
14291430
return group.copy()
14301431

1431-
df.groupby('a', sort=False, group_keys=False).apply(f)
1432-
# we expect 2 zeros because we call ``f`` once to see if a faster route
1433-
# can be used.
1432+
df.groupby('a', sort=False, group_keys=False).apply(f_fast)
1433+
1434+
# every group should appear once, i.e. apply is called once per group
14341435
expected_names = [0, 1, 2]
14351436
assert names == expected_names
14361437

1438+
names_slow = []
1439+
1440+
def f_slow(group):
1441+
names_slow.append(group.name)
1442+
return group
1443+
1444+
df.groupby('a', sort=False, group_keys=False).apply(f_slow)
1445+
assert names_slow == [0, 0, 1, 2]
1446+
14371447

14381448
def test_no_dummy_key_names(df):
14391449
# see gh-1291

pandas/tests/series/test_apply.py

+24
Original file line numberDiff line numberDiff line change
@@ -665,3 +665,27 @@ def test_map_missing_mixed(self, vals, mapping, exp):
665665
result = s.map(mapping)
666666

667667
tm.assert_series_equal(result, pd.Series(exp))
668+
669+
def test_apply_only_once(self):
670+
# GH2936, GH2656, GH7739, GH10519, GH12155, GH20084, GH21417
671+
ser = pd.Series([0, 0, 1, 1, 2, 2], name="series")
672+
rows = []
673+
674+
def f(row):
675+
rows.append(row)
676+
return row
677+
ser.apply(f)
678+
# every row should appear once, i.e. apply is called once per row
679+
expected_names = [0, 0, 1, 1, 2, 2]
680+
assert rows == expected_names
681+
682+
# Rows should also only be applied once if the return
683+
# shape is different
684+
rows = []
685+
686+
def g(row):
687+
rows.append(row)
688+
return (row, row)
689+
ser.apply(g)
690+
expected_names = [0, 0, 1, 1, 2, 2]
691+
assert rows == expected_names

0 commit comments

Comments
 (0)