diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index cc5ebc730f94a..7412d25b33125 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -663,6 +663,25 @@ The dimension of the returned result can also change: s s.apply(f) + +.. warning:: + + In the current implementation apply calls func twice on the + first group to decide whether it can take a fast or slow code + path. This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + group. + + .. ipython:: python + + d = DataFrame({"a":["x", "y"], "b":[1,2]}) + def identity(df): + print(df) + return df + + d.groupby("a").apply(identity) + + Other useful features --------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5ecdd4d8b351d..3329483a61f5c 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3302,6 +3302,14 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, array/series Additional keyword arguments will be passed as keywords to the function + Notes + ----- + In the current implementation apply calls func twice on the + first column/row to decide whether it can take a fast or slow + code path. This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + column/row. + Examples -------- >>> df.apply(numpy.sqrt) # returns DataFrame diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 996a691eca082..8fd49bd2fe5bd 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -547,7 +547,14 @@ def apply(self, func, *args, **kwargs): Notes ----- - See online documentation for full exposition on how to use apply + See online documentation for full exposition on how to use apply. + + In the current implementation apply calls func twice on the + first group to decide whether it can take a fast or slow code + path. 
This can lead to unexpected behavior if func has + side-effects, as they will take effect twice for the first + group. + See also --------