From ef1ff13f1e2c3cb2b89561af24fa38b6b807d263 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Mon, 10 Oct 2016 17:34:51 +0100
Subject: [PATCH] DOC: Add details to DataFrame groupby transform

Add requirements for user function in groupby transform

closes #13543
[skip ci]
---
 doc/source/groupby.rst | 38 +++++++++++++++++++++++++++++++++-----
 pandas/core/groupby.py | 15 +++++++++++++++
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index 8484ccd69a983..cbe3588104439 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -580,9 +580,21 @@ Transformation
 --------------
 
 The ``transform`` method returns an object that is indexed the same (same size)
-as the one being grouped. Thus, the passed transform function should return a
-result that is the same size as the group chunk. For example, suppose we wished
-to standardize the data within each group:
+as the one being grouped. The transform function must:
+
+* Return a result that is either the same size as the group chunk or
+  broadcastable to the size of the group chunk (e.g., a scalar,
+  ``grouped.transform(lambda x: x.iloc[-1])``).
+* Operate column-by-column on the group chunk.  The transform is applied to
+  the first group chunk using ``chunk.apply``.
+* Not perform in-place operations on the group chunk. Group chunks should
+  be treated as immutable, and changes to a group chunk may produce unexpected
+  results. For example, when using ``fillna``, ``inplace`` must be ``False``
+  (``grouped.transform(lambda x: x.fillna(x.mean(), inplace=False))``).
+* (Optionally) operate on the entire group chunk. If this is supported, a
+  fast path is used starting from the *second* chunk.
+
+For example, suppose we wished to standardize the data within each group:
 
 .. ipython:: python
 
@@ -620,6 +632,21 @@ We can also visually compare the original and transformed data sets.
    @savefig groupby_transform_plot.png
    compare.plot()
 
+Transformation functions that have lower dimension outputs are broadcast to
+match the shape of the input array.
+
+.. ipython:: python
+
+   data_range = lambda x: x.max() - x.min()
+   ts.groupby(key).transform(data_range)
+
+Alternatively, the built-in methods could be used to produce the same
+outputs:
+
+.. ipython:: python
+
+   ts.groupby(key).transform('max') - ts.groupby(key).transform('min')
+
 Another common data transform is to replace missing data with the group mean.
 
 .. ipython:: python
@@ -664,8 +691,9 @@ and that the transformed data contains no NAs.
 
 .. note::
 
-   Some functions when applied to a groupby object will automatically transform the input, returning
-   an object of the same shape as the original. Passing ``as_index=False`` will not affect these transformation methods.
+   Some functions when applied to a groupby object will automatically transform
+   the input, returning an object of the same shape as the original. Passing
+   ``as_index=False`` will not affect these transformation methods.
 
    For example: ``fillna, ffill, bfill, shift``.
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index ba2de295fa0a9..c52ddb8bf7016 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -3674,10 +3674,25 @@ def transform(self, func, *args, **kwargs):
         Each subframe is endowed the attribute 'name' in case you need to know
         which group you are working on.
 
+        The current implementation imposes three requirements on f:
+
+        * f must return a value that either has the same shape as the input
+          subframe or can be broadcast to the shape of the input subframe.
+          For example, if f returns a scalar it will be broadcast to have the
+          same shape as the input subframe.
+        * f must support application column-by-column in the subframe. If f
+          also supports application to the entire subframe, then a fast path
+          is used starting from the second chunk.
+        * f must not mutate subframes. Mutation is not supported and may
+          produce unexpected results.
+
         Examples
         --------
         >>> grouped = df.groupby(lambda x: mapping[x])
+        >>> # Same shape
         >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
+        >>> # Broadcastable
+        >>> grouped.transform(lambda x: x.max() - x.min())
         """
 
         # optimized transforms