diff --git a/README.md b/README.md
index 86cf95508a5d9..78e9b93ae535f 100644
--- a/README.md
+++ b/README.md
[README badge-table markup garbled in extraction: the HTML badge rows for
Latest Release, Package Status, License, and Coverage were restructured,
and the separate Conda, Conda-forge, and PyPI rows were replaced by
consolidated Downloads and Gitter rows. The standalone Gitter badge below
the table was also removed:]
-[](https://gitter.im/pydata/pandas?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+
## What is it
diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml
index 1337fc54e9aac..f9f9208519d61 100644
--- a/ci/environment-dev.yaml
+++ b/ci/environment-dev.yaml
@@ -11,5 +11,5 @@ dependencies:
- python-dateutil>=2.5.0
- python=3
- pytz
- - setuptools>=3.3
+ - setuptools>=24.2.0
- sphinx
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index fcbe0da5de305..3430e778a4573 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -7,5 +7,5 @@ moto
pytest>=3.1
python-dateutil>=2.5.0
pytz
-setuptools>=3.3
-sphinx
\ No newline at end of file
+setuptools>=24.2.0
+sphinx
diff --git a/doc/source/api.rst b/doc/source/api.rst
index e224e9927f55c..e43632ea46bfb 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -2106,6 +2106,7 @@ Standard moving window functions
Rolling.skew
Rolling.kurt
Rolling.apply
+ Rolling.aggregate
Rolling.quantile
Window.mean
Window.sum
@@ -2133,6 +2134,7 @@ Standard expanding window functions
Expanding.skew
Expanding.kurt
Expanding.apply
+ Expanding.aggregate
Expanding.quantile
Exponentially-weighted moving window functions
diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index 4285767654e25..ff06c369e1897 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -323,7 +323,7 @@ compute the mean absolute deviation on a rolling basis:
mad = lambda x: np.fabs(x - x.mean()).mean()
@savefig rolling_apply_ex.png
- s.rolling(window=60).apply(mad).plot(style='k')
+ s.rolling(window=60).apply(mad, raw=True).plot(style='k')
.. _stats.rolling_window:
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index 4e61228d5c0ad..893642410af02 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -496,7 +496,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
def Red(x):
return functools.reduce(CumRet,x,1.0)
- S.expanding().apply(Red)
+ S.expanding().apply(Red, raw=True)
`Replacing some values with mean of the rest of a group
diff --git a/doc/source/extending.rst b/doc/source/extending.rst
index 25c4ba4a4a2a3..b94a43480ed93 100644
--- a/doc/source/extending.rst
+++ b/doc/source/extending.rst
@@ -57,6 +57,13 @@ If you write a custom accessor, make a pull request adding it to our
Extension Types
---------------
+.. versionadded:: 0.23.0
+
+.. warning::
+
+ The ``ExtensionDtype`` and ``ExtensionArray`` APIs are new and
+ experimental. They may change between versions without warning.
+
Pandas defines an interface for implementing data types and arrays that *extend*
NumPy's type system. Pandas itself uses the extension system for some types
that aren't built into NumPy (categorical, period, interval, datetime with
@@ -106,6 +113,24 @@ by some other storage type, like Python lists.
See the `extension array source`_ for the interface definition. The docstrings
and comments contain guidance for properly implementing the interface.
+We provide a test suite for ensuring that your extension arrays satisfy the expected
+behavior. To use the test suite, you must provide several pytest fixtures and inherit
+from the base test class. The required fixtures are found in
+https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/conftest.py.
+
+To use a test, subclass it:
+
+.. code-block:: python
+
+ from pandas.tests.extension import base
+
+ class TestConstructors(base.BaseConstructorsTests):
+ pass
+
+
+See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py
+for a list of all the tests available.
+
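+The fixtures describe your array to the test suite: for example, ``dtype``
+should return an instance of your ``ExtensionDtype`` subclass, and ``data``
+a length-100 array of valid values. A minimal sketch, assuming a
+hypothetical ``MyDtype``/``MyArray`` pair:
+
+.. code-block:: python
+
+    import pytest
+
+    from pandas.tests.extension import base
+    from my_extension import MyArray, MyDtype  # hypothetical module
+
+    @pytest.fixture
+    def dtype():
+        # an instance of the ExtensionDtype under test
+        return MyDtype()
+
+    @pytest.fixture
+    def data():
+        # a length-100 array of valid values for the type
+        return MyArray(list(range(100)))
+
+    class TestConstructors(base.BaseConstructorsTests):
+        pass
+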
.. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py
.. _extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py
diff --git a/doc/source/install.rst b/doc/source/install.rst
index fdb22a8dc3380..4713bbb78d633 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -15,7 +15,7 @@ Instructions for installing from source,
`PyPI `__, `ActivePython `__, various Linux distributions, or a
`development version `__ are also provided.
-.. _install.dropping_27:
+.. _install.dropping-27:
Plan for dropping Python 2.7
----------------------------
@@ -223,7 +223,7 @@ installed), make sure you have `pytest
Dependencies
------------
-* `setuptools `__: 3.3.0 or higher
+* `setuptools `__: 24.2.0 or higher
* `NumPy `__: 1.9.0 or higher
* `python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__: 2.5.0 or higher
* `pytz `__
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 18c4dca5b69da..c5877f10420dc 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -11,7 +11,7 @@ version.
.. warning::
Starting January 1, 2019, pandas feature releases will support Python 3 only.
- See :ref:`here ` for more.
+ See :ref:`install.dropping-27` for more.
.. _whatsnew_0230.enhancements:
@@ -65,6 +65,35 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp
pd.get_dummies(df, columns=['c'], dtype=bool).dtypes
+.. _whatsnew_0230.enhancements.window_raw:
+
+Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `,
+:func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` have gained a ``raw=None`` parameter.
+This is similar to :func:`DataFrame.apply`. If ``True``, a ``np.ndarray`` is sent to the applied function; if ``False``, a ``Series`` is passed. The
+default is ``None``, which preserves backward compatibility by behaving like ``True`` and sending a ``np.ndarray``.
+In a future version the default will change to ``False``, sending a ``Series`` (:issue:`5071`, :issue:`20584`).
+
+.. ipython:: python
+
+ s = pd.Series(np.arange(5), np.arange(5) + 1)
+ s
+
+Pass a ``Series``:
+
+.. ipython:: python
+
+ s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False)
+
+Mimic the original behavior of passing a ndarray:
+
+.. ipython:: python
+
+ s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)
+
+
.. _whatsnew_0230.enhancements.merge_on_columns_and_levels:
Merging on a combination of columns and index levels
@@ -192,6 +221,12 @@ Current Behavior:
s.rank(na_option='top')
+These bugs were squashed:
+
+- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`)
+- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`)
+- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`)
+
.. _whatsnew_0230.enhancements.round-trippable_json:
JSON read/write round-trippable with ``orient='table'``
@@ -306,8 +341,8 @@ Supplying a ``CategoricalDtype`` will make the categories in each column consist
.. _whatsnew_023.enhancements.extension:
-Extending Pandas with Custom Types
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Extending Pandas with Custom Types (Experimental)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Pandas now supports storing array-like objects that aren't necessarily 1-D NumPy
arrays as columns in a DataFrame or values in a Series. This allows third-party
@@ -380,6 +415,7 @@ Other Enhancements
- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`)
- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
+- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. if both are ``CategoricalIndex``) (:issue:`18359`)
- :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`)
- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories`
@@ -408,6 +444,7 @@ Other Enhancements
``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`)
- :func:`read_html` now accepts a ``displayed_only`` keyword argument to control whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`)
- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`)
+- :class:`WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`).
- :class:`DataFrame` and :class:`Series` now support matrix multiplication (```@```) operator (:issue:`10259`) for Python>=3.5
- Updated ``to_gbq`` and ``read_gbq`` signature and documentation to reflect changes from
the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ
@@ -435,6 +472,8 @@ If installed, we now require:
+-----------------+-----------------+----------+---------------+
| beautifulsoup4 | 4.2.1 | | :issue:`20082`|
+-----------------+-----------------+----------+---------------+
+| setuptools | 24.2.0 | | :issue:`20698`|
++-----------------+-----------------+----------+---------------+
.. _whatsnew_0230.api_breaking.dict_insertion_order:
@@ -815,6 +854,7 @@ Other API Changes
- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`)
- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`).
- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`)
+- A user-defined-function that is passed to :func:`Series.rolling().aggregate() `, :func:`DataFrame.rolling().aggregate() `, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here `. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`)
.. _whatsnew_0230.deprecations:
@@ -843,6 +883,9 @@ Deprecations
- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`)
- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`)
- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`).
+- :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `,
+ :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`)
+- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`)
.. _whatsnew_0230.prior_deprecations:
@@ -1045,14 +1088,12 @@ Offsets
Numeric
^^^^^^^
-- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`)
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)
- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`)
- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`)
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
- Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`)
- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`)
-- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`)
- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`)
@@ -1077,6 +1118,8 @@ Indexing
- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`).
- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`)
- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`)
+- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`)
+- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`)
MultiIndex
^^^^^^^^^^
@@ -1114,6 +1157,7 @@ I/O
- Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecords values are NoneType (:issue:`20030`)
- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`)
- Bug in :func:`HDFStore.keys` when reading a file with a softlink causes exception (:issue:`20523`)
+- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`)
Plotting
^^^^^^^^
@@ -1177,6 +1221,7 @@ Reshaping
- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
+- Improved error message when the number of levels in a pivot table or an unstacked dataframe is too large causing int32 overflow (:issue:`20601`)
Other
^^^^^
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index de802f4a72277..6a33e4a09476d 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -417,7 +417,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
ndarray[int64_t] labels,
bint is_datetimelike, object ties_method,
bint ascending, bint pct, object na_option):
- """Provides the rank of values within each group
+ """
+ Provides the rank of values within each group.
Parameters
----------
@@ -425,17 +426,24 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
values : array of {{c_type}} values to be ranked
labels : array containing unique label for each group, with its ordering
matching up to the corresponding record in `values`
- is_datetimelike : bool
+ is_datetimelike : bool, default False
unused in this method but provided for call compatibility with other
Cython transformations
- ties_method : {'keep', 'top', 'bottom'}
+ ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+ * average: average rank of group
+ * min: lowest rank in group
+ * max: highest rank in group
+ * first: ranks assigned in order they appear in the array
+ * dense: like 'min', but rank always increases by 1 between groups
+ ascending : boolean, default True
+ False for ranks by high (1) to low (N)
+ pct : boolean, default False
+ Compute percentage rank of data within each group
+ na_option : {'keep', 'top', 'bottom'}, default 'keep'
* keep: leave NA values where they are
* top: smallest rank if ascending
* bottom: smallest rank if descending
- ascending : boolean
- False for ranks by high (1) to low (N)
- pct : boolean
- Compute percentage rank of data within each group
Notes
-----
@@ -508,7 +516,8 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
# if keep_na, check for missing values and assign back
# to the result where appropriate
- if keep_na and masked_vals[_as[i]] == nan_fill_val:
+
+ if keep_na and mask[_as[i]]:
grp_na_count += 1
out[_as[i], 0] = nan
else:
@@ -548,9 +557,9 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
# reset the dups and sum_ranks, knowing that a new value is coming
# up. the conditional also needs to handle nan equality and the
# end of iteration
- if (i == N - 1 or (
- (masked_vals[_as[i]] != masked_vals[_as[i+1]]) and not
- (mask[_as[i]] and mask[_as[i+1]]))):
+ if (i == N - 1 or
+ (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or
+ (mask[_as[i]] ^ mask[_as[i+1]])):
dups = sum_ranks = 0
val_start = i
grp_vals_seen += 1
diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx
index aa13f03d8e9e4..e524f823605a4 100644
--- a/pandas/_libs/window.pyx
+++ b/pandas/_libs/window.pyx
@@ -1432,30 +1432,35 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
return output
-def roll_generic(ndarray[float64_t, cast=True] input,
+def roll_generic(object obj,
int64_t win, int64_t minp, object index, object closed,
- int offset, object func,
+ int offset, object func, bint raw,
object args, object kwargs):
cdef:
ndarray[double_t] output, counts, bufarr
+ ndarray[float64_t, cast=True] arr
float64_t *buf
float64_t *oldbuf
int64_t nobs = 0, i, j, s, e, N
bint is_variable
ndarray[int64_t] start, end
- if not input.flags.c_contiguous:
- input = input.copy('C')
-
- n = len(input)
+ n = len(obj)
if n == 0:
- return input
+ return obj
+
+ arr = np.asarray(obj)
+
+ # ndarray input
+ if raw:
+ if not arr.flags.c_contiguous:
+ arr = arr.copy('C')
- counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
+ counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
np.array([0.] * offset)]),
win, minp, index, closed)[offset:]
- start, end, N, win, minp, is_variable = get_window_indexer(input, win,
+ start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
minp, index,
closed,
floor=0)
@@ -1463,8 +1468,8 @@ def roll_generic(ndarray[float64_t, cast=True] input,
output = np.empty(N, dtype=float)
if is_variable:
+ # variable window arr or series
- # variable window
if offset != 0:
raise ValueError("unable to roll_generic with a non-zero offset")
@@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
e = end[i]
if counts[i] >= minp:
- output[i] = func(input[s:e], *args, **kwargs)
+ if raw:
+ output[i] = func(arr[s:e], *args, **kwargs)
+ else:
+ output[i] = func(obj.iloc[s:e], *args, **kwargs)
+ else:
+ output[i] = NaN
+
+ elif not raw:
+ # series
+ for i from 0 <= i < N:
+ if counts[i] >= minp:
+ sl = slice(int_max(i + offset - win + 1, 0),
+ int_min(i + offset + 1, N))
+ output[i] = func(obj.iloc[sl], *args, **kwargs)
else:
output[i] = NaN
@@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the beginning, through first full-length window
for i from 0 <= i < (int_min(win, N) - offset):
if counts[i] >= minp:
- output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
+ output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
else:
output[i] = NaN
# remaining full-length windows
- buf = input.data
+ buf = arr.data
bufarr = np.empty(win, dtype=float)
oldbuf = bufarr.data
for i from (win - offset) <= i < (N - offset):
@@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
# truncated windows at the end
for i from int_max(N - offset, 0) <= i < N:
if counts[i] >= minp:
- output[i] = func(input[int_max(i + offset - win + 1, 0): N],
+ output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
*args,
**kwargs)
else:
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index c281bd80cb274..97a764fa7dbe8 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1,4 +1,10 @@
-"""An interface for extending pandas with custom arrays."""
+"""An interface for extending pandas with custom arrays.
+
+.. warning::
+
+ This is an experimental API and subject to breaking changes
+ without warning.
+"""
import numpy as np
from pandas.errors import AbstractMethodError
@@ -14,12 +20,15 @@ class ExtensionArray(object):
with a custom type and will not attempt to coerce them to objects. They
may be stored directly inside a :class:`DataFrame` or :class:`Series`.
+ .. versionadded:: 0.23.0
+
Notes
-----
The interface includes the following abstract methods that must be
implemented by subclasses:
* _constructor_from_sequence
+ * _from_factorized
* __getitem__
* __len__
* dtype
@@ -30,11 +39,21 @@ class ExtensionArray(object):
* _concat_same_type
Some additional methods are available to satisfy pandas' internal, private
- block API.
+ block API:
* _can_hold_na
* _formatting_values
+ Some methods require casting the ExtensionArray to an ndarray of Python
+ objects with ``self.astype(object)``, which may be expensive. When
+ performance is a concern, we highly recommend overriding the following
+ methods:
+
+ * fillna
+ * unique
+ * factorize / _values_for_factorize
+ * argsort / _values_for_argsort
+
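+ A minimal sketch of ``_values_for_argsort``, assuming the data are
+ stored in a hypothetical ``self.data`` ndarray attribute::
+
+     def _values_for_argsort(self):
+         # ndarray whose argsort matches the ordering of this array
+         return self.data
+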
This class does not inherit from 'abc.ABCMeta' for performance reasons.
Methods and properties required by the interface raise
``pandas.errors.AbstractMethodError`` and no ``register`` method is
@@ -50,10 +69,6 @@ class ExtensionArray(object):
by some other storage type, like Python lists. Pandas makes no
assumptions on how the data are stored, just that it can be converted
to a NumPy array.
-
- Extension arrays should be able to be constructed with instances of
- the class, i.e. ``ExtensionArray(extension_array)`` should return
- an instance, not error.
"""
# '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
# Don't override this.
@@ -458,11 +473,23 @@ def take(self, indexer, allow_fill=True, fill_value=None):
Fill value to replace -1 values with. If applicable, this should
use the sentinel missing value for this type.
+ Returns
+ -------
+ ExtensionArray
+
+ Raises
+ ------
+ IndexError
+ When the indexer is out of bounds for the array.
+
Notes
-----
This should follow pandas' semantics where -1 indicates missing values.
Positions where indexer is ``-1`` should be filled with the missing
value for this type.
+ This gives rise to the special case of a take on an empty
+ ExtensionArray that does not raise an IndexError straight away
+ when the `indexer` is all ``-1``.
This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
indexer is a sequence of values.
@@ -477,6 +504,12 @@ def take(self, indexer, allow_fill=True, fill_value=None):
def take(self, indexer, allow_fill=True, fill_value=None):
indexer = np.asarray(indexer)
mask = indexer == -1
+
+ # take on empty array not handled as desired by numpy
+ # in case of -1 (all missing take)
+ if not len(self) and mask.all():
+ return type(self)([np.nan] * len(indexer))
+
result = self.data.take(indexer)
result[mask] = np.nan # NA for this type
return type(self)(result)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0e60f16891a52..41d67c15c55b5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -411,7 +411,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
arr = np.array(data, dtype=dtype, copy=copy)
except (ValueError, TypeError) as e:
exc = TypeError('DataFrame constructor called with '
- 'incompatible data and dtype: %s' % e)
+ 'incompatible data and dtype: {e}'.format(e=e))
raise_with_traceback(exc)
if arr.ndim == 0 and index is not None and columns is not None:
@@ -520,8 +520,9 @@ def _get_axes(N, K, index=index, columns=columns):
try:
values = values.astype(dtype)
except Exception as orig:
- e = ValueError("failed to cast to '%s' (Exception was: %s)"
- % (dtype, orig))
+ e = ValueError("failed to cast to '{dtype}' (Exception "
+ "was: {orig})".format(dtype=dtype,
+ orig=orig))
raise_with_traceback(e)
index, columns = _get_axes(*values.shape)
@@ -873,8 +874,9 @@ def dot(self, other):
lvals = self.values
rvals = np.asarray(other)
if lvals.shape[1] != rvals.shape[0]:
- raise ValueError('Dot product shape mismatch, %s vs %s' %
- (lvals.shape, rvals.shape))
+ raise ValueError('Dot product shape mismatch, '
+ '{l} vs {r}'.format(l=lvals.shape,
+ r=rvals.shape))
if isinstance(other, DataFrame):
return self._constructor(np.dot(lvals, rvals), index=left.index,
@@ -888,7 +890,7 @@ def dot(self, other):
else:
return Series(result, index=left.index)
else: # pragma: no cover
- raise TypeError('unsupported type: %s' % type(other))
+ raise TypeError('unsupported type: {oth}'.format(oth=type(other)))
def __matmul__(self, other):
""" Matrix multiplication using binary `@` operator in Python>=3.5 """
@@ -1098,7 +1100,7 @@ def to_dict(self, orient='dict', into=dict):
return into_c((t[0], dict(zip(self.columns, t[1:])))
for t in self.itertuples())
else:
- raise ValueError("orient '%s' not understood" % orient)
+ raise ValueError("orient '{o}' not understood".format(o=orient))
def to_gbq(self, destination_table, project_id, chunksize=None,
verbose=None, reauth=False, if_exists='fail', private_key=None,
@@ -2140,7 +2142,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
lines.append(self.index._summary())
if len(self.columns) == 0:
- lines.append('Empty %s' % type(self).__name__)
+ lines.append('Empty {name}'.format(name=type(self).__name__))
fmt.buffer_put_lines(buf, lines)
return
@@ -2166,13 +2168,15 @@ def _verbose_repr():
space = max(len(pprint_thing(k)) for k in self.columns) + 4
counts = None
- tmpl = "%s%s"
+ tmpl = "{count}{dtype}"
if show_counts:
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
- raise AssertionError('Columns must equal counts (%d != %d)'
- % (len(cols), len(counts)))
- tmpl = "%s non-null %s"
+ raise AssertionError(
+ 'Columns must equal counts '
+ '({cols:d} != {counts:d})'.format(
+ cols=len(cols), counts=len(counts)))
+ tmpl = "{count} non-null {dtype}"
dtypes = self.dtypes
for i, col in enumerate(self.columns):
@@ -2183,7 +2187,8 @@ def _verbose_repr():
if show_counts:
count = counts.iloc[i]
- lines.append(_put_str(col, space) + tmpl % (count, dtype))
+ lines.append(_put_str(col, space) + tmpl.format(count=count,
+ dtype=dtype))
def _non_verbose_repr():
lines.append(self.columns._summary(name='Columns'))
@@ -2192,9 +2197,12 @@ def _sizeof_fmt(num, size_qualifier):
# returns size in human readable format
for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
if num < 1024.0:
- return "%3.1f%s %s" % (num, size_qualifier, x)
+ return ("{num:3.1f}{size_q}"
+ "{x}".format(num=num, size_q=size_qualifier, x=x))
num /= 1024.0
- return "%3.1f%s %s" % (num, size_qualifier, 'PB')
+ return "{num:3.1f}{size_q} {pb}".format(num=num,
+ size_q=size_qualifier,
+ pb='PB')
if verbose:
_verbose_repr()
@@ -2207,8 +2215,9 @@ def _sizeof_fmt(num, size_qualifier):
_verbose_repr()
counts = self.get_dtype_counts()
- dtypes = ['%s(%d)' % k for k in sorted(compat.iteritems(counts))]
- lines.append('dtypes: %s' % ', '.join(dtypes))
+ dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k
+ in sorted(compat.iteritems(counts))]
+ lines.append('dtypes: {types}'.format(types=', '.join(dtypes)))
if memory_usage is None:
memory_usage = get_option('display.memory_usage')
@@ -2226,8 +2235,9 @@ def _sizeof_fmt(num, size_qualifier):
self.index._is_memory_usage_qualified()):
size_qualifier = '+'
mem_usage = self.memory_usage(index=True, deep=deep).sum()
- lines.append("memory usage: %s\n" %
- _sizeof_fmt(mem_usage, size_qualifier))
+ lines.append("memory usage: {mem}\n".format(
+ mem=_sizeof_fmt(mem_usage, size_qualifier)))
+
fmt.buffer_put_lines(buf, lines)
def memory_usage(self, index=True, deep=False):
@@ -3013,8 +3023,8 @@ def select_dtypes(self, include=None, exclude=None):
# can't both include AND exclude!
if not include.isdisjoint(exclude):
- raise ValueError('include and exclude overlap on %s' %
- (include & exclude))
+ raise ValueError('include and exclude overlap on {inc_ex}'.format(
+ inc_ex=(include & exclude)))
# empty include/exclude -> defaults to True
# three cases (we've already raised if both are empty)
@@ -3331,7 +3341,11 @@ def reindexer(value):
value = reindexer(value).T
elif isinstance(value, ExtensionArray):
+ from pandas.core.series import _sanitize_index
+ # Explicitly copy here, instead of in _sanitize_index,
+ # as sanitize_index won't copy an EA, even with copy=True
value = value.copy()
+ value = _sanitize_index(value, self.index, copy=False)
elif isinstance(value, Index) or is_sequence(value):
from pandas.core.series import _sanitize_index
@@ -3865,7 +3879,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
if verify_integrity and not index.is_unique:
duplicates = index.get_duplicates()
- raise ValueError('Index has duplicate keys: %s' % duplicates)
+ raise ValueError('Index has duplicate keys: {dup}'.format(
+ dup=duplicates))
for c in to_remove:
del frame[c]
@@ -4237,7 +4252,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
mask = count > 0
else:
if how is not None:
- raise ValueError('invalid how option: %s' % how)
+ raise ValueError('invalid how option: {h}'.format(h=how))
else:
raise TypeError('must specify how or thresh')
@@ -6123,8 +6138,11 @@ def append(self, other, ignore_index=False, verify_integrity=False):
# index name will be reset
index = Index([other.name], name=self.index.name)
- combined_columns = self.columns.tolist() + self.columns.union(
- other.index).difference(self.columns).tolist()
+ idx_diff = other.index.difference(self.columns)
+ try:
+ combined_columns = self.columns.append(idx_diff)
+ except TypeError:
+ combined_columns = self.columns.astype(object).append(idx_diff)
other = other.reindex(combined_columns, copy=False)
other = DataFrame(other.values.reshape((1, len(other))),
index=index,
@@ -6746,8 +6764,8 @@ def _count_level(self, level, axis=0, numeric_only=False):
agg_axis = frame._get_agg_axis(axis)
if not isinstance(count_axis, MultiIndex):
- raise TypeError("Can only count levels on hierarchical %s." %
- self._get_axis_name(axis))
+ raise TypeError("Can only count levels on hierarchical "
+ "{ax}.".format(ax=self._get_axis_name(axis)))
if frame._is_mixed_type:
# Since we have mixed types, calling notna(frame.values) might
@@ -6825,9 +6843,9 @@ def f(x):
elif filter_type == 'bool':
data = self._get_bool_data()
else: # pragma: no cover
- e = NotImplementedError("Handling exception with filter_"
- "type %s not implemented." %
- filter_type)
+ e = NotImplementedError(
+ "Handling exception with filter_type {f} not"
+ "implemented.".format(f=filter_type))
raise_with_traceback(e)
with np.errstate(all='ignore'):
result = f(data.values)
@@ -6839,8 +6857,8 @@ def f(x):
elif filter_type == 'bool':
data = self._get_bool_data()
else: # pragma: no cover
- msg = ("Generating numeric_only data with filter_type %s"
- "not supported." % filter_type)
+ msg = ("Generating numeric_only data with filter_type {f}"
+ "not supported.".format(f=filter_type))
raise NotImplementedError(msg)
values = data.values
labels = data._get_agg_axis(axis)
@@ -7115,7 +7133,8 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
elif axis == 1:
new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how))
else: # pragma: no cover
- raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis))
+ raise AssertionError('Axis must be 0 or 1. Got {ax!s}'.format(
+ ax=axis))
return self._constructor(new_data)
@@ -7146,7 +7165,8 @@ def to_period(self, freq=None, axis=0, copy=True):
elif axis == 1:
new_data.set_axis(0, self.columns.to_period(freq=freq))
else: # pragma: no cover
- raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis))
+ raise AssertionError('Axis must be 0 or 1. Got {ax!s}'.format(
+ ax=axis))
return self._constructor(new_data)
@@ -7505,8 +7525,9 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None):
else:
if len(columns) != len(content): # pragma: no cover
# caller's responsibility to check for this...
- raise AssertionError('%d columns passed, passed data had %s '
- 'columns' % (len(columns), len(content)))
+ raise AssertionError('{col:d} columns passed, passed data had '
+ '{con} columns'.format(col=len(columns),
+ con=len(content)))
# provide soft conversion of object dtypes
def convert(arr):
@@ -7581,4 +7602,4 @@ def _from_nested_dict(data):
def _put_str(s, space):
- return ('%s' % s)[:space].ljust(space)
+ return u'{s}'.format(s=s)[:space].ljust(space)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ae9d160db08e9..2b683d3a606b1 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4292,6 +4292,8 @@ def pipe(self, func, *args, **kwargs):
Notes
-----
`agg` is an alias for `aggregate`. Use the alias.
+
+ A user-defined function passed to `agg` will receive a Series for evaluation.
""")
_shared_docs['transform'] = ("""
@@ -5287,6 +5289,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
See Also
--------
+ interpolate : Fill NaN values using interpolation.
reindex, asfreq
Returns
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 7c89cab6b1428..8c20d62117e25 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1841,24 +1841,27 @@ def cumcount(self, ascending=True):
@Appender(_doc_template)
def rank(self, method='average', ascending=True, na_option='keep',
pct=False, axis=0):
- """Provides the rank of values within each group
+ """
+ Provides the rank of values within each group.
Parameters
----------
- method : {'average', 'min', 'max', 'first', 'dense'}, efault 'average'
+ method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
* average: average rank of group
* min: lowest rank in group
* max: highest rank in group
* first: ranks assigned in order they appear in the array
* dense: like 'min', but rank always increases by 1 between groups
- method : {'keep', 'top', 'bottom'}, default 'keep'
+ ascending : boolean, default True
+ False for ranks by high (1) to low (N)
+ na_option : {'keep', 'top', 'bottom'}, default 'keep'
* keep: leave NA values where they are
* top: smallest rank if ascending
* bottom: smallest rank if descending
- ascending : boolean, default True
- False for ranks by high (1) to low (N)
pct : boolean, default False
Compute percentage rank of data within each group
+ axis : int, default 0
+ The axis of the object over which to compute the rank.
Returns
-----
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 95e1f8438c704..95186b2e79a16 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -342,7 +342,8 @@ def _format_with_header(self, header, **kwargs):
def __contains__(self, key):
try:
res = self.get_loc(key)
- return is_scalar(res) or type(res) == slice or np.any(res)
+ return (is_scalar(res) or isinstance(res, slice) or
+ (is_list_like(res) and len(res)))
except (KeyError, TypeError, ValueError):
return False
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 88ea3511d4ee3..e0e7ba3e8b518 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -302,7 +302,7 @@ def _add_comparison_methods(cls):
_engine_type = libindex.DatetimeEngine
tz = None
- offset = None
+ _freq = None
_comparables = ['name', 'freqstr', 'tz']
_attributes = ['name', 'freq', 'tz']
@@ -415,7 +415,7 @@ def __new__(cls, data=None,
subarr = data.values
if freq is None:
- freq = data.offset
+ freq = data.freq
verify_integrity = False
else:
if data.dtype != _NS_DTYPE:
@@ -467,12 +467,12 @@ def __new__(cls, data=None,
if freq_infer:
inferred = subarr.inferred_freq
if inferred:
- subarr.offset = to_offset(inferred)
+ subarr.freq = to_offset(inferred)
return subarr._deepcopy_if_needed(ref_to_data, copy)
@classmethod
- def _generate(cls, start, end, periods, name, offset,
+ def _generate(cls, start, end, periods, name, freq,
tz=None, normalize=False, ambiguous='raise', closed=None):
if com._count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and '
@@ -535,7 +535,7 @@ def _generate(cls, start, end, periods, name, offset,
else:
_normalized = _normalized and end.time() == _midnight
- if hasattr(offset, 'delta') and offset != offsets.Day():
+ if hasattr(freq, 'delta') and freq != offsets.Day():
if inferred_tz is None and tz is not None:
# naive dates
if start is not None and start.tz is None:
@@ -551,11 +551,11 @@ def _generate(cls, start, end, periods, name, offset,
if end.tz is None and start.tz is not None:
end = end.tz_localize(start.tz, ambiguous=False)
- if _use_cached_range(offset, _normalized, start, end):
+ if _use_cached_range(freq, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
- offset=offset, name=name)
+ freq=freq, name=name)
else:
- index = _generate_regular_range(start, end, periods, offset)
+ index = _generate_regular_range(start, end, periods, freq)
else:
@@ -574,11 +574,11 @@ def _generate(cls, start, end, periods, name, offset,
if end.tz is None and start.tz is not None:
start = start.replace(tzinfo=None)
- if _use_cached_range(offset, _normalized, start, end):
+ if _use_cached_range(freq, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
- offset=offset, name=name)
+ freq=freq, name=name)
else:
- index = _generate_regular_range(start, end, periods, offset)
+ index = _generate_regular_range(start, end, periods, freq)
if tz is not None and getattr(index, 'tz', None) is None:
index = conversion.tz_localize_to_utc(_ensure_int64(index), tz,
@@ -596,12 +596,12 @@ def _generate(cls, start, end, periods, name, offset,
index = index[1:]
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]
- index = cls._simple_new(index, name=name, freq=offset, tz=tz)
+ index = cls._simple_new(index, name=name, freq=freq, tz=tz)
return index
@property
def _box_func(self):
- return lambda x: Timestamp(x, freq=self.offset, tz=self.tz)
+ return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)
def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
@@ -647,7 +647,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
result = object.__new__(cls)
result._data = values
result.name = name
- result.offset = freq
+ result._freq = freq
result._tz = timezones.maybe_get_tz(tz)
result._tz = timezones.tz_standardize(result._tz)
result._reset_identity()
@@ -734,7 +734,7 @@ def _has_same_tz(self, other):
return zzone == vzone
@classmethod
- def _cached_range(cls, start=None, end=None, periods=None, offset=None,
+ def _cached_range(cls, start=None, end=None, periods=None, freq=None,
name=None):
if start is None and end is None:
# I somewhat believe this should never be raised externally
@@ -747,30 +747,30 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
raise TypeError(
'Must either specify period or provide both start and end.')
- if offset is None:
+ if freq is None:
# This can't happen with external-facing code
- raise TypeError('Must provide offset.')
+ raise TypeError('Must provide freq.')
drc = _daterange_cache
- if offset not in _daterange_cache:
- xdr = generate_range(offset=offset, start=_CACHE_START,
+ if freq not in _daterange_cache:
+ xdr = generate_range(offset=freq, start=_CACHE_START,
end=_CACHE_END)
arr = tools.to_datetime(list(xdr), box=False)
cachedRange = DatetimeIndex._simple_new(arr)
- cachedRange.offset = offset
+ cachedRange.freq = freq
cachedRange = cachedRange.tz_localize(None)
cachedRange.name = None
- drc[offset] = cachedRange
+ drc[freq] = cachedRange
else:
- cachedRange = drc[offset]
+ cachedRange = drc[freq]
if start is None:
if not isinstance(end, Timestamp):
raise AssertionError('end must be an instance of Timestamp')
- end = offset.rollback(end)
+ end = freq.rollback(end)
endLoc = cachedRange.get_loc(end) + 1
startLoc = endLoc - periods
@@ -778,23 +778,23 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
if not isinstance(start, Timestamp):
raise AssertionError('start must be an instance of Timestamp')
- start = offset.rollforward(start)
+ start = freq.rollforward(start)
startLoc = cachedRange.get_loc(start)
endLoc = startLoc + periods
else:
- if not offset.onOffset(start):
- start = offset.rollforward(start)
+ if not freq.onOffset(start):
+ start = freq.rollforward(start)
- if not offset.onOffset(end):
- end = offset.rollback(end)
+ if not freq.onOffset(end):
+ end = freq.rollback(end)
startLoc = cachedRange.get_loc(start)
endLoc = cachedRange.get_loc(end) + 1
indexSlice = cachedRange[startLoc:endLoc]
indexSlice.name = name
- indexSlice.offset = offset
+ indexSlice.freq = freq
return indexSlice
@@ -836,7 +836,7 @@ def __setstate__(self, state):
np.ndarray.__setstate__(data, nd_state)
self.name = own_state[0]
- self.offset = own_state[1]
+ self.freq = own_state[1]
self._tz = timezones.tz_standardize(own_state[2])
# provide numpy < 1.7 compat
@@ -1184,7 +1184,7 @@ def union(self, other):
result._tz = timezones.tz_standardize(this.tz)
if (result.freq is None and
(this.freq is not None or other.freq is not None)):
- result.offset = to_offset(result.inferred_freq)
+ result.freq = to_offset(result.inferred_freq)
return result
def to_perioddelta(self, freq):
@@ -1232,7 +1232,7 @@ def union_many(self, others):
this._tz = timezones.tz_standardize(tz)
if this.freq is None:
- this.offset = to_offset(this.inferred_freq)
+ this.freq = to_offset(this.inferred_freq)
return this
def join(self, other, how='left', level=None, return_indexers=False,
@@ -1271,7 +1271,7 @@ def _maybe_utc_convert(self, other):
def _wrap_joined_index(self, joined, other):
name = self.name if self.name == other.name else None
if (isinstance(other, DatetimeIndex) and
- self.offset == other.offset and
+ self.freq == other.freq and
self._can_fast_union(other)):
joined = self._shallow_copy(joined)
joined.name = name
@@ -1284,9 +1284,9 @@ def _can_fast_union(self, other):
if not isinstance(other, DatetimeIndex):
return False
- offset = self.offset
+ freq = self.freq
- if offset is None or offset != other.offset:
+ if freq is None or freq != other.freq:
return False
if not self.is_monotonic or not other.is_monotonic:
@@ -1306,10 +1306,10 @@ def _can_fast_union(self, other):
# Only need to "adjoin", not overlap
try:
- return (right_start == left_end + offset) or right_start in left
+ return (right_start == left_end + freq) or right_start in left
except (ValueError):
- # if we are comparing an offset that does not propagate timezones
+ # if we are comparing a freq that does not propagate timezones
# this will raise
return False
@@ -1329,7 +1329,7 @@ def _fast_union(self, other):
left_start, left_end = left[0], left[-1]
right_end = right[-1]
- if not self.offset._should_cache():
+ if not self.freq._should_cache():
# concatenate dates
if left_end < right_end:
loc = right.searchsorted(left_end, side='right')
@@ -1341,7 +1341,7 @@ def _fast_union(self, other):
else:
return type(self)(start=left_start,
end=max(left_end, right_end),
- freq=left.offset)
+ freq=left.freq)
def __iter__(self):
"""
@@ -1393,18 +1393,18 @@ def intersection(self, other):
result = Index.intersection(self, other)
if isinstance(result, DatetimeIndex):
if result.freq is None:
- result.offset = to_offset(result.inferred_freq)
+ result.freq = to_offset(result.inferred_freq)
return result
- elif (other.offset is None or self.offset is None or
- other.offset != self.offset or
- not other.offset.isAnchored() or
+ elif (other.freq is None or self.freq is None or
+ other.freq != self.freq or
+ not other.freq.isAnchored() or
(not self.is_monotonic or not other.is_monotonic)):
result = Index.intersection(self, other)
result = self._shallow_copy(result._values, name=result.name,
tz=result.tz, freq=None)
if result.freq is None:
- result.offset = to_offset(result.inferred_freq)
+ result.freq = to_offset(result.inferred_freq)
return result
if len(self) == 0:
@@ -1729,12 +1729,28 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
@property
def freq(self):
"""get/set the frequency of the Index"""
- return self.offset
+ return self._freq
@freq.setter
def freq(self, value):
"""get/set the frequency of the Index"""
- self.offset = value
+ self._freq = value
+
+ @property
+ def offset(self):
+ """get/set the frequency of the Index"""
+ msg = ('DatetimeIndex.offset has been deprecated and will be removed '
+ 'in a future version; use DatetimeIndex.freq instead.')
+ warnings.warn(msg, FutureWarning, stacklevel=2)
+ return self.freq
+
+ @offset.setter
+ def offset(self, value):
+ """get/set the frequency of the Index"""
+ msg = ('DatetimeIndex.offset has been deprecated and will be removed '
+ 'in a future version; use DatetimeIndex.freq instead.')
+ warnings.warn(msg, FutureWarning, stacklevel=2)
+ self.freq = value
year = _field_accessor('year', 'Y', "The year of the datetime")
month = _field_accessor('month', 'M',
@@ -2525,9 +2541,9 @@ def day_name(self, locale=None):
DatetimeIndex._add_datetimelike_methods()
-def _generate_regular_range(start, end, periods, offset):
- if isinstance(offset, Tick):
- stride = offset.nanos
+def _generate_regular_range(start, end, periods, freq):
+ if isinstance(freq, Tick):
+ stride = freq.nanos
if periods is None:
b = Timestamp(start).value
# cannot just use e = Timestamp(end) + 1 because arange breaks when
@@ -2558,7 +2574,7 @@ def _generate_regular_range(start, end, periods, offset):
end = end.to_pydatetime()
xdr = generate_range(start=start, end=end,
- periods=periods, offset=offset)
+ periods=periods, offset=freq)
dates = list(xdr)
# utc = len(dates) > 0 and dates[0].tzinfo is not None
@@ -2855,9 +2871,9 @@ def _in_range(start, end, rng_start, rng_end):
return start > rng_start and end < rng_end
-def _use_cached_range(offset, _normalized, start, end):
- return (offset._should_cache() and
- not (offset._normalize_cache and not _normalized) and
+def _use_cached_range(freq, _normalized, start, end):
+ return (freq._should_cache() and
+ not (freq._normalize_cache and not _normalized) and
_naive_in_cache_range(start, end))
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 9736bba78ab72..2eb52ecc6bcc7 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -532,7 +532,8 @@ def setter(item, v):
def can_do_equal_len():
""" return True if we have an equal len settable """
- if not len(labels) == 1 or not np.iterable(value):
+ if (not len(labels) == 1 or not np.iterable(value) or
+ is_scalar(plane_indexer[0])):
return False
l = len(value)
@@ -2310,6 +2311,49 @@ def check_bool_indexer(ax, key):
return result
+def check_setitem_lengths(indexer, value, values):
+ """Validate that value and indexer are the same length.
+
+ A special case is allowed when the indexer is a boolean array
+ and the number of true values equals the length of ``value``. In
+ this case, no exception is raised.
+
+ Parameters
+ ----------
+ indexer : sequence
+ The key for the setitem
+ value : array-like
+ The value for the setitem
+ values : array-like
+ The values being set into
+
+ Returns
+ -------
+ None
+
+ Raises
+ ------
+ ValueError
+ When the indexer is an ndarray or list and the lengths don't
+ match.
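+
+ Examples
+ --------
+ A boolean indexer may be longer than ``value`` as long as the number
+ of ``True`` entries matches ``len(value)``; a minimal sketch::
+
+     >>> import numpy as np
+     >>> values = np.arange(4)
+     >>> indexer = np.array([True, False, True, False])
+     >>> check_setitem_lengths(indexer, [10, 20], values)  # no error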
+ """
+ # boolean with truth values == len of the value is ok too
+ if isinstance(indexer, (np.ndarray, list)):
+ if is_list_like(value) and len(indexer) != len(value):
+ if not (isinstance(indexer, np.ndarray) and
+ indexer.dtype == np.bool_ and
+ len(indexer[indexer]) == len(value)):
+ raise ValueError("cannot set using a list-like indexer "
+ "with a different length than the value")
+ # slice
+ elif isinstance(indexer, slice):
+
+ if is_list_like(value) and len(values):
+ if len(value) != length_of_indexer(indexer, values):
+ raise ValueError("cannot set using a slice indexer with a "
+ "different length than the value")
+
+
def convert_missing_indexer(indexer):
""" reverse convert a missing indexer, which is a dict
return the scalar indexer and a boolean indicating if we converted
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index e8fab3748bacf..37d11296400be 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -66,7 +66,7 @@
import pandas.core.algorithms as algos
from pandas.core.index import Index, MultiIndex, _ensure_index
-from pandas.core.indexing import maybe_convert_indices, length_of_indexer
+from pandas.core.indexing import maybe_convert_indices, check_setitem_lengths
from pandas.core.arrays import Categorical
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
@@ -817,11 +817,24 @@ def _replace_single(self, *args, **kwargs):
return self if kwargs['inplace'] else self.copy()
def setitem(self, indexer, value, mgr=None):
- """ set the value inplace; return a new block (of a possibly different
- dtype)
+ """Set the value inplace, returning a a maybe different typed block.
- indexer is a direct slice/positional indexer; value must be a
- compatible shape
+ Parameters
+ ----------
+ indexer : tuple, list-like, array-like, slice
+ The subset of self.values to set
+ value : object
+ The value being set
+ mgr : BlockPlacement, optional
+
+ Returns
+ -------
+ Block
+
+ Notes
+ -----
+ `indexer` is a direct slice/positional indexer. `value` must
+ be a compatible shape.
"""
# coerce None values, if appropriate
if value is None:
@@ -876,22 +889,7 @@ def setitem(self, indexer, value, mgr=None):
values = transf(values)
# length checking
- # boolean with truth values == len of the value is ok too
- if isinstance(indexer, (np.ndarray, list)):
- if is_list_like(value) and len(indexer) != len(value):
- if not (isinstance(indexer, np.ndarray) and
- indexer.dtype == np.bool_ and
- len(indexer[indexer]) == len(value)):
- raise ValueError("cannot set using a list-like indexer "
- "with a different length than the value")
-
- # slice
- elif isinstance(indexer, slice):
-
- if is_list_like(value) and len(values):
- if len(value) != length_of_indexer(indexer, values):
- raise ValueError("cannot set using a slice indexer with a "
- "different length than the value")
+ check_setitem_lengths(indexer, value, values)
def _is_scalar_indexer(indexer):
# return True if we are all scalar indexers
@@ -1900,6 +1898,37 @@ def is_view(self):
"""Extension arrays are never treated as views."""
return False
+ def setitem(self, indexer, value, mgr=None):
+ """Set the value inplace, returning a same-typed block.
+
+ This differs from Block.setitem by not allowing setitem to change
+ the dtype of the Block.
+
+ Parameters
+ ----------
+ indexer : tuple, list-like, array-like, slice
+ The subset of self.values to set
+ value : object
+ The value being set
+ mgr : BlockPlacement, optional
+
+ Returns
+ -------
+ Block
+
+ Notes
+ -----
+ `indexer` is a direct slice/positional indexer. `value` must
+ be a compatible shape.
+ """
+ if isinstance(indexer, tuple):
+ # we are always 1-D
+ indexer = indexer[0]
+
+ check_setitem_lengths(indexer, value, self.values)
+ self.values[indexer] = value
+ return self
+
def get_values(self, dtype=None):
# ExtensionArrays must be iterable, so this works.
values = np.asarray(self.values)
@@ -3519,7 +3548,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
# with a .values attribute.
aligned_args = dict((k, kwargs[k])
for k in align_keys
- if hasattr(kwargs[k], 'values'))
+ if hasattr(kwargs[k], 'values') and
+ not isinstance(kwargs[k], ABCExtensionArray))
for b in self.blocks:
if filter is not None:
@@ -5220,7 +5250,7 @@ def _safe_reshape(arr, new_shape):
If possible, reshape `arr` to have shape `new_shape`,
with a couple of exceptions (see gh-13012):
- 1) If `arr` is a Categorical or Index, `arr` will be
+ 1) If `arr` is an ExtensionArray or Index, `arr` will be
returned as is.
2) If `arr` is a Series, the `_values` attribute will
be reshaped and returned.
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 0d0023b9f67d3..f8d283e932f44 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -659,6 +659,7 @@ def fillna(self, method, limit=None):
pad : Forward fill NaN values in the resampled data.
nearest : Fill NaN values in the resampled data
with nearest neighbor starting from center.
+ interpolate : Fill NaN values using interpolation.
pandas.Series.fillna : Fill NaN values in the Series using the
specified method, which can be 'bfill' and 'ffill'.
pandas.DataFrame.fillna : Fill NaN values in the DataFrame using the
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 74a9b59d3194a..6c3401bdc58e3 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -29,6 +29,11 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
index = _convert_by(index)
columns = _convert_by(columns)
+ num_rows = data.reindex(index, axis='columns').shape[0]
+ num_columns = data.reindex(columns, axis='columns').shape[0]
+ if num_rows * num_columns > (2 ** 31 - 1):
+ raise ValueError('Pivot table is too big, causing int32 overflow')
+
if isinstance(aggfunc, list):
pieces = []
keys = []
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 389f1af48434a..fe9fc9b674f50 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -127,6 +127,11 @@ def __init__(self, values, index, level=-1, value_columns=None,
self.removed_level = self.new_index_levels.pop(self.level)
self.removed_level_full = index.levels[self.level]
+ num_rows = np.max([index_level.size for index_level in self.new_index_levels])
+ num_columns = self.removed_level.size
+ if num_rows * num_columns > (2 ** 31 - 1):
+ raise ValueError('Unstacked data frame is too big, causing int32 overflow')
+
self._make_sorted_values_labels()
self._make_selectors()
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1d6f770d92795..2ed4c99b7a998 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3130,7 +3130,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
>>> def add_custom_values(x, **kwargs):
... for month in kwargs:
... x+=kwargs[month]
- ... return x
+ ... return x
>>> series.apply(add_custom_values, june=30, july=20, august=25)
London 95
diff --git a/pandas/core/window.py b/pandas/core/window.py
index 5cd4fffb5d7dd..f8b5aa292f309 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -314,7 +314,7 @@ def _center_window(self, result, window):
def aggregate(self, arg, *args, **kwargs):
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
- return self.apply(arg, args=args, kwargs=kwargs)
+ return self.apply(arg, raw=False, args=args, kwargs=kwargs)
return result
agg = aggregate
@@ -954,23 +954,53 @@ def count(self):
Parameters
----------
func : function
- Must produce a single value from an ndarray input
- \*args and \*\*kwargs are passed to the function""")
+ Must produce a single value from an ndarray input if ``raw=True``
+ or a Series if ``raw=False``
+ raw : bool, default None
+ * ``False`` : passes each row or column as a Series to the
+ function.
+ * ``True`` or ``None`` : the passed function will receive ndarray
+ objects instead.
+ If you are just applying a NumPy reduction function this will
+ achieve much better performance.
+
+        The `raw` parameter shows a FutureWarning if not passed
+        explicitly; in the future `raw` will default to ``False``.
+
+ .. versionadded:: 0.23.0
+
+ \*args and \*\*kwargs are passed to the function""")
+
-    def apply(self, func, args=(), kwargs={}):
+    def apply(self, func, raw=None, args=(), kwargs={}):
+        from pandas import Series
# TODO: _level is unused?
_level = kwargs.pop('_level', None) # noqa
window = self._get_window()
offset = _offset(window, self.center)
index, indexi = self._get_index()
+ # TODO: default is for backward compat
+ # change to False in the future
+ if raw is None:
+ warnings.warn(
+ "Currently, 'apply' passes the values as ndarrays to the "
+ "applied function. In the future, this will change to passing "
+ "it as Series objects. You need to specify 'raw=True' to keep "
+ "the current behaviour, and you can pass 'raw=False' to "
+ "silence this warning", FutureWarning, stacklevel=3)
+ raw = True
+
def f(arg, window, min_periods, closed):
minp = _use_window(min_periods, window)
- return _window.roll_generic(arg, window, minp, indexi, closed,
- offset, func, args, kwargs)
+ if not raw:
+ arg = Series(arg, index=self.obj.index)
+ return _window.roll_generic(
+ arg, window, minp, indexi,
+ closed, offset, func, raw, args, kwargs)
return self._apply(f, func, args=args, kwargs=kwargs,
- center=False)
+ center=False, raw=raw)
def sum(self, *args, **kwargs):
nv.validate_window_func('sum', args, kwargs)
@@ -1498,8 +1528,9 @@ def count(self):
@Substitution(name='rolling')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
- def apply(self, func, args=(), kwargs={}):
- return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
+ def apply(self, func, raw=None, args=(), kwargs={}):
+ return super(Rolling, self).apply(
+ func, raw=raw, args=args, kwargs=kwargs)
@Substitution(name='rolling')
@Appender(_shared_docs['sum'])
@@ -1756,8 +1787,9 @@ def count(self, **kwargs):
@Substitution(name='expanding')
@Appender(_doc_template)
@Appender(_shared_docs['apply'])
- def apply(self, func, args=(), kwargs={}):
- return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
+ def apply(self, func, raw=None, args=(), kwargs={}):
+ return super(Expanding, self).apply(
+ func, raw=raw, args=args, kwargs=kwargs)
@Substitution(name='expanding')
@Appender(_shared_docs['sum'])
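Taken together, the `window.py` changes deprecate the implicit ndarray behaviour of `.apply` on rolling and expanding windows. A short sketch of the new keyword as described in the docstring above (illustrative data, not from this diff):

```python
import numpy as np
import pandas as pd

s = pd.Series(np.arange(5, dtype='float64'))

# raw=True: func receives a plain ndarray per window (the fast path).
s.rolling(2).apply(lambda x: x[0], raw=True)

# raw=False: func receives a Series per window, with index labels attached.
s.rolling(2).apply(lambda x: x.iloc[0], raw=False)

# Omitting raw keeps the ndarray behaviour but emits the FutureWarning
# added above.
s.rolling(2).apply(lambda x: x[0])
```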
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index f9a496edb45a3..4004a6ea8f6ff 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -887,7 +887,10 @@ def remove(self, key, where=None, start=None, stop=None):
where = _ensure_term(where, scope_level=1)
try:
s = self.get_storer(key)
- except:
+ except KeyError:
+ # the key is not a valid store, re-raising KeyError
+ raise
+ except Exception:
if where is not None:
raise ValueError(
@@ -899,9 +902,6 @@ def remove(self, key, where=None, start=None, stop=None):
s._f_remove(recursive=True)
return None
- if s is None:
- raise KeyError('No object named %s in the file' % key)
-
# remove the node
if com._all_none(where, start, stop):
s.group._f_remove(recursive=True)
@@ -1094,7 +1094,8 @@ def get_storer(self, key):
""" return the storer object for a key, raise if not in the file """
group = self.get_node(key)
if group is None:
- return None
+ raise KeyError('No object named {} in the file'.format(key))
+
s = self._create_storer(group)
s.infer_axes()
return s
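Net effect of the two `pytables.py` hunks: `get_storer` now raises instead of returning `None`, and `remove` re-raises that `KeyError`, so a missing key fails at the lookup rather than in later `None` handling. A minimal sketch, assuming a throwaway file name (requires PyTables):

```python
import pandas as pd

with pd.HDFStore('example.h5', mode='w') as store:
    store.put('df', pd.DataFrame({'a': [1, 2]}))
    try:
        store.remove('no_such_key')
    except KeyError as err:
        print(err)  # 'No object named no_such_key in the file'
```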
diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py
index 590f28b275aec..20cd8b43478d2 100644
--- a/pandas/tests/dtypes/test_cast.py
+++ b/pandas/tests/dtypes/test_cast.py
@@ -191,9 +191,9 @@ def testinfer_dtype_from_scalar_errors(self):
(pd.Categorical(list('aabc')), 'category', True),
(pd.Categorical([1, 2, 3]), 'category', True),
(Timestamp('20160101'), np.object_, False),
-        (np.datetime64('2016-01-01'), np.dtype('<M8[ns]'), True),
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
-    def test_empty_sum(self):
-        # https://github.com/pandas-dev/pandas/issues/18678
-        df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
-                                               categories=['a', 'b', 'c']),
-                           'B': [1, 2, 1]})
-        expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
-
-        # 0 by default
-        result = df.groupby("A").B.sum()
-        expected = pd.Series([3, 1, 0], expected_idx, name='B')
-        tm.assert_series_equal(result, expected)
-
-        # min_count=0
-        result = df.groupby("A").B.sum(min_count=0)
-        expected = pd.Series([3, 1, 0], expected_idx, name='B')
-        tm.assert_series_equal(result, expected)
-
-        # min_count=1
-        result = df.groupby("A").B.sum(min_count=1)
-        expected = pd.Series([3, 1, np.nan], expected_idx, name='B')
-        tm.assert_series_equal(result, expected)
-
-        # min_count>1
- result = df.groupby("A").B.sum(min_count=2)
- expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B')
- tm.assert_series_equal(result, expected)
-
- def test_empty_prod(self):
- # https://github.com/pandas-dev/pandas/issues/18678
- df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
- categories=['a', 'b', 'c']),
- 'B': [1, 2, 1]})
-
- expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
-
- # 1 by default
- result = df.groupby("A").B.prod()
- expected = pd.Series([2, 1, 1], expected_idx, name='B')
- tm.assert_series_equal(result, expected)
-
- # min_count=0
- result = df.groupby("A").B.prod(min_count=0)
- expected = pd.Series([2, 1, 1], expected_idx, name='B')
- tm.assert_series_equal(result, expected)
-
- # min_count=1
- result = df.groupby("A").B.prod(min_count=1)
- expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
- tm.assert_series_equal(result, expected)
+ for col in ['C1', 'C2']:
+ result1 = df.groupby(by=col, as_index=False).mean()
+ result2 = df.groupby(by=col, as_index=True).mean().reset_index()
+ expected = exp_full.reindex(columns=result1.columns)
+ tm.assert_frame_equal(result1, expected)
+ tm.assert_frame_equal(result2, expected)
+
+ # multiple grouper
+ exp_full = DataFrame({'A': [1, 1, 1, 2, 2, 2],
+ 'B': [np.nan, 20.0, np.nan, 25.0, np.nan,
+ np.nan],
+ 'C1': Categorical(list("bacbac"),
+ categories=list("bac"),
+ ordered=False),
+ 'C2': Categorical(list("bacbac"),
+ categories=list("bac"),
+ ordered=True)})
+ for cols in [['A', 'C1'], ['A', 'C2']]:
+ result1 = df.groupby(by=cols, as_index=False).mean()
+ result2 = df.groupby(by=cols, as_index=True).mean().reset_index()
+ expected = exp_full.reindex(columns=result1.columns)
+ tm.assert_frame_equal(result1, expected)
+ tm.assert_frame_equal(result2, expected)
+
+
+def test_groupby_categorical_no_compress():
+ data = Series(np.random.randn(9))
+
+ codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
+ cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True)
+
+ result = data.groupby(cats).mean()
+ exp = data.groupby(codes).mean()
+
+ exp.index = CategoricalIndex(exp.index, categories=cats.categories,
+ ordered=cats.ordered)
+ assert_series_equal(result, exp)
+
+ codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3])
+ cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True)
+
+ result = data.groupby(cats).mean()
+ exp = data.groupby(codes).mean().reindex(cats.categories)
+ exp.index = CategoricalIndex(exp.index, categories=cats.categories,
+ ordered=cats.ordered)
+ assert_series_equal(result, exp)
+
+ cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+ categories=["a", "b", "c", "d"], ordered=True)
+ data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})
+
+ result = data.groupby("b").mean()
+ result = result["a"].values
+ exp = np.array([1, 2, 4, np.nan])
+ tm.assert_numpy_array_equal(result, exp)
+
+
+def test_groupby_sort_categorical():
+ # dataframe groupby sort was being ignored # GH 8868
+ df = DataFrame([['(7.5, 10]', 10, 10],
+ ['(7.5, 10]', 8, 20],
+ ['(2.5, 5]', 5, 30],
+ ['(5, 7.5]', 6, 40],
+ ['(2.5, 5]', 4, 50],
+ ['(0, 2.5]', 1, 60],
+ ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar'])
+ df['range'] = Categorical(df['range'], ordered=True)
+ index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
+ '(7.5, 10]'], name='range', ordered=True)
+ result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
+ columns=['foo', 'bar'], index=index)
+
+ col = 'range'
+ assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+ # when categories is ordered, group is ordered by category's order
+ assert_frame_equal(result_sort, df.groupby(col, sort=False).first())
+
+ df['range'] = Categorical(df['range'], ordered=False)
+ index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
+ '(7.5, 10]'], name='range')
+ result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
+ columns=['foo', 'bar'], index=index)
+
+ index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]',
+ '(0, 2.5]'],
+ categories=['(7.5, 10]', '(2.5, 5]',
+ '(5, 7.5]', '(0, 2.5]'],
+ name='range')
+ result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
+ index=index, columns=['foo', 'bar'])
+
+ col = 'range'
+ # this is an unordered categorical, but we allow this ####
+ assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+ assert_frame_equal(result_nosort, df.groupby(col, sort=False).first())
+
+
+def test_groupby_sort_categorical_datetimelike():
+ # GH10505
+
+ # use same data as test_groupby_sort_categorical, which category is
+ # corresponding to datetime.month
+ df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1),
+ datetime(2011, 2, 1), datetime(2011, 5, 1),
+ datetime(2011, 2, 1), datetime(2011, 1, 1),
+ datetime(2011, 5, 1)],
+ 'foo': [10, 8, 5, 6, 4, 1, 7],
+ 'bar': [10, 20, 30, 40, 50, 60, 70]},
+ columns=['dt', 'foo', 'bar'])
+
+ # ordered=True
+ df['dt'] = Categorical(df['dt'], ordered=True)
+ index = [datetime(2011, 1, 1), datetime(2011, 2, 1),
+ datetime(2011, 5, 1), datetime(2011, 7, 1)]
+ result_sort = DataFrame(
+ [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'])
+ result_sort.index = CategoricalIndex(index, name='dt', ordered=True)
+
+ index = [datetime(2011, 7, 1), datetime(2011, 2, 1),
+ datetime(2011, 5, 1), datetime(2011, 1, 1)]
+ result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
+ columns=['foo', 'bar'])
+ result_nosort.index = CategoricalIndex(index, categories=index,
+ name='dt', ordered=True)
+
+ col = 'dt'
+ assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+ # when categories is ordered, group is ordered by category's order
+ assert_frame_equal(result_sort, df.groupby(col, sort=False).first())
+
+ # ordered = False
+ df['dt'] = Categorical(df['dt'], ordered=False)
+ index = [datetime(2011, 1, 1), datetime(2011, 2, 1),
+ datetime(2011, 5, 1), datetime(2011, 7, 1)]
+ result_sort = DataFrame(
+ [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'])
+ result_sort.index = CategoricalIndex(index, name='dt')
+
+ index = [datetime(2011, 7, 1), datetime(2011, 2, 1),
+ datetime(2011, 5, 1), datetime(2011, 1, 1)]
+ result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
+ columns=['foo', 'bar'])
+ result_nosort.index = CategoricalIndex(index, categories=index,
+ name='dt')
+
+ col = 'dt'
+ assert_frame_equal(result_sort, df.groupby(col, sort=True).first())
+ assert_frame_equal(result_nosort, df.groupby(col, sort=False).first())
+
+
+def test_groupby_categorical_two_columns():
+
+ # https://github.com/pandas-dev/pandas/issues/8138
+ d = {'cat':
+ pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"],
+ ordered=True),
+ 'ints': [1, 1, 2, 2],
+ 'val': [10, 20, 30, 40]}
+ test = pd.DataFrame(d)
+
+ # Grouping on a single column
+ groups_single_key = test.groupby("cat")
+ res = groups_single_key.agg('mean')
+
+ exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat",
+ ordered=True)
+ exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]},
+ index=exp_index)
+ tm.assert_frame_equal(res, exp)
+
+ # Grouping on two columns
+ groups_double_key = test.groupby(["cat", "ints"])
+ res = groups_double_key.agg('mean')
+ exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan],
+ "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"],
+ ordered=True),
+ "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints"
+ ])
+ tm.assert_frame_equal(res, exp)
+
+ # GH 10132
+ for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]:
+ c, i = key
+ result = groups_double_key.get_group(key)
+ expected = test[(test.cat == c) & (test.ints == i)]
+ assert_frame_equal(result, expected)
+
+ d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]}
+ test = pd.DataFrame(d)
+ values = pd.cut(test['C1'], [1, 2, 3, 6])
+ values.name = "cat"
+ groups_double_key = test.groupby([values, 'C2'])
+
+ res = groups_double_key.agg('mean')
+ nan = np.nan
+ idx = MultiIndex.from_product(
+ [Categorical([Interval(1, 2), Interval(2, 3),
+ Interval(3, 6)], ordered=True),
+ [1, 2, 3, 4]],
+ names=["cat", "C2"])
+ exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3,
+ nan, nan, nan, nan, 4, 5],
+ "C3": [nan, nan, nan, nan, 10, 100,
+ nan, nan, nan, nan, 200, 34]}, index=idx)
+ tm.assert_frame_equal(res, exp)
+
+
+def test_empty_sum():
+ # https://github.com/pandas-dev/pandas/issues/18678
+ df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
+ categories=['a', 'b', 'c']),
+ 'B': [1, 2, 1]})
+ expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
+
+ # 0 by default
+ result = df.groupby("A").B.sum()
+ expected = pd.Series([3, 1, 0], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
+
+ # min_count=0
+ result = df.groupby("A").B.sum(min_count=0)
+ expected = pd.Series([3, 1, 0], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
+
+ # min_count=1
+ result = df.groupby("A").B.sum(min_count=1)
+ expected = pd.Series([3, 1, np.nan], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
+
+ # min_count>1
+ result = df.groupby("A").B.sum(min_count=2)
+ expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
+
+
+def test_empty_prod():
+ # https://github.com/pandas-dev/pandas/issues/18678
+ df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
+ categories=['a', 'b', 'c']),
+ 'B': [1, 2, 1]})
+
+ expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
+
+ # 1 by default
+ result = df.groupby("A").B.prod()
+ expected = pd.Series([2, 1, 1], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
+
+ # min_count=0
+ result = df.groupby("A").B.prod(min_count=0)
+ expected = pd.Series([2, 1, 1], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
+
+ # min_count=1
+ result = df.groupby("A").B.prod(min_count=1)
+ expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
index cac6b46af8f87..873d9f6076b69 100644
--- a/pandas/tests/groupby/test_filters.py
+++ b/pandas/tests/groupby/test_filters.py
@@ -1,622 +1,576 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
-from numpy import nan
-
import pytest
-from pandas import Timestamp
-from pandas.core.index import MultiIndex
-from pandas.core.api import DataFrame
-
-from pandas.core.series import Series
-
-from pandas.util.testing import (assert_frame_equal, assert_series_equal
- )
-from pandas.compat import (lmap)
-
-from pandas import compat
-
-import pandas.core.common as com
import numpy as np
-
import pandas.util.testing as tm
+from pandas import Timestamp, DataFrame, Series
import pandas as pd
-class TestGroupByFilter(object):
-
- def setup_method(self, method):
- self.ts = tm.makeTimeSeries()
-
- self.seriesd = tm.getSeriesData()
- self.tsd = tm.getTimeSeriesData()
- self.frame = DataFrame(self.seriesd)
- self.tsframe = DataFrame(self.tsd)
-
- self.df = DataFrame(
- {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
- 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
- 'C': np.random.randn(8),
- 'D': np.random.randn(8)})
-
- self.df_mixed_floats = DataFrame(
- {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
- 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
- 'C': np.random.randn(8),
- 'D': np.array(
- np.random.randn(8), dtype='float32')})
-
- index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
- 'three']],
- labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
- [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
- names=['first', 'second'])
- self.mframe = DataFrame(np.random.randn(10, 3), index=index,
- columns=['A', 'B', 'C'])
-
- self.three_group = DataFrame(
- {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
- 'foo', 'foo', 'foo'],
- 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
- 'two', 'two', 'one'],
- 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
- 'dull', 'shiny', 'shiny', 'shiny'],
- 'D': np.random.randn(11),
- 'E': np.random.randn(11),
- 'F': np.random.randn(11)})
-
- def test_filter_series(self):
- s = pd.Series([1, 3, 20, 5, 22, 24, 7])
- expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6])
- expected_even = pd.Series([20, 22, 24], index=[2, 4, 5])
- grouper = s.apply(lambda x: x % 2)
- grouped = s.groupby(grouper)
- assert_series_equal(
- grouped.filter(lambda x: x.mean() < 10), expected_odd)
- assert_series_equal(
- grouped.filter(lambda x: x.mean() > 10), expected_even)
- # Test dropna=False.
- assert_series_equal(
- grouped.filter(lambda x: x.mean() < 10, dropna=False),
- expected_odd.reindex(s.index))
- assert_series_equal(
- grouped.filter(lambda x: x.mean() > 10, dropna=False),
- expected_even.reindex(s.index))
-
- def test_filter_single_column_df(self):
- df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
- expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
- expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5])
- grouper = df[0].apply(lambda x: x % 2)
- grouped = df.groupby(grouper)
- assert_frame_equal(
- grouped.filter(lambda x: x.mean() < 10), expected_odd)
- assert_frame_equal(
- grouped.filter(lambda x: x.mean() > 10), expected_even)
- # Test dropna=False.
- assert_frame_equal(
- grouped.filter(lambda x: x.mean() < 10, dropna=False),
- expected_odd.reindex(df.index))
- assert_frame_equal(
- grouped.filter(lambda x: x.mean() > 10, dropna=False),
- expected_even.reindex(df.index))
-
- def test_filter_multi_column_df(self):
- df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]})
- grouper = df['A'].apply(lambda x: x % 2)
- grouped = df.groupby(grouper)
- expected = pd.DataFrame({'A': [12, 12], 'B': [1, 1]}, index=[1, 2])
- assert_frame_equal(
- grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10),
- expected)
-
- def test_filter_mixed_df(self):
- df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
- grouper = df['A'].apply(lambda x: x % 2)
- grouped = df.groupby(grouper)
- expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2])
- assert_frame_equal(
- grouped.filter(lambda x: x['A'].sum() > 10), expected)
-
- def test_filter_out_all_groups(self):
- s = pd.Series([1, 3, 20, 5, 22, 24, 7])
- grouper = s.apply(lambda x: x % 2)
- grouped = s.groupby(grouper)
- assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]])
- df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
- grouper = df['A'].apply(lambda x: x % 2)
- grouped = df.groupby(grouper)
- assert_frame_equal(
- grouped.filter(lambda x: x['A'].sum() > 1000), df.loc[[]])
-
- def test_filter_out_no_groups(self):
- s = pd.Series([1, 3, 20, 5, 22, 24, 7])
- grouper = s.apply(lambda x: x % 2)
- grouped = s.groupby(grouper)
- filtered = grouped.filter(lambda x: x.mean() > 0)
- assert_series_equal(filtered, s)
- df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
- grouper = df['A'].apply(lambda x: x % 2)
- grouped = df.groupby(grouper)
- filtered = grouped.filter(lambda x: x['A'].mean() > 0)
- assert_frame_equal(filtered, df)
-
- def test_filter_out_all_groups_in_df(self):
- # GH12768
- df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
- res = df.groupby('a')
- res = res.filter(lambda x: x['b'].sum() > 5, dropna=False)
- expected = pd.DataFrame({'a': [nan] * 3, 'b': [nan] * 3})
- assert_frame_equal(expected, res)
-
- df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
- res = df.groupby('a')
- res = res.filter(lambda x: x['b'].sum() > 5, dropna=True)
- expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64")
- assert_frame_equal(expected, res)
-
- def test_filter_condition_raises(self):
- def raise_if_sum_is_zero(x):
- if x.sum() == 0:
- raise ValueError
- else:
- return x.sum() > 0
-
- s = pd.Series([-1, 0, 1, 2])
- grouper = s.apply(lambda x: x % 2)
- grouped = s.groupby(grouper)
- pytest.raises(TypeError,
- lambda: grouped.filter(raise_if_sum_is_zero))
-
- def test_filter_with_axis_in_groupby(self):
- # issue 11041
- index = pd.MultiIndex.from_product([range(10), [0, 1]])
- data = pd.DataFrame(
- np.arange(100).reshape(-1, 20), columns=index, dtype='int64')
- result = data.groupby(level=0,
- axis=1).filter(lambda x: x.iloc[0, 0] > 10)
- expected = data.iloc[:, 12:20]
- assert_frame_equal(result, expected)
-
- def test_filter_bad_shapes(self):
- df = DataFrame({'A': np.arange(8),
- 'B': list('aabbbbcc'),
- 'C': np.arange(8)})
- s = df['B']
- g_df = df.groupby('B')
- g_s = s.groupby(s)
-
- f = lambda x: x
- pytest.raises(TypeError, lambda: g_df.filter(f))
- pytest.raises(TypeError, lambda: g_s.filter(f))
-
- f = lambda x: x == 1
- pytest.raises(TypeError, lambda: g_df.filter(f))
- pytest.raises(TypeError, lambda: g_s.filter(f))
-
- f = lambda x: np.outer(x, x)
- pytest.raises(TypeError, lambda: g_df.filter(f))
- pytest.raises(TypeError, lambda: g_s.filter(f))
-
- def test_filter_nan_is_false(self):
- df = DataFrame({'A': np.arange(8),
- 'B': list('aabbbbcc'),
- 'C': np.arange(8)})
- s = df['B']
- g_df = df.groupby(df['B'])
- g_s = s.groupby(s)
-
- f = lambda x: np.nan
- assert_frame_equal(g_df.filter(f), df.loc[[]])
- assert_series_equal(g_s.filter(f), s[[]])
-
- def test_filter_against_workaround(self):
- np.random.seed(0)
- # Series of ints
- s = Series(np.random.randint(0, 100, 1000))
- grouper = s.apply(lambda x: np.round(x, -1))
- grouped = s.groupby(grouper)
- f = lambda x: x.mean() > 10
-
- old_way = s[grouped.transform(f).astype('bool')]
- new_way = grouped.filter(f)
- assert_series_equal(new_way.sort_values(), old_way.sort_values())
-
- # Series of floats
- s = 100 * Series(np.random.random(1000))
- grouper = s.apply(lambda x: np.round(x, -1))
- grouped = s.groupby(grouper)
- f = lambda x: x.mean() > 10
- old_way = s[grouped.transform(f).astype('bool')]
- new_way = grouped.filter(f)
- assert_series_equal(new_way.sort_values(), old_way.sort_values())
-
- # Set up DataFrame of ints, floats, strings.
- from string import ascii_lowercase
- letters = np.array(list(ascii_lowercase))
- N = 1000
- random_letters = letters.take(np.random.randint(0, 26, N))
- df = DataFrame({'ints': Series(np.random.randint(0, 100, N)),
- 'floats': N / 10 * Series(np.random.random(N)),
- 'letters': Series(random_letters)})
-
- # Group by ints; filter on floats.
- grouped = df.groupby('ints')
- old_way = df[grouped.floats.
- transform(lambda x: x.mean() > N / 20).astype('bool')]
- new_way = grouped.filter(lambda x: x['floats'].mean() > N / 20)
- assert_frame_equal(new_way, old_way)
-
- # Group by floats (rounded); filter on strings.
- grouper = df.floats.apply(lambda x: np.round(x, -1))
- grouped = df.groupby(grouper)
- old_way = df[grouped.letters.
- transform(lambda x: len(x) < N / 10).astype('bool')]
- new_way = grouped.filter(lambda x: len(x.letters) < N / 10)
- assert_frame_equal(new_way, old_way)
-
- # Group by strings; filter on ints.
- grouped = df.groupby('letters')
- old_way = df[grouped.ints.
- transform(lambda x: x.mean() > N / 20).astype('bool')]
- new_way = grouped.filter(lambda x: x['ints'].mean() > N / 20)
- assert_frame_equal(new_way, old_way)
-
- def test_filter_using_len(self):
- # BUG GH4447
- df = DataFrame({'A': np.arange(8),
- 'B': list('aabbbbcc'),
- 'C': np.arange(8)})
- grouped = df.groupby('B')
- actual = grouped.filter(lambda x: len(x) > 2)
- expected = DataFrame(
- {'A': np.arange(2, 6),
- 'B': list('bbbb'),
- 'C': np.arange(2, 6)}, index=np.arange(2, 6))
- assert_frame_equal(actual, expected)
-
- actual = grouped.filter(lambda x: len(x) > 4)
- expected = df.loc[[]]
- assert_frame_equal(actual, expected)
-
- # Series have always worked properly, but we'll test anyway.
- s = df['B']
- grouped = s.groupby(s)
- actual = grouped.filter(lambda x: len(x) > 2)
- expected = Series(4 * ['b'], index=np.arange(2, 6), name='B')
- assert_series_equal(actual, expected)
-
- actual = grouped.filter(lambda x: len(x) > 4)
- expected = s[[]]
- assert_series_equal(actual, expected)
-
- def test_filter_maintains_ordering(self):
- # Simple case: index is sequential. #4621
- df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
- 'tag': [23, 45, 62, 24, 45, 34, 25, 62]})
- s = df['pid']
- grouped = df.groupby('tag')
- actual = grouped.filter(lambda x: len(x) > 1)
- expected = df.iloc[[1, 2, 4, 7]]
- assert_frame_equal(actual, expected)
-
- grouped = s.groupby(df['tag'])
- actual = grouped.filter(lambda x: len(x) > 1)
- expected = s.iloc[[1, 2, 4, 7]]
- assert_series_equal(actual, expected)
-
- # Now index is sequentially decreasing.
- df.index = np.arange(len(df) - 1, -1, -1)
- s = df['pid']
- grouped = df.groupby('tag')
- actual = grouped.filter(lambda x: len(x) > 1)
- expected = df.iloc[[1, 2, 4, 7]]
- assert_frame_equal(actual, expected)
-
- grouped = s.groupby(df['tag'])
- actual = grouped.filter(lambda x: len(x) > 1)
- expected = s.iloc[[1, 2, 4, 7]]
- assert_series_equal(actual, expected)
-
- # Index is shuffled.
- SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3]
- df.index = df.index[SHUFFLED]
- s = df['pid']
- grouped = df.groupby('tag')
- actual = grouped.filter(lambda x: len(x) > 1)
- expected = df.iloc[[1, 2, 4, 7]]
- assert_frame_equal(actual, expected)
-
- grouped = s.groupby(df['tag'])
- actual = grouped.filter(lambda x: len(x) > 1)
- expected = s.iloc[[1, 2, 4, 7]]
- assert_series_equal(actual, expected)
-
- def test_filter_multiple_timestamp(self):
- # GH 10114
- df = DataFrame({'A': np.arange(5, dtype='int64'),
- 'B': ['foo', 'bar', 'foo', 'bar', 'bar'],
- 'C': Timestamp('20130101')})
-
- grouped = df.groupby(['B', 'C'])
-
- result = grouped['A'].filter(lambda x: True)
- assert_series_equal(df['A'], result)
-
- result = grouped['A'].transform(len)
- expected = Series([2, 3, 2, 3, 3], name='A')
- assert_series_equal(result, expected)
-
- result = grouped.filter(lambda x: True)
- assert_frame_equal(df, result)
-
- result = grouped.transform('sum')
- expected = DataFrame({'A': [2, 8, 2, 8, 8]})
- assert_frame_equal(result, expected)
-
- result = grouped.transform(len)
- expected = DataFrame({'A': [2, 3, 2, 3, 3]})
- assert_frame_equal(result, expected)
-
- def test_filter_and_transform_with_non_unique_int_index(self):
- # GH4620
- index = [1, 1, 1, 2, 1, 1, 0, 1]
- df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
- 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
- grouped_df = df.groupby('tag')
- ser = df['pid']
- grouped_ser = ser.groupby(df['tag'])
- expected_indexes = [1, 2, 4, 7]
-
- # Filter DataFrame
- actual = grouped_df.filter(lambda x: len(x) > 1)
- expected = df.iloc[expected_indexes]
- assert_frame_equal(actual, expected)
-
- actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
- expected = df.copy()
- expected.iloc[[0, 3, 5, 6]] = np.nan
- assert_frame_equal(actual, expected)
-
- # Filter Series
- actual = grouped_ser.filter(lambda x: len(x) > 1)
- expected = ser.take(expected_indexes)
- assert_series_equal(actual, expected)
-
- actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
- NA = np.nan
- expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
- # ^ made manually because this can get confusing!
- assert_series_equal(actual, expected)
-
- # Transform Series
- actual = grouped_ser.transform(len)
- expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
- assert_series_equal(actual, expected)
-
- # Transform (a column from) DataFrameGroupBy
- actual = grouped_df.pid.transform(len)
- assert_series_equal(actual, expected)
-
- def test_filter_and_transform_with_multiple_non_unique_int_index(self):
- # GH4620
- index = [1, 1, 1, 2, 0, 0, 0, 1]
- df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
- 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
- grouped_df = df.groupby('tag')
- ser = df['pid']
- grouped_ser = ser.groupby(df['tag'])
- expected_indexes = [1, 2, 4, 7]
-
- # Filter DataFrame
- actual = grouped_df.filter(lambda x: len(x) > 1)
- expected = df.iloc[expected_indexes]
- assert_frame_equal(actual, expected)
-
- actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
- expected = df.copy()
- expected.iloc[[0, 3, 5, 6]] = np.nan
- assert_frame_equal(actual, expected)
-
- # Filter Series
- actual = grouped_ser.filter(lambda x: len(x) > 1)
- expected = ser.take(expected_indexes)
- assert_series_equal(actual, expected)
-
- actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
- NA = np.nan
- expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
- # ^ made manually because this can get confusing!
- assert_series_equal(actual, expected)
-
- # Transform Series
- actual = grouped_ser.transform(len)
- expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
- assert_series_equal(actual, expected)
-
- # Transform (a column from) DataFrameGroupBy
- actual = grouped_df.pid.transform(len)
- assert_series_equal(actual, expected)
-
- def test_filter_and_transform_with_non_unique_float_index(self):
- # GH4620
- index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float)
- df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
- 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
- grouped_df = df.groupby('tag')
- ser = df['pid']
- grouped_ser = ser.groupby(df['tag'])
- expected_indexes = [1, 2, 4, 7]
-
- # Filter DataFrame
- actual = grouped_df.filter(lambda x: len(x) > 1)
- expected = df.iloc[expected_indexes]
- assert_frame_equal(actual, expected)
-
- actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
- expected = df.copy()
- expected.iloc[[0, 3, 5, 6]] = np.nan
- assert_frame_equal(actual, expected)
-
- # Filter Series
- actual = grouped_ser.filter(lambda x: len(x) > 1)
- expected = ser.take(expected_indexes)
- assert_series_equal(actual, expected)
-
- actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
- NA = np.nan
- expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
- # ^ made manually because this can get confusing!
- assert_series_equal(actual, expected)
-
- # Transform Series
- actual = grouped_ser.transform(len)
- expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
- assert_series_equal(actual, expected)
-
- # Transform (a column from) DataFrameGroupBy
- actual = grouped_df.pid.transform(len)
- assert_series_equal(actual, expected)
-
- def test_filter_and_transform_with_non_unique_timestamp_index(self):
- # GH4620
- t0 = Timestamp('2013-09-30 00:05:00')
- t1 = Timestamp('2013-10-30 00:05:00')
- t2 = Timestamp('2013-11-30 00:05:00')
- index = [t1, t1, t1, t2, t1, t1, t0, t1]
- df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
- 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
- grouped_df = df.groupby('tag')
- ser = df['pid']
- grouped_ser = ser.groupby(df['tag'])
- expected_indexes = [1, 2, 4, 7]
-
- # Filter DataFrame
- actual = grouped_df.filter(lambda x: len(x) > 1)
- expected = df.iloc[expected_indexes]
- assert_frame_equal(actual, expected)
-
- actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
- expected = df.copy()
- expected.iloc[[0, 3, 5, 6]] = np.nan
- assert_frame_equal(actual, expected)
-
- # Filter Series
- actual = grouped_ser.filter(lambda x: len(x) > 1)
- expected = ser.take(expected_indexes)
- assert_series_equal(actual, expected)
-
- actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
- NA = np.nan
- expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
- # ^ made manually because this can get confusing!
- assert_series_equal(actual, expected)
-
- # Transform Series
- actual = grouped_ser.transform(len)
- expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
- assert_series_equal(actual, expected)
-
- # Transform (a column from) DataFrameGroupBy
- actual = grouped_df.pid.transform(len)
- assert_series_equal(actual, expected)
-
- def test_filter_and_transform_with_non_unique_string_index(self):
- # GH4620
- index = list('bbbcbbab')
- df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
- 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
- grouped_df = df.groupby('tag')
- ser = df['pid']
- grouped_ser = ser.groupby(df['tag'])
- expected_indexes = [1, 2, 4, 7]
-
- # Filter DataFrame
- actual = grouped_df.filter(lambda x: len(x) > 1)
- expected = df.iloc[expected_indexes]
- assert_frame_equal(actual, expected)
-
- actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
- expected = df.copy()
- expected.iloc[[0, 3, 5, 6]] = np.nan
- assert_frame_equal(actual, expected)
-
- # Filter Series
- actual = grouped_ser.filter(lambda x: len(x) > 1)
- expected = ser.take(expected_indexes)
- assert_series_equal(actual, expected)
-
- actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
- NA = np.nan
- expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
- # ^ made manually because this can get confusing!
- assert_series_equal(actual, expected)
-
- # Transform Series
- actual = grouped_ser.transform(len)
- expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
- assert_series_equal(actual, expected)
-
- # Transform (a column from) DataFrameGroupBy
- actual = grouped_df.pid.transform(len)
- assert_series_equal(actual, expected)
-
- def test_filter_has_access_to_grouped_cols(self):
- df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=['A', 'B'])
- g = df.groupby('A')
- # previously didn't have access to col A #????
- filt = g.filter(lambda x: x['A'].sum() == 2)
- assert_frame_equal(filt, df.iloc[[0, 1]])
-
- def test_filter_enforces_scalarness(self):
- df = pd.DataFrame([
- ['best', 'a', 'x'],
- ['worst', 'b', 'y'],
- ['best', 'c', 'x'],
- ['best', 'd', 'y'],
- ['worst', 'd', 'y'],
- ['worst', 'd', 'y'],
- ['best', 'd', 'z'],
- ], columns=['a', 'b', 'c'])
- with tm.assert_raises_regex(TypeError,
- 'filter function returned a.*'):
- df.groupby('c').filter(lambda g: g['a'] == 'best')
-
- def test_filter_non_bool_raises(self):
- df = pd.DataFrame([
- ['best', 'a', 1],
- ['worst', 'b', 1],
- ['best', 'c', 1],
- ['best', 'd', 1],
- ['worst', 'd', 1],
- ['worst', 'd', 1],
- ['best', 'd', 1],
- ], columns=['a', 'b', 'c'])
- with tm.assert_raises_regex(TypeError,
- 'filter function returned a.*'):
- df.groupby('a').filter(lambda g: g.c.mean())
-
- def test_filter_dropna_with_empty_groups(self):
- # GH 10780
- data = pd.Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3))
- groupped = data.groupby(level=0)
- result_false = groupped.filter(lambda x: x.mean() > 1, dropna=False)
- expected_false = pd.Series([np.nan] * 9,
- index=np.repeat([1, 2, 3], 3))
- tm.assert_series_equal(result_false, expected_false)
-
- result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True)
- expected_true = pd.Series(index=pd.Index([], dtype=int))
- tm.assert_series_equal(result_true, expected_true)
-
-
-def assert_fp_equal(a, b):
- assert (np.abs(a - b) < 1e-12).all()
-
-
-def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
- tups = lmap(tuple, df[keys].values)
- tups = com._asarray_tuplesafe(tups)
- expected = f(df.groupby(tups)[field])
- for k, v in compat.iteritems(expected):
- assert (result[k] == v)
+def test_filter_series():
+ s = pd.Series([1, 3, 20, 5, 22, 24, 7])
+ expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6])
+ expected_even = pd.Series([20, 22, 24], index=[2, 4, 5])
+ grouper = s.apply(lambda x: x % 2)
+ grouped = s.groupby(grouper)
+ tm.assert_series_equal(
+ grouped.filter(lambda x: x.mean() < 10), expected_odd)
+ tm.assert_series_equal(
+ grouped.filter(lambda x: x.mean() > 10), expected_even)
+ # Test dropna=False.
+ tm.assert_series_equal(
+ grouped.filter(lambda x: x.mean() < 10, dropna=False),
+ expected_odd.reindex(s.index))
+ tm.assert_series_equal(
+ grouped.filter(lambda x: x.mean() > 10, dropna=False),
+ expected_even.reindex(s.index))
+
+
+def test_filter_single_column_df():
+ df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
+ expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
+ expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5])
+ grouper = df[0].apply(lambda x: x % 2)
+ grouped = df.groupby(grouper)
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x.mean() < 10), expected_odd)
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x.mean() > 10), expected_even)
+ # Test dropna=False.
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x.mean() < 10, dropna=False),
+ expected_odd.reindex(df.index))
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x.mean() > 10, dropna=False),
+ expected_even.reindex(df.index))
+
+
+def test_filter_multi_column_df():
+ df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]})
+ grouper = df['A'].apply(lambda x: x % 2)
+ grouped = df.groupby(grouper)
+ expected = pd.DataFrame({'A': [12, 12], 'B': [1, 1]}, index=[1, 2])
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10),
+ expected)
+
+
+def test_filter_mixed_df():
+ df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
+ grouper = df['A'].apply(lambda x: x % 2)
+ grouped = df.groupby(grouper)
+ expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2])
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x['A'].sum() > 10), expected)
+
+
+def test_filter_out_all_groups():
+ s = pd.Series([1, 3, 20, 5, 22, 24, 7])
+ grouper = s.apply(lambda x: x % 2)
+ grouped = s.groupby(grouper)
+ tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]])
+ df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
+ grouper = df['A'].apply(lambda x: x % 2)
+ grouped = df.groupby(grouper)
+ tm.assert_frame_equal(
+ grouped.filter(lambda x: x['A'].sum() > 1000), df.loc[[]])
+
+
+def test_filter_out_no_groups():
+ s = pd.Series([1, 3, 20, 5, 22, 24, 7])
+ grouper = s.apply(lambda x: x % 2)
+ grouped = s.groupby(grouper)
+ filtered = grouped.filter(lambda x: x.mean() > 0)
+ tm.assert_series_equal(filtered, s)
+ df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
+ grouper = df['A'].apply(lambda x: x % 2)
+ grouped = df.groupby(grouper)
+ filtered = grouped.filter(lambda x: x['A'].mean() > 0)
+ tm.assert_frame_equal(filtered, df)
+
+
+def test_filter_out_all_groups_in_df():
+ # GH12768
+ df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
+ res = df.groupby('a')
+ res = res.filter(lambda x: x['b'].sum() > 5, dropna=False)
+ expected = pd.DataFrame({'a': [np.nan] * 3, 'b': [np.nan] * 3})
+ tm.assert_frame_equal(expected, res)
+
+ df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
+ res = df.groupby('a')
+ res = res.filter(lambda x: x['b'].sum() > 5, dropna=True)
+ expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64")
+ tm.assert_frame_equal(expected, res)
+
+
+def test_filter_condition_raises():
+ def raise_if_sum_is_zero(x):
+ if x.sum() == 0:
+ raise ValueError
+ else:
+ return x.sum() > 0
+
+ s = pd.Series([-1, 0, 1, 2])
+ grouper = s.apply(lambda x: x % 2)
+ grouped = s.groupby(grouper)
+ pytest.raises(TypeError,
+ lambda: grouped.filter(raise_if_sum_is_zero))
+
+
+def test_filter_with_axis_in_groupby():
+ # issue 11041
+ index = pd.MultiIndex.from_product([range(10), [0, 1]])
+ data = pd.DataFrame(
+ np.arange(100).reshape(-1, 20), columns=index, dtype='int64')
+ result = data.groupby(level=0,
+ axis=1).filter(lambda x: x.iloc[0, 0] > 10)
+ expected = data.iloc[:, 12:20]
+ tm.assert_frame_equal(result, expected)
+
+
+def test_filter_bad_shapes():
+ df = DataFrame({'A': np.arange(8),
+ 'B': list('aabbbbcc'),
+ 'C': np.arange(8)})
+ s = df['B']
+ g_df = df.groupby('B')
+ g_s = s.groupby(s)
+
+ f = lambda x: x
+ pytest.raises(TypeError, lambda: g_df.filter(f))
+ pytest.raises(TypeError, lambda: g_s.filter(f))
+
+ f = lambda x: x == 1
+ pytest.raises(TypeError, lambda: g_df.filter(f))
+ pytest.raises(TypeError, lambda: g_s.filter(f))
+
+ f = lambda x: np.outer(x, x)
+ pytest.raises(TypeError, lambda: g_df.filter(f))
+ pytest.raises(TypeError, lambda: g_s.filter(f))
+
+
+def test_filter_nan_is_false():
+ df = DataFrame({'A': np.arange(8),
+ 'B': list('aabbbbcc'),
+ 'C': np.arange(8)})
+ s = df['B']
+ g_df = df.groupby(df['B'])
+ g_s = s.groupby(s)
+
+ f = lambda x: np.nan
+ tm.assert_frame_equal(g_df.filter(f), df.loc[[]])
+ tm.assert_series_equal(g_s.filter(f), s[[]])
+
+
+def test_filter_against_workaround():
+ np.random.seed(0)
+ # Series of ints
+ s = Series(np.random.randint(0, 100, 1000))
+ grouper = s.apply(lambda x: np.round(x, -1))
+ grouped = s.groupby(grouper)
+ f = lambda x: x.mean() > 10
+
+ old_way = s[grouped.transform(f).astype('bool')]
+ new_way = grouped.filter(f)
+ tm.assert_series_equal(new_way.sort_values(), old_way.sort_values())
+
+ # Series of floats
+ s = 100 * Series(np.random.random(1000))
+ grouper = s.apply(lambda x: np.round(x, -1))
+ grouped = s.groupby(grouper)
+ f = lambda x: x.mean() > 10
+ old_way = s[grouped.transform(f).astype('bool')]
+ new_way = grouped.filter(f)
+ tm.assert_series_equal(new_way.sort_values(), old_way.sort_values())
+
+ # Set up DataFrame of ints, floats, strings.
+ from string import ascii_lowercase
+ letters = np.array(list(ascii_lowercase))
+ N = 1000
+ random_letters = letters.take(np.random.randint(0, 26, N))
+ df = DataFrame({'ints': Series(np.random.randint(0, 100, N)),
+ 'floats': N / 10 * Series(np.random.random(N)),
+ 'letters': Series(random_letters)})
+
+ # Group by ints; filter on floats.
+ grouped = df.groupby('ints')
+ old_way = df[grouped.floats.
+ transform(lambda x: x.mean() > N / 20).astype('bool')]
+ new_way = grouped.filter(lambda x: x['floats'].mean() > N / 20)
+ tm.assert_frame_equal(new_way, old_way)
+
+ # Group by floats (rounded); filter on strings.
+ grouper = df.floats.apply(lambda x: np.round(x, -1))
+ grouped = df.groupby(grouper)
+ old_way = df[grouped.letters.
+ transform(lambda x: len(x) < N / 10).astype('bool')]
+ new_way = grouped.filter(lambda x: len(x.letters) < N / 10)
+ tm.assert_frame_equal(new_way, old_way)
+
+ # Group by strings; filter on ints.
+ grouped = df.groupby('letters')
+ old_way = df[grouped.ints.
+ transform(lambda x: x.mean() > N / 20).astype('bool')]
+ new_way = grouped.filter(lambda x: x['ints'].mean() > N / 20)
+ tm.assert_frame_equal(new_way, old_way)
+
+
+def test_filter_using_len():
+ # BUG GH4447
+ df = DataFrame({'A': np.arange(8),
+ 'B': list('aabbbbcc'),
+ 'C': np.arange(8)})
+ grouped = df.groupby('B')
+ actual = grouped.filter(lambda x: len(x) > 2)
+ expected = DataFrame(
+ {'A': np.arange(2, 6),
+ 'B': list('bbbb'),
+ 'C': np.arange(2, 6)}, index=np.arange(2, 6))
+ tm.assert_frame_equal(actual, expected)
+
+ actual = grouped.filter(lambda x: len(x) > 4)
+ expected = df.loc[[]]
+ tm.assert_frame_equal(actual, expected)
+
+ # Series have always worked properly, but we'll test anyway.
+ s = df['B']
+ grouped = s.groupby(s)
+ actual = grouped.filter(lambda x: len(x) > 2)
+ expected = Series(4 * ['b'], index=np.arange(2, 6), name='B')
+ tm.assert_series_equal(actual, expected)
+
+ actual = grouped.filter(lambda x: len(x) > 4)
+ expected = s[[]]
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_maintains_ordering():
+ # Simple case: index is sequential. #4621
+ df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
+ 'tag': [23, 45, 62, 24, 45, 34, 25, 62]})
+ s = df['pid']
+ grouped = df.groupby('tag')
+ actual = grouped.filter(lambda x: len(x) > 1)
+ expected = df.iloc[[1, 2, 4, 7]]
+ tm.assert_frame_equal(actual, expected)
+
+ grouped = s.groupby(df['tag'])
+ actual = grouped.filter(lambda x: len(x) > 1)
+ expected = s.iloc[[1, 2, 4, 7]]
+ tm.assert_series_equal(actual, expected)
+
+ # Now index is sequentially decreasing.
+ df.index = np.arange(len(df) - 1, -1, -1)
+ s = df['pid']
+ grouped = df.groupby('tag')
+ actual = grouped.filter(lambda x: len(x) > 1)
+ expected = df.iloc[[1, 2, 4, 7]]
+ tm.assert_frame_equal(actual, expected)
+
+ grouped = s.groupby(df['tag'])
+ actual = grouped.filter(lambda x: len(x) > 1)
+ expected = s.iloc[[1, 2, 4, 7]]
+ tm.assert_series_equal(actual, expected)
+
+ # Index is shuffled.
+ SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3]
+ df.index = df.index[SHUFFLED]
+ s = df['pid']
+ grouped = df.groupby('tag')
+ actual = grouped.filter(lambda x: len(x) > 1)
+ expected = df.iloc[[1, 2, 4, 7]]
+ tm.assert_frame_equal(actual, expected)
+
+ grouped = s.groupby(df['tag'])
+ actual = grouped.filter(lambda x: len(x) > 1)
+ expected = s.iloc[[1, 2, 4, 7]]
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_multiple_timestamp():
+ # GH 10114
+ df = DataFrame({'A': np.arange(5, dtype='int64'),
+ 'B': ['foo', 'bar', 'foo', 'bar', 'bar'],
+ 'C': Timestamp('20130101')})
+
+ grouped = df.groupby(['B', 'C'])
+
+ result = grouped['A'].filter(lambda x: True)
+ tm.assert_series_equal(df['A'], result)
+
+ result = grouped['A'].transform(len)
+ expected = Series([2, 3, 2, 3, 3], name='A')
+ tm.assert_series_equal(result, expected)
+
+ result = grouped.filter(lambda x: True)
+ tm.assert_frame_equal(df, result)
+
+ result = grouped.transform('sum')
+ expected = DataFrame({'A': [2, 8, 2, 8, 8]})
+ tm.assert_frame_equal(result, expected)
+
+ result = grouped.transform(len)
+ expected = DataFrame({'A': [2, 3, 2, 3, 3]})
+ tm.assert_frame_equal(result, expected)
+
+
+def test_filter_and_transform_with_non_unique_int_index():
+ # GH4620
+ index = [1, 1, 1, 2, 1, 1, 0, 1]
+ df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
+ 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
+ grouped_df = df.groupby('tag')
+ ser = df['pid']
+ grouped_ser = ser.groupby(df['tag'])
+ expected_indexes = [1, 2, 4, 7]
+
+ # Filter DataFrame
+ actual = grouped_df.filter(lambda x: len(x) > 1)
+ expected = df.iloc[expected_indexes]
+ tm.assert_frame_equal(actual, expected)
+
+ actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
+ expected = df.copy()
+ expected.iloc[[0, 3, 5, 6]] = np.nan
+ tm.assert_frame_equal(actual, expected)
+
+ # Filter Series
+ actual = grouped_ser.filter(lambda x: len(x) > 1)
+ expected = ser.take(expected_indexes)
+ tm.assert_series_equal(actual, expected)
+
+ actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
+ NA = np.nan
+ expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
+ # ^ made manually because this can get confusing!
+ tm.assert_series_equal(actual, expected)
+
+ # Transform Series
+ actual = grouped_ser.transform(len)
+ expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
+ tm.assert_series_equal(actual, expected)
+
+ # Transform (a column from) DataFrameGroupBy
+ actual = grouped_df.pid.transform(len)
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_and_transform_with_multiple_non_unique_int_index():
+ # GH4620
+ index = [1, 1, 1, 2, 0, 0, 0, 1]
+ df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
+ 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
+ grouped_df = df.groupby('tag')
+ ser = df['pid']
+ grouped_ser = ser.groupby(df['tag'])
+ expected_indexes = [1, 2, 4, 7]
+
+ # Filter DataFrame
+ actual = grouped_df.filter(lambda x: len(x) > 1)
+ expected = df.iloc[expected_indexes]
+ tm.assert_frame_equal(actual, expected)
+
+ actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
+ expected = df.copy()
+ expected.iloc[[0, 3, 5, 6]] = np.nan
+ tm.assert_frame_equal(actual, expected)
+
+ # Filter Series
+ actual = grouped_ser.filter(lambda x: len(x) > 1)
+ expected = ser.take(expected_indexes)
+ tm.assert_series_equal(actual, expected)
+
+ actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
+ NA = np.nan
+ expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
+ # ^ made manually because this can get confusing!
+ tm.assert_series_equal(actual, expected)
+
+ # Transform Series
+ actual = grouped_ser.transform(len)
+ expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
+ tm.assert_series_equal(actual, expected)
+
+ # Transform (a column from) DataFrameGroupBy
+ actual = grouped_df.pid.transform(len)
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_and_transform_with_non_unique_float_index():
+ # GH4620
+ index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float)
+ df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
+ 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
+ grouped_df = df.groupby('tag')
+ ser = df['pid']
+ grouped_ser = ser.groupby(df['tag'])
+ expected_indexes = [1, 2, 4, 7]
+
+ # Filter DataFrame
+ actual = grouped_df.filter(lambda x: len(x) > 1)
+ expected = df.iloc[expected_indexes]
+ tm.assert_frame_equal(actual, expected)
+
+ actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
+ expected = df.copy()
+ expected.iloc[[0, 3, 5, 6]] = np.nan
+ tm.assert_frame_equal(actual, expected)
+
+ # Filter Series
+ actual = grouped_ser.filter(lambda x: len(x) > 1)
+ expected = ser.take(expected_indexes)
+ tm.assert_series_equal(actual, expected)
+
+ actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
+ NA = np.nan
+ expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
+ # ^ made manually because this can get confusing!
+ tm.assert_series_equal(actual, expected)
+
+ # Transform Series
+ actual = grouped_ser.transform(len)
+ expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
+ tm.assert_series_equal(actual, expected)
+
+ # Transform (a column from) DataFrameGroupBy
+ actual = grouped_df.pid.transform(len)
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_and_transform_with_non_unique_timestamp_index():
+ # GH4620
+ t0 = Timestamp('2013-09-30 00:05:00')
+ t1 = Timestamp('2013-10-30 00:05:00')
+ t2 = Timestamp('2013-11-30 00:05:00')
+ index = [t1, t1, t1, t2, t1, t1, t0, t1]
+ df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
+ 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
+ grouped_df = df.groupby('tag')
+ ser = df['pid']
+ grouped_ser = ser.groupby(df['tag'])
+ expected_indexes = [1, 2, 4, 7]
+
+ # Filter DataFrame
+ actual = grouped_df.filter(lambda x: len(x) > 1)
+ expected = df.iloc[expected_indexes]
+ tm.assert_frame_equal(actual, expected)
+
+ actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
+ expected = df.copy()
+ expected.iloc[[0, 3, 5, 6]] = np.nan
+ tm.assert_frame_equal(actual, expected)
+
+ # Filter Series
+ actual = grouped_ser.filter(lambda x: len(x) > 1)
+ expected = ser.take(expected_indexes)
+ tm.assert_series_equal(actual, expected)
+
+ actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
+ NA = np.nan
+ expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
+ # ^ made manually because this can get confusing!
+ tm.assert_series_equal(actual, expected)
+
+ # Transform Series
+ actual = grouped_ser.transform(len)
+ expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
+ tm.assert_series_equal(actual, expected)
+
+ # Transform (a column from) DataFrameGroupBy
+ actual = grouped_df.pid.transform(len)
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_and_transform_with_non_unique_string_index():
+ # GH4620
+ index = list('bbbcbbab')
+ df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3],
+ 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index)
+ grouped_df = df.groupby('tag')
+ ser = df['pid']
+ grouped_ser = ser.groupby(df['tag'])
+ expected_indexes = [1, 2, 4, 7]
+
+ # Filter DataFrame
+ actual = grouped_df.filter(lambda x: len(x) > 1)
+ expected = df.iloc[expected_indexes]
+ tm.assert_frame_equal(actual, expected)
+
+ actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
+ expected = df.copy()
+ expected.iloc[[0, 3, 5, 6]] = np.nan
+ tm.assert_frame_equal(actual, expected)
+
+ # Filter Series
+ actual = grouped_ser.filter(lambda x: len(x) > 1)
+ expected = ser.take(expected_indexes)
+ tm.assert_series_equal(actual, expected)
+
+ actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False)
+ NA = np.nan
+ expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid')
+ # ^ made manually because this can get confusing!
+ tm.assert_series_equal(actual, expected)
+
+ # Transform Series
+ actual = grouped_ser.transform(len)
+ expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name='pid')
+ tm.assert_series_equal(actual, expected)
+
+ # Transform (a column from) DataFrameGroupBy
+ actual = grouped_df.pid.transform(len)
+ tm.assert_series_equal(actual, expected)
+
+
+def test_filter_has_access_to_grouped_cols():
+ df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=['A', 'B'])
+ g = df.groupby('A')
+ # previously didn't have access to col A #????
+ filt = g.filter(lambda x: x['A'].sum() == 2)
+ tm.assert_frame_equal(filt, df.iloc[[0, 1]])
+
+
+def test_filter_enforces_scalarness():
+ df = pd.DataFrame([
+ ['best', 'a', 'x'],
+ ['worst', 'b', 'y'],
+ ['best', 'c', 'x'],
+ ['best', 'd', 'y'],
+ ['worst', 'd', 'y'],
+ ['worst', 'd', 'y'],
+ ['best', 'd', 'z'],
+ ], columns=['a', 'b', 'c'])
+ with tm.assert_raises_regex(TypeError,
+ 'filter function returned a.*'):
+ df.groupby('c').filter(lambda g: g['a'] == 'best')
+
+
+def test_filter_non_bool_raises():
+ df = pd.DataFrame([
+ ['best', 'a', 1],
+ ['worst', 'b', 1],
+ ['best', 'c', 1],
+ ['best', 'd', 1],
+ ['worst', 'd', 1],
+ ['worst', 'd', 1],
+ ['best', 'd', 1],
+ ], columns=['a', 'b', 'c'])
+ with tm.assert_raises_regex(TypeError,
+ 'filter function returned a.*'):
+ df.groupby('a').filter(lambda g: g.c.mean())
+
+
+def test_filter_dropna_with_empty_groups():
+ # GH 10780
+ data = pd.Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3))
+    grouped = data.groupby(level=0)
+    result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False)
+ expected_false = pd.Series([np.nan] * 9,
+ index=np.repeat([1, 2, 3], 3))
+ tm.assert_series_equal(result_false, expected_false)
+
+    result_true = grouped.filter(lambda x: x.mean() > 1, dropna=True)
+ expected_true = pd.Series(index=pd.Index([], dtype=int))
+ tm.assert_series_equal(result_true, expected_true)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
new file mode 100644
index 0000000000000..ba1371fe9f931
--- /dev/null
+++ b/pandas/tests/groupby/test_function.py
@@ -0,0 +1,1120 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+from pandas import (DataFrame, Index, compat, isna,
+ Series, MultiIndex, Timestamp, date_range)
+from pandas.errors import UnsupportedFunctionCall
+from pandas.util import testing as tm
+import pandas.core.nanops as nanops
+from string import ascii_lowercase
+from pandas.compat import product as cart_product
+
+
+@pytest.mark.parametrize("agg_func", ['any', 'all'])
+@pytest.mark.parametrize("skipna", [True, False])
+@pytest.mark.parametrize("vals", [
+ ['foo', 'bar', 'baz'], ['foo', '', ''], ['', '', ''],
+ [1, 2, 3], [1, 0, 0], [0, 0, 0],
+ [1., 2., 3.], [1., 0., 0.], [0., 0., 0.],
+ [True, True, True], [True, False, False], [False, False, False],
+ [np.nan, np.nan, np.nan]
+])
+def test_groupby_bool_aggs(agg_func, skipna, vals):
+ df = DataFrame({'key': ['a'] * 3 + ['b'] * 3, 'val': vals * 2})
+
+ # Figure out expectation using Python builtin
+ exp = getattr(compat.builtins, agg_func)(vals)
+
+ # edge case for missing data with skipna and 'any'
+ if skipna and all(isna(vals)) and agg_func == 'any':
+ exp = False
+
+ exp_df = DataFrame([exp] * 2, columns=['val'], index=Index(
+ ['a', 'b'], name='key'))
+ result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
+ tm.assert_frame_equal(result, exp_df)
+
+
+def test_max_min_non_numeric():
+ # #2700
+ aa = DataFrame({'nn': [11, 11, 22, 22],
+ 'ii': [1, 2, 3, 4],
+ 'ss': 4 * ['mama']})
+
+ result = aa.groupby('nn').max()
+ assert 'ss' in result
+
+ result = aa.groupby('nn').max(numeric_only=False)
+ assert 'ss' in result
+
+ result = aa.groupby('nn').min()
+ assert 'ss' in result
+
+ result = aa.groupby('nn').min(numeric_only=False)
+ assert 'ss' in result
+
+
+def test_intercept_builtin_sum():
+ s = Series([1., 2., np.nan, 3.])
+ grouped = s.groupby([0, 1, 2, 2])
+
+ result = grouped.agg(compat.builtins.sum)
+ result2 = grouped.apply(compat.builtins.sum)
+ expected = grouped.sum()
+ tm.assert_series_equal(result, expected)
+ tm.assert_series_equal(result2, expected)
+
+
+def test_builtins_apply(): # GH8155
+ df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)),
+ columns=['jim', 'joe'])
+ df['jolie'] = np.random.randn(1000)
+
+ for keys in ['jim', ['jim', 'joe']]: # single key & multi-key
+ if keys == 'jim':
+ continue
+ for f in [max, min, sum]:
+ fname = f.__name__
+ result = df.groupby(keys).apply(f)
+ result.shape
+ ngroups = len(df.drop_duplicates(subset=keys))
+ assert result.shape == (ngroups, 3), 'invalid frame shape: '\
+ '{} (expected ({}, 3))'.format(result.shape, ngroups)
+
+ tm.assert_frame_equal(result, # numpy's equivalent function
+ df.groupby(keys).apply(getattr(np, fname)))
+
+ if f != sum:
+ expected = df.groupby(keys).agg(fname).reset_index()
+ expected.set_index(keys, inplace=True, drop=False)
+ tm.assert_frame_equal(result, expected, check_dtype=False)
+
+ tm.assert_series_equal(getattr(result, fname)(),
+ getattr(df, fname)())
+
+
+def test_arg_passthru():
+ # make sure that we are passing thru kwargs
+ # to our agg functions
+
+ # GH3668
+ # GH5724
+ df = pd.DataFrame(
+ {'group': [1, 1, 2],
+ 'int': [1, 2, 3],
+ 'float': [4., 5., 6.],
+ 'string': list('abc'),
+ 'category_string': pd.Series(list('abc')).astype('category'),
+ 'category_int': [7, 8, 9],
+ 'datetime': pd.date_range('20130101', periods=3),
+ 'datetimetz': pd.date_range('20130101',
+ periods=3,
+ tz='US/Eastern'),
+ 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')},
+ columns=['group', 'int', 'float', 'string',
+ 'category_string', 'category_int',
+ 'datetime', 'datetimetz',
+ 'timedelta'])
+
+ expected_columns_numeric = Index(['int', 'float', 'category_int'])
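+ # the default numeric_only=True result keeps exactly these columns;
+ # 'category_int' is a plain integer column despite its name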
+
+ # mean / median
+ expected = pd.DataFrame(
+ {'category_int': [7.5, 9],
+ 'float': [4.5, 6.],
+ 'timedelta': [pd.Timedelta('1.5s'),
+ pd.Timedelta('3s')],
+ 'int': [1.5, 3],
+ 'datetime': [pd.Timestamp('2013-01-01 12:00:00'),
+ pd.Timestamp('2013-01-03 00:00:00')],
+ 'datetimetz': [
+ pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'),
+ pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]},
+ index=Index([1, 2], name='group'),
+ columns=['int', 'float', 'category_int',
+ 'datetime', 'datetimetz', 'timedelta'])
+ for attr in ['mean', 'median']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+ result = f(numeric_only=False)
+ tm.assert_frame_equal(result.reindex_like(expected), expected)
+
+ # TODO: min, max *should* handle
+ # categorical (ordered) dtype
+ expected_columns = Index(['int', 'float', 'string',
+ 'category_int',
+ 'datetime', 'datetimetz',
+ 'timedelta'])
+ for attr in ['min', 'max']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ result = f(numeric_only=False)
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ expected_columns = Index(['int', 'float', 'string',
+ 'category_string', 'category_int',
+ 'datetime', 'datetimetz',
+ 'timedelta'])
+ for attr in ['first', 'last']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ result = f(numeric_only=False)
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ expected_columns = Index(['int', 'float', 'string',
+ 'category_int', 'timedelta'])
+ for attr in ['sum']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+ result = f(numeric_only=False)
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ expected_columns = Index(['int', 'float', 'category_int'])
+ for attr in ['prod', 'cumprod']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+ result = f(numeric_only=False)
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ # like min, max, but don't include strings
+ expected_columns = Index(['int', 'float',
+ 'category_int',
+ 'datetime', 'datetimetz',
+ 'timedelta'])
+ for attr in ['cummin', 'cummax']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ # GH 15561: numeric_only=False set by default like min/max
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ result = f(numeric_only=False)
+ tm.assert_index_equal(result.columns, expected_columns)
+
+ expected_columns = Index(['int', 'float', 'category_int',
+ 'timedelta'])
+ for attr in ['cumsum']:
+ f = getattr(df.groupby('group'), attr)
+ result = f()
+ tm.assert_index_equal(result.columns, expected_columns_numeric)
+
+ result = f(numeric_only=False)
+ tm.assert_index_equal(result.columns, expected_columns)
+
+
+def test_non_cython_api():
+
+ # GH5610
+ # non-cython calls should not include the grouper
+
+ df = DataFrame(
+ [[1, 2, 'foo'],
+ [1, np.nan, 'bar'],
+ [3, np.nan, 'baz']],
+ columns=['A', 'B', 'C'])
+ g = df.groupby('A')
+ gni = df.groupby('A', as_index=False)
+
+ # mad
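+ # group 1 has B == [2, NaN] -> mad 0; group 3 is all-NaN -> NaN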
+ expected = DataFrame([[0], [np.nan]], columns=['B'], index=[1, 3])
+ expected.index.name = 'A'
+ result = g.mad()
+ tm.assert_frame_equal(result, expected)
+
+ expected = DataFrame([[0., 0.], [0, np.nan]], columns=['A', 'B'],
+ index=[0, 1])
+ result = gni.mad()
+ tm.assert_frame_equal(result, expected)
+
+ # describe
+ expected_index = pd.Index([1, 3], name='A')
+ expected_col = pd.MultiIndex(levels=[['B'],
+ ['count', 'mean', 'std', 'min',
+ '25%', '50%', '75%', 'max']],
+ labels=[[0] * 8, list(range(8))])
+ expected = pd.DataFrame([[1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
+ [0.0, np.nan, np.nan, np.nan, np.nan, np.nan,
+ np.nan, np.nan]],
+ index=expected_index,
+ columns=expected_col)
+ result = g.describe()
+ tm.assert_frame_equal(result, expected)
+
+ expected = pd.concat([df[df.A == 1].describe().unstack().to_frame().T,
+ df[df.A == 3].describe().unstack().to_frame().T])
+ expected.index = pd.Index([0, 1])
+ result = gni.describe()
+ tm.assert_frame_equal(result, expected)
+
+ # any
+ expected = DataFrame([[True, True], [False, True]], columns=['B', 'C'],
+ index=[1, 3])
+ expected.index.name = 'A'
+ result = g.any()
+ tm.assert_frame_equal(result, expected)
+
+ # idxmax
+ expected = DataFrame([[0.0], [np.nan]], columns=['B'], index=[1, 3])
+ expected.index.name = 'A'
+ result = g.idxmax()
+ tm.assert_frame_equal(result, expected)
+
+
+def test_cython_api2():
+
+ # this takes the fast apply path
+
+ # cumsum (GH5614)
+ df = DataFrame(
+ [[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]
+ ], columns=['A', 'B', 'C'])
+ expected = DataFrame(
+ [[2, np.nan], [np.nan, 9], [4, 9]], columns=['B', 'C'])
+ result = df.groupby('A').cumsum()
+ tm.assert_frame_equal(result, expected)
+
+ # GH 5755 - cumsum is a transformer and should ignore as_index
+ result = df.groupby('A', as_index=False).cumsum()
+ tm.assert_frame_equal(result, expected)
+
+ # GH 13994
+ result = df.groupby('A').cumsum(axis=1)
+ expected = df.cumsum(axis=1)
+ tm.assert_frame_equal(result, expected)
+ result = df.groupby('A').cumprod(axis=1)
+ expected = df.cumprod(axis=1)
+ tm.assert_frame_equal(result, expected)
+
+
+def test_cython_median():
+ df = DataFrame(np.random.randn(1000))
+ df.values[::2] = np.nan
+
+ labels = np.random.randint(0, 50, size=1000).astype(float)
+ labels[::17] = np.nan
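+ # rows whose group label is NaN are dropped from every group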
+
+ result = df.groupby(labels).median()
+ exp = df.groupby(labels).agg(nanops.nanmedian)
+ tm.assert_frame_equal(result, exp)
+
+ df = DataFrame(np.random.randn(1000, 5))
+ rs = df.groupby(labels).agg(np.median)
+ xp = df.groupby(labels).median()
+ tm.assert_frame_equal(rs, xp)
+
+
+def test_median_empty_bins():
+ df = pd.DataFrame(np.random.randint(0, 44, 500))
+
+ grps = range(0, 55, 5)
+ bins = pd.cut(df[0], grps)
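+ # values stop at 43, so the upper bins are empty; median over an
+ # empty bin must still agree with the groupwise lambda below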
+
+ result = df.groupby(bins).median()
+ expected = df.groupby(bins).agg(lambda x: x.median())
+ tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", [
+ 'int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
+@pytest.mark.parametrize("method,data", [
+ ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}),
+ ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}),
+ ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}),
+ ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}),
+ ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}],
+ 'args': [1]}),
+ ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}],
+ 'out_type': 'int64'})
+])
+def test_groupby_non_arithmetic_agg_types(dtype, method, data):
+ # GH9311, GH6620
+ df = pd.DataFrame(
+ [{'a': 1, 'b': 1},
+ {'a': 1, 'b': 2},
+ {'a': 2, 'b': 3},
+ {'a': 2, 'b': 4}])
+
+ df['b'] = df.b.astype(dtype)
+
+ if 'args' not in data:
+ data['args'] = []
+
+ if 'out_type' in data:
+ out_type = data['out_type']
+ else:
+ out_type = dtype
+
+ exp = data['df']
+ df_out = pd.DataFrame(exp)
+
+ df_out['b'] = df_out.b.astype(out_type)
+ df_out.set_index('a', inplace=True)
+
+ grpd = df.groupby('a')
+ t = getattr(grpd, method)(*data['args'])
+ tm.assert_frame_equal(t, df_out)
+
+
+def test_groupby_non_arithmetic_agg_intlike_precision():
+ # GH9311, GH6620
+ c = 24650000000000000
+
+ inputs = ((Timestamp('2011-01-15 12:50:28.502376'),
+ Timestamp('2011-01-20 12:50:28.593448')), (1 + c, 2 + c))
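+ # both the Timestamps and the large ints exceed 2**53, the largest
+ # integer float64 represents exactly, so a float round-trip would
+ # silently lose precision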
+
+ for i in inputs:
+ df = pd.DataFrame([{'a': 1, 'b': i[0]}, {'a': 1, 'b': i[1]}])
+
+ grp_exp = {'first': {'expected': i[0]},
+ 'last': {'expected': i[1]},
+ 'min': {'expected': i[0]},
+ 'max': {'expected': i[1]},
+ 'nth': {'expected': i[1],
+ 'args': [1]},
+ 'count': {'expected': 2}}
+
+ for method, data in compat.iteritems(grp_exp):
+ if 'args' not in data:
+ data['args'] = []
+
+ grpd = df.groupby('a')
+ res = getattr(grpd, method)(*data['args'])
+ assert res.iloc[0].b == data['expected']
+
+
+def test_fill_consistency():
+
+ # GH9221
+ # keyword arguments passed through to the generated wrapper
+ # are only set when the passed kwarg is None
+ df = DataFrame(index=pd.MultiIndex.from_product(
+ [['value1', 'value2'], date_range('2014-01-01', '2014-01-06')]),
+ columns=Index(
+ ['1', '2'], name='id'))
+ df['1'] = [np.nan, 1, np.nan, np.nan, 11, np.nan, np.nan, 2, np.nan,
+ np.nan, 22, np.nan]
+ df['2'] = [np.nan, 3, np.nan, np.nan, 33, np.nan, np.nan, 4, np.nan,
+ np.nan, 44, np.nan]
+
+ expected = df.groupby(level=0, axis=0).fillna(method='ffill')
+ result = df.T.groupby(level=0, axis=1).fillna(method='ffill').T
+ tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_cumprod():
+ # GH 4095
+ df = pd.DataFrame({'key': ['b'] * 10, 'value': 2})
+
+ actual = df.groupby('key')['value'].cumprod()
+ expected = df.groupby('key')['value'].apply(lambda x: x.cumprod())
+ expected.name = 'value'
+ tm.assert_series_equal(actual, expected)
+
+ df = pd.DataFrame({'key': ['b'] * 100, 'value': 2})
+ actual = df.groupby('key')['value'].cumprod()
+ # if the product overflows, groupby cumprod casts to float
+ # while numpy passes back invalid values
+ df['value'] = df['value'].astype(float)
+ expected = df.groupby('key')['value'].apply(lambda x: x.cumprod())
+ expected.name = 'value'
+ tm.assert_series_equal(actual, expected)
+
+
+def test_ops_general():
+ ops = [('mean', np.mean),
+ ('median', np.median),
+ ('std', np.std),
+ ('var', np.var),
+ ('sum', np.sum),
+ ('prod', np.prod),
+ ('min', np.min),
+ ('max', np.max),
+ ('first', lambda x: x.iloc[0]),
+ ('last', lambda x: x.iloc[-1]),
+ ('count', np.size), ]
+ try:
+ from scipy.stats import sem
+ except ImportError:
+ pass
+ else:
+ ops.append(('sem', sem))
+ df = DataFrame(np.random.randn(1000))
+ labels = np.random.randint(0, 50, size=1000).astype(float)
+
+ for op, targop in ops:
+ result = getattr(df.groupby(labels), op)().astype(float)
+ expected = df.groupby(labels).agg(targop)
+ try:
+ tm.assert_frame_equal(result, expected)
+ except BaseException as exc:
+ exc.args += ('operation: %s' % op, )
+ raise
+
+
+def test_max_nan_bug():
+ raw = """,Date,app,File
+-04-23,2013-04-23 00:00:00,,log080001.log
+-05-06,2013-05-06 00:00:00,,log.log
+-05-07,2013-05-07 00:00:00,OE,xlsx"""
+
+ df = pd.read_csv(compat.StringIO(raw), parse_dates=[0])
+ gb = df.groupby('Date')
+ r = gb[['File']].max()
+ e = gb['File'].max().to_frame()
+ tm.assert_frame_equal(r, e)
+ assert not r['File'].isna().any()
+
+
+def test_nlargest():
+ a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
+ b = Series(list('a' * 5 + 'b' * 5))
+ gb = a.groupby(b)
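+ # the result keeps each value's original position as the inner
+ # index level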
+ r = gb.nlargest(3)
+ e = Series([
+ 7, 5, 3, 10, 9, 6
+ ], index=MultiIndex.from_arrays([list('aaabbb'), [3, 2, 1, 9, 5, 8]]))
+ tm.assert_series_equal(r, e)
+
+ a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0])
+ gb = a.groupby(b)
+ e = Series([
+ 3, 2, 1, 3, 3, 2
+ ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]]))
+ tm.assert_series_equal(gb.nlargest(3, keep='last'), e)
+
+
+def test_nsmallest():
+ a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
+ b = Series(list('a' * 5 + 'b' * 5))
+ gb = a.groupby(b)
+ r = gb.nsmallest(3)
+ e = Series([
+ 1, 2, 3, 0, 4, 6
+ ], index=MultiIndex.from_arrays([list('aaabbb'), [0, 4, 1, 6, 7, 8]]))
+ tm.assert_series_equal(r, e)
+
+ a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0])
+ gb = a.groupby(b)
+ e = Series([
+ 0, 1, 1, 0, 1, 2
+ ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]]))
+ tm.assert_series_equal(gb.nsmallest(3, keep='last'), e)
+
+
+def test_numpy_compat():
+ # see gh-12811
+ df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]})
+ g = df.groupby('A')
+
+ msg = "numpy operations are not valid with groupby"
+
+ for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'):
+ tm.assert_raises_regex(UnsupportedFunctionCall, msg,
+ getattr(g, func), 1, 2, 3)
+ tm.assert_raises_regex(UnsupportedFunctionCall, msg,
+ getattr(g, func), foo=1)
+
+
+def test_cummin_cummax():
+ # GH 15048
+ num_types = [np.int32, np.int64, np.float32, np.float64]
+ num_mins = [np.iinfo(np.int32).min, np.iinfo(np.int64).min,
+ np.finfo(np.float32).min, np.finfo(np.float64).min]
+ num_max = [np.iinfo(np.int32).max, np.iinfo(np.int64).max,
+ np.finfo(np.float32).max, np.finfo(np.float64).max]
+ base_df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 2, 2],
+ 'B': [3, 4, 3, 2, 2, 3, 2, 1]})
+ expected_mins = [3, 3, 3, 2, 2, 2, 2, 1]
+ expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]
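+ # running min/max within each 'A' group (rows 0-3 and rows 4-7)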
+
+ for dtype, min_val, max_val in zip(num_types, num_mins, num_max):
+ df = base_df.astype(dtype)
+
+ # cummin
+ expected = pd.DataFrame({'B': expected_mins}).astype(dtype)
+ result = df.groupby('A').cummin()
+ tm.assert_frame_equal(result, expected)
+ result = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
+ tm.assert_frame_equal(result, expected)
+
+ # Test cummin w/ min value for dtype
+ df.loc[[2, 6], 'B'] = min_val
+ expected.loc[[2, 3, 6, 7], 'B'] = min_val
+ result = df.groupby('A').cummin()
+ tm.assert_frame_equal(result, expected)
+ expected = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
+ tm.assert_frame_equal(result, expected)
+
+ # cummax
+ expected = pd.DataFrame({'B': expected_maxs}).astype(dtype)
+ result = df.groupby('A').cummax()
+ tm.assert_frame_equal(result, expected)
+ result = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
+ tm.assert_frame_equal(result, expected)
+
+ # Test cummax w/ max value for dtype
+ df.loc[[2, 6], 'B'] = max_val
+ expected.loc[[2, 3, 6, 7], 'B'] = max_val
+ result = df.groupby('A').cummax()
+ tm.assert_frame_equal(result, expected)
+ expected = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
+ tm.assert_frame_equal(result, expected)
+
+ # Test nan in some values
+ base_df.loc[[0, 2, 4, 6], 'B'] = np.nan
+ expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 2,
+ np.nan, 3, np.nan, 1]})
+ result = base_df.groupby('A').cummin()
+ tm.assert_frame_equal(result, expected)
+ expected = (base_df.groupby('A')
+ .B
+ .apply(lambda x: x.cummin())
+ .to_frame())
+ tm.assert_frame_equal(result, expected)
+
+ expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 4,
+ np.nan, 3, np.nan, 3]})
+ result = base_df.groupby('A').cummax()
+ tm.assert_frame_equal(result, expected)
+ expected = (base_df.groupby('A')
+ .B
+ .apply(lambda x: x.cummax())
+ .to_frame())
+ tm.assert_frame_equal(result, expected)
+
+ # Test nan in entire column
+ base_df['B'] = np.nan
+ expected = pd.DataFrame({'B': [np.nan] * 8})
+ result = base_df.groupby('A').cummin()
+ tm.assert_frame_equal(expected, result)
+ result = base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
+ tm.assert_frame_equal(expected, result)
+ result = base_df.groupby('A').cummax()
+ tm.assert_frame_equal(expected, result)
+ result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
+ tm.assert_frame_equal(expected, result)
+
+ # GH 15561
+ df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001'])))
+ expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b')
+ for method in ['cummax', 'cummin']:
+ result = getattr(df.groupby('a')['b'], method)()
+ tm.assert_series_equal(expected, result)
+
+ # GH 15635
+ df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1]))
+ result = df.groupby('a').b.cummax()
+ expected = pd.Series([2, 1, 2], name='b')
+ tm.assert_series_equal(result, expected)
+
+ df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
+ result = df.groupby('a').b.cummin()
+ expected = pd.Series([1, 2, 1], name='b')
+ tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize('in_vals, out_vals', [
+
+ # Basics: strictly increasing (T), strictly decreasing (F),
+ # abs val increasing (F), non-strictly increasing (T)
+ ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1],
+ [True, False, False, True]),
+
+ # Test with inf vals
+ ([1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf],
+ [True, False, True, False]),
+
+ # Test with nan vals; should always be False
+ ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
+ [False, False, False, False]),
+])
+def test_is_monotonic_increasing(in_vals, out_vals):
+ # GH 17015
+ source_dict = {
+ 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'],
+ 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'],
+ 'C': in_vals}
+ df = pd.DataFrame(source_dict)
+ result = df.groupby('B').C.is_monotonic_increasing
+ index = Index(list('abcd'), name='B')
+ expected = pd.Series(index=index, data=out_vals, name='C')
+ tm.assert_series_equal(result, expected)
+
+ # Also check result equal to manually taking x.is_monotonic_increasing.
+ expected = (
+ df.groupby(['B']).C.apply(lambda x: x.is_monotonic_increasing))
+ tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize('in_vals, out_vals', [
+ # Basics: strictly decreasing (T), strictly increasing (F),
+ # abs val decreasing (F), non-strictly decreasing (T)
+ ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1],
+ [True, False, False, True]),
+
+ # Test with inf vals
+ ([np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf],
+ [True, True, False, True]),
+
+ # Test with nan vals; should always be False
+ ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
+ [False, False, False, False]),
+])
+def test_is_monotonic_decreasing(in_vals, out_vals):
+ # GH 17015
+ source_dict = {
+ 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'],
+ 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'],
+ 'C': in_vals}
+
+ df = pd.DataFrame(source_dict)
+ result = df.groupby('B').C.is_monotonic_decreasing
+ index = Index(list('abcd'), name='B')
+ expected = pd.Series(index=index, data=out_vals, name='C')
+ tm.assert_series_equal(result, expected)
+
+
+# describe
+# --------------------------------
+
+def test_apply_describe_bug(mframe):
+ grouped = mframe.groupby(level='first')
+ grouped.describe() # it works!
+
+
+def test_series_describe_multikey():
+ ts = tm.makeTimeSeries()
+ grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
+ result = grouped.describe()
+ tm.assert_series_equal(result['mean'], grouped.mean(),
+ check_names=False)
+ tm.assert_series_equal(result['std'], grouped.std(), check_names=False)
+ tm.assert_series_equal(result['min'], grouped.min(), check_names=False)
+
+
+def test_series_describe_single():
+ ts = tm.makeTimeSeries()
+ grouped = ts.groupby(lambda x: x.month)
+ result = grouped.apply(lambda x: x.describe())
+ expected = grouped.describe().stack()
+ tm.assert_series_equal(result, expected)
+
+
+def test_series_index_name(df):
+ grouped = df.loc[:, ['C']].groupby(df['A'])
+ result = grouped.agg(lambda x: x.mean())
+ assert result.index.name == 'A'
+
+
+def test_frame_describe_multikey(tsframe):
+ grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
+ result = grouped.describe()
+ desc_groups = []
+ for col in tsframe:
+ group = grouped[col].describe()
+ # GH 17464 - Remove duplicate MultiIndex levels
+ group_col = pd.MultiIndex(
+ levels=[[col], group.columns],
+ labels=[[0] * len(group.columns), range(len(group.columns))])
+ group = pd.DataFrame(group.values,
+ columns=group_col,
+ index=group.index)
+ desc_groups.append(group)
+ expected = pd.concat(desc_groups, axis=1)
+ tm.assert_frame_equal(result, expected)
+
+ groupedT = tsframe.groupby({'A': 0, 'B': 0,
+ 'C': 1, 'D': 1}, axis=1)
+ result = groupedT.describe()
+ expected = tsframe.describe().T
+ expected.index = pd.MultiIndex(
+ levels=[[0, 1], expected.index],
+ labels=[[0, 0, 1, 1], range(len(expected.index))])
+ tm.assert_frame_equal(result, expected)
+
+
+def test_frame_describe_tupleindex():
+
+ # GH 14848 - regression from 0.19.0 to 0.19.1
+ df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3,
+ 'y': [10, 20, 30, 40, 50] * 3,
+ 'z': [100, 200, 300, 400, 500] * 3})
+ df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5
+ df2 = df1.rename(columns={'k': 'key'})
+ pytest.raises(ValueError, lambda: df1.groupby('k').describe())
+ pytest.raises(ValueError, lambda: df2.groupby('key').describe())
+
+
+def test_frame_describe_unstacked_format():
+ # GH 4792
+ prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990,
+ pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499,
+ pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499}
+ volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000,
+ pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000,
+ pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000}
+ df = pd.DataFrame({'PRICE': prices,
+ 'VOLUME': volumes})
+ result = df.groupby('PRICE').VOLUME.describe()
+ data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(),
+ df[df.PRICE == 25499].VOLUME.describe().values.tolist()]
+ expected = pd.DataFrame(data,
+ index=pd.Index([24990, 25499], name='PRICE'),
+ columns=['count', 'mean', 'std', 'min',
+ '25%', '50%', '75%', 'max'])
+ tm.assert_frame_equal(result, expected)
+
+
+# nunique
+# --------------------------------
+
+@pytest.mark.parametrize("n, m", cart_product(10 ** np.arange(2, 6),
+ (10, 100, 1000)))
+@pytest.mark.parametrize("sort, dropna", cart_product((False, True), repeat=2))
+def test_series_groupby_nunique(n, m, sort, dropna):
+
+ def check_nunique(df, keys, as_index=True):
+ gr = df.groupby(keys, as_index=as_index, sort=sort)
+ left = gr['julie'].nunique(dropna=dropna)
+
+ gr = df.groupby(keys, as_index=as_index, sort=sort)
+ right = gr['julie'].apply(Series.nunique, dropna=dropna)
+ if not as_index:
+ right = right.reset_index(drop=True)
+
+ tm.assert_series_equal(left, right, check_names=False)
+
+ days = date_range('2015-08-23', periods=10)
+
+ frame = DataFrame({'jim': np.random.choice(list(ascii_lowercase), n),
+ 'joe': np.random.choice(days, n),
+ 'julie': np.random.randint(0, m, n)})
+
+ check_nunique(frame, ['jim'])
+ check_nunique(frame, ['jim', 'joe'])
+
+ frame.loc[1::17, 'jim'] = None
+ frame.loc[3::37, 'joe'] = None
+ frame.loc[7::19, 'julie'] = None
+ frame.loc[8::19, 'julie'] = None
+ frame.loc[9::19, 'julie'] = None
+
+ check_nunique(frame, ['jim'])
+ check_nunique(frame, ['jim', 'joe'])
+ check_nunique(frame, ['jim'], as_index=False)
+ check_nunique(frame, ['jim', 'joe'], as_index=False)
+
+
+def test_nunique():
+ df = DataFrame({
+ 'A': list('abbacc'),
+ 'B': list('abxacc'),
+ 'C': list('abbacx'),
+ })
+
+ expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]})
+ result = df.groupby('A', as_index=False).nunique()
+ tm.assert_frame_equal(result, expected)
+
+ # as_index
+ expected.index = list('abc')
+ expected.index.name = 'A'
+ result = df.groupby('A').nunique()
+ tm.assert_frame_equal(result, expected)
+
+ # with na
+ result = df.replace({'x': None}).groupby('A').nunique(dropna=False)
+ tm.assert_frame_equal(result, expected)
+
+ # dropna
+ expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3},
+ index=list('abc'))
+ expected.index.name = 'A'
+ result = df.replace({'x': None}).groupby('A').nunique()
+ tm.assert_frame_equal(result, expected)
+
+
+def test_nunique_with_object():
+ # GH 11077
+ data = pd.DataFrame(
+ [[100, 1, 'Alice'],
+ [200, 2, 'Bob'],
+ [300, 3, 'Charlie'],
+ [-400, 4, 'Dan'],
+ [500, 5, 'Edith']],
+ columns=['amount', 'id', 'name']
+ )
+
+ result = data.groupby(['id', 'amount'])['name'].nunique()
+ index = MultiIndex.from_arrays([data.id, data.amount])
+ expected = pd.Series([1] * 5, name='name', index=index)
+ tm.assert_series_equal(result, expected)
+
+
+def test_nunique_with_empty_series():
+ # GH 12553
+ data = pd.Series(name='name')
+ result = data.groupby(level=0).nunique()
+ expected = pd.Series(name='name', dtype='int64')
+ tm.assert_series_equal(result, expected)
+
+
+def test_nunique_with_timegrouper():
+ # GH 13453
+ test = pd.DataFrame({
+ 'time': [Timestamp('2016-06-28 09:35:35'),
+ Timestamp('2016-06-28 16:09:30'),
+ Timestamp('2016-06-28 16:46:28')],
+ 'data': ['1', '2', '3']}).set_index('time')
+ result = test.groupby(pd.Grouper(freq='h'))['data'].nunique()
+ expected = test.groupby(
+ pd.Grouper(freq='h')
+ )['data'].apply(pd.Series.nunique)
+ tm.assert_series_equal(result, expected)
+
+
+# count
+# --------------------------------
+
+def test_groupby_timedelta_cython_count():
+ df = DataFrame({'g': list('ab' * 2),
+ 'delt': np.arange(4).astype('timedelta64[ns]')})
+ expected = Series([
+ 2, 2
+ ], index=pd.Index(['a', 'b'], name='g'), name='delt')
+ result = df.groupby('g').delt.count()
+ tm.assert_series_equal(expected, result)
+
+
+def test_count():
+ n = 1 << 15
+ dr = date_range('2015-08-30', periods=n // 10, freq='T')
+
+ df = DataFrame({
+ '1st': np.random.choice(
+ list(ascii_lowercase), n),
+ '2nd': np.random.randint(0, 5, n),
+ '3rd': np.random.randn(n).round(3),
+ '4th': np.random.randint(-10, 10, n),
+ '5th': np.random.choice(dr, n),
+ '6th': np.random.randn(n).round(3),
+ '7th': np.random.randn(n).round(3),
+ '8th': np.random.choice(dr, n) - np.random.choice(dr, 1),
+ '9th': np.random.choice(
+ list(ascii_lowercase), n)
+ })
+
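+ # leave the key columns ('1st', '2nd') and '4th' fully observed and
+ # sprinkle ~10% NaN into the rest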
+ for col in df.columns.drop(['1st', '2nd', '4th']):
+ df.loc[np.random.choice(n, n // 10), col] = np.nan
+
+ df['9th'] = df['9th'].astype('category')
+
+ for key in '1st', '2nd', ['1st', '2nd']:
+ left = df.groupby(key).count()
+ right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
+ tm.assert_frame_equal(left, right)
+
+ # GH5610
+ # count counts non-nulls
+ df = pd.DataFrame([[1, 2, 'foo'],
+ [1, np.nan, 'bar'],
+ [3, np.nan, np.nan]],
+ columns=['A', 'B', 'C'])
+
+ count_as = df.groupby('A').count()
+ count_not_as = df.groupby('A', as_index=False).count()
+
+ expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'],
+ index=[1, 3])
+ expected.index.name = 'A'
+ tm.assert_frame_equal(count_not_as, expected.reset_index())
+ tm.assert_frame_equal(count_as, expected)
+
+ count_B = df.groupby('A')['B'].count()
+ tm.assert_series_equal(count_B, expected['B'])
+
+
+def test_count_object():
+ df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3})
+ result = df.groupby('c').a.count()
+ expected = pd.Series([
+ 3, 3
+ ], index=pd.Index([2, 3], name='c'), name='a')
+ tm.assert_series_equal(result, expected)
+
+ df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3,
+ 'c': [2] * 3 + [3] * 3})
+ result = df.groupby('c').a.count()
+ expected = pd.Series([
+ 1, 3
+ ], index=pd.Index([2, 3], name='c'), name='a')
+ tm.assert_series_equal(result, expected)
+
+
+def test_count_cross_type():
+ # GH8169
+ vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint(
+ 0, 2, (100, 2))))
+
+ df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd'])
+ df[df == 2] = np.nan
+ expected = df.groupby(['c', 'd']).count()
+
+ for t in ['float32', 'object']:
+ df['a'] = df['a'].astype(t)
+ df['b'] = df['b'].astype(t)
+ result = df.groupby(['c', 'd']).count()
+ tm.assert_frame_equal(result, expected)
+
+
+def test_lower_int_prec_count():
+ df = DataFrame({'a': np.array(
+ [0, 1, 2, 100], np.int8),
+ 'b': np.array(
+ [1, 2, 3, 6], np.uint32),
+ 'c': np.array(
+ [4, 5, 6, 8], np.int16),
+ 'grp': list('ab' * 2)})
+ result = df.groupby('grp').count()
+ expected = DataFrame({'a': [2, 2],
+ 'b': [2, 2],
+ 'c': [2, 2]}, index=pd.Index(list('ab'),
+ name='grp'))
+ tm.assert_frame_equal(result, expected)
+
+
+def test_count_uses_size_on_exception():
+ class RaisingObjectException(Exception):
+ pass
+
+ class RaisingObject(object):
+
+ def __init__(self, msg='I will raise inside Cython'):
+ super(RaisingObject, self).__init__()
+ self.msg = msg
+
+ def __eq__(self, other):
+ # called inside the Cython count path; raising here forces
+ # the fallback to size()
+ raise RaisingObjectException(self.msg)
+
+ df = DataFrame({'a': [RaisingObject() for _ in range(4)],
+ 'grp': list('ab' * 2)})
+ result = df.groupby('grp').count()
+ expected = DataFrame({'a': [2, 2]}, index=pd.Index(
+ list('ab'), name='grp'))
+ tm.assert_frame_equal(result, expected)
+
+
+# size
+# --------------------------------
+
+def test_size(df):
+ grouped = df.groupby(['A', 'B'])
+ result = grouped.size()
+ for key, group in grouped:
+ assert result[key] == len(group)
+
+ grouped = df.groupby('A')
+ result = grouped.size()
+ for key, group in grouped:
+ assert result[key] == len(group)
+
+ grouped = df.groupby('B')
+ result = grouped.size()
+ for key, group in grouped:
+ assert result[key] == len(group)
+
+ df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc'))
+ for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])):
+ left = df.groupby(key, sort=sort).size()
+ right = df.groupby(key, sort=sort)['c'].apply(lambda a: a.shape[0])
+ tm.assert_series_equal(left, right, check_names=False)
+
+ # GH11699
+ df = DataFrame([], columns=['A', 'B'])
+ out = Series([], dtype='int64', index=Index([], name='A'))
+ tm.assert_series_equal(df.groupby('A').size(), out)
+
+
+# pipe
+# --------------------------------
+
+def test_pipe():
+ # Test the pipe method of DataFrameGroupBy.
+ # Issue #17871
+
+ random_state = np.random.RandomState(1234567890)
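+ # a fixed seed keeps the hard-coded expected values reproducible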
+
+ df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+ 'foo', 'bar', 'foo', 'foo'],
+ 'B': random_state.randn(8),
+ 'C': random_state.randn(8)})
+
+ def f(dfgb):
+ return dfgb.B.max() - dfgb.C.min().min()
+
+ def square(srs):
+ return srs ** 2
+
+ # Note that the transformations are
+ # GroupBy -> Series
+ # Series -> Series
+ # This then chains the GroupBy.pipe and the
+ # NDFrame.pipe methods
+ result = df.groupby('A').pipe(f).pipe(square)
+
+ index = Index([u'bar', u'foo'], dtype='object', name=u'A')
+ expected = pd.Series([8.99110003361, 8.17516964785], name='B',
+ index=index)
+
+ tm.assert_series_equal(expected, result)
+
+
+def test_pipe_args():
+ # Test passing args to the pipe method of DataFrameGroupBy.
+ # Issue #17871
+
+ df = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'C'],
+ 'x': [1.0, 2.0, 3.0, 2.0, 5.0],
+ 'y': [10.0, 100.0, 1000.0, -100.0, -1000.0]})
+
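+ # f filters the groups and re-groups on the same grouper, so the
+ # next pipe stage still receives a GroupBy object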
+ def f(dfgb, arg1):
+ return (dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False)
+ .groupby(dfgb.grouper))
+
+ def g(dfgb, arg2):
+ return dfgb.sum() / dfgb.sum().sum() + arg2
+
+ def h(df, arg3):
+ return df.x + df.y - arg3
+
+ result = (df
+ .groupby('group')
+ .pipe(f, 0)
+ .pipe(g, 10)
+ .pipe(h, 100))
+
+ # Assert the results here
+ index = pd.Index(['A', 'B', 'C'], name='group')
+ expected = pd.Series([-79.5160891089, -78.4839108911, -80],
+ index=index)
+
+ tm.assert_series_equal(expected, result)
+
+ # test SeriesGroupby.pipe
+ ser = pd.Series([1, 1, 2, 2, 3, 3])
+ result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
+
+ expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
+
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_functional.py b/pandas/tests/groupby/test_functional.py
deleted file mode 100644
index b9718663570bd..0000000000000
--- a/pandas/tests/groupby/test_functional.py
+++ /dev/null
@@ -1,372 +0,0 @@
-# -*- coding: utf-8 -*-
-
-""" test function application """
-
-import pytest
-
-from string import ascii_lowercase
-from pandas import (date_range, Timestamp,
- Index, MultiIndex, DataFrame, Series)
-from pandas.util.testing import assert_frame_equal, assert_series_equal
-from pandas.compat import product as cart_product
-
-import numpy as np
-
-import pandas.util.testing as tm
-import pandas as pd
-from .common import MixIn
-
-
-# describe
-# --------------------------------
-
-class TestDescribe(MixIn):
-
- def test_apply_describe_bug(self):
- grouped = self.mframe.groupby(level='first')
- grouped.describe() # it works!
-
- def test_series_describe_multikey(self):
- ts = tm.makeTimeSeries()
- grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
- result = grouped.describe()
- assert_series_equal(result['mean'], grouped.mean(), check_names=False)
- assert_series_equal(result['std'], grouped.std(), check_names=False)
- assert_series_equal(result['min'], grouped.min(), check_names=False)
-
- def test_series_describe_single(self):
- ts = tm.makeTimeSeries()
- grouped = ts.groupby(lambda x: x.month)
- result = grouped.apply(lambda x: x.describe())
- expected = grouped.describe().stack()
- assert_series_equal(result, expected)
-
- def test_series_index_name(self):
- grouped = self.df.loc[:, ['C']].groupby(self.df['A'])
- result = grouped.agg(lambda x: x.mean())
- assert result.index.name == 'A'
-
- def test_frame_describe_multikey(self):
- grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
- result = grouped.describe()
- desc_groups = []
- for col in self.tsframe:
- group = grouped[col].describe()
- # GH 17464 - Remove duplicate MultiIndex levels
- group_col = pd.MultiIndex(
- levels=[[col], group.columns],
- labels=[[0] * len(group.columns), range(len(group.columns))])
- group = pd.DataFrame(group.values,
- columns=group_col,
- index=group.index)
- desc_groups.append(group)
- expected = pd.concat(desc_groups, axis=1)
- tm.assert_frame_equal(result, expected)
-
- groupedT = self.tsframe.groupby({'A': 0, 'B': 0,
- 'C': 1, 'D': 1}, axis=1)
- result = groupedT.describe()
- expected = self.tsframe.describe().T
- expected.index = pd.MultiIndex(
- levels=[[0, 1], expected.index],
- labels=[[0, 0, 1, 1], range(len(expected.index))])
- tm.assert_frame_equal(result, expected)
-
- def test_frame_describe_tupleindex(self):
-
- # GH 14848 - regression from 0.19.0 to 0.19.1
- df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3,
- 'y': [10, 20, 30, 40, 50] * 3,
- 'z': [100, 200, 300, 400, 500] * 3})
- df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5
- df2 = df1.rename(columns={'k': 'key'})
- pytest.raises(ValueError, lambda: df1.groupby('k').describe())
- pytest.raises(ValueError, lambda: df2.groupby('key').describe())
-
- def test_frame_describe_unstacked_format(self):
- # GH 4792
- prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990,
- pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499,
- pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499}
- volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000,
- pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000,
- pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000}
- df = pd.DataFrame({'PRICE': prices,
- 'VOLUME': volumes})
- result = df.groupby('PRICE').VOLUME.describe()
- data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(),
- df[df.PRICE == 25499].VOLUME.describe().values.tolist()]
- expected = pd.DataFrame(data,
- index=pd.Index([24990, 25499], name='PRICE'),
- columns=['count', 'mean', 'std', 'min',
- '25%', '50%', '75%', 'max'])
- tm.assert_frame_equal(result, expected)
-
-
-# nunique
-# --------------------------------
-
-class TestNUnique(MixIn):
-
- def test_series_groupby_nunique(self):
-
- def check_nunique(df, keys, as_index=True):
- for sort, dropna in cart_product((False, True), repeat=2):
- gr = df.groupby(keys, as_index=as_index, sort=sort)
- left = gr['julie'].nunique(dropna=dropna)
-
- gr = df.groupby(keys, as_index=as_index, sort=sort)
- right = gr['julie'].apply(Series.nunique, dropna=dropna)
- if not as_index:
- right = right.reset_index(drop=True)
-
- assert_series_equal(left, right, check_names=False)
-
- days = date_range('2015-08-23', periods=10)
-
- for n, m in cart_product(10 ** np.arange(2, 6), (10, 100, 1000)):
- frame = DataFrame({
- 'jim': np.random.choice(
- list(ascii_lowercase), n),
- 'joe': np.random.choice(days, n),
- 'julie': np.random.randint(0, m, n)
- })
-
- check_nunique(frame, ['jim'])
- check_nunique(frame, ['jim', 'joe'])
-
- frame.loc[1::17, 'jim'] = None
- frame.loc[3::37, 'joe'] = None
- frame.loc[7::19, 'julie'] = None
- frame.loc[8::19, 'julie'] = None
- frame.loc[9::19, 'julie'] = None
-
- check_nunique(frame, ['jim'])
- check_nunique(frame, ['jim', 'joe'])
- check_nunique(frame, ['jim'], as_index=False)
- check_nunique(frame, ['jim', 'joe'], as_index=False)
-
- def test_nunique(self):
- df = DataFrame({
- 'A': list('abbacc'),
- 'B': list('abxacc'),
- 'C': list('abbacx'),
- })
-
- expected = DataFrame({'A': [1] * 3, 'B': [1, 2, 1], 'C': [1, 1, 2]})
- result = df.groupby('A', as_index=False).nunique()
- tm.assert_frame_equal(result, expected)
-
- # as_index
- expected.index = list('abc')
- expected.index.name = 'A'
- result = df.groupby('A').nunique()
- tm.assert_frame_equal(result, expected)
-
- # with na
- result = df.replace({'x': None}).groupby('A').nunique(dropna=False)
- tm.assert_frame_equal(result, expected)
-
- # dropna
- expected = DataFrame({'A': [1] * 3, 'B': [1] * 3, 'C': [1] * 3},
- index=list('abc'))
- expected.index.name = 'A'
- result = df.replace({'x': None}).groupby('A').nunique()
- tm.assert_frame_equal(result, expected)
-
- def test_nunique_with_object(self):
- # GH 11077
- data = pd.DataFrame(
- [[100, 1, 'Alice'],
- [200, 2, 'Bob'],
- [300, 3, 'Charlie'],
- [-400, 4, 'Dan'],
- [500, 5, 'Edith']],
- columns=['amount', 'id', 'name']
- )
-
- result = data.groupby(['id', 'amount'])['name'].nunique()
- index = MultiIndex.from_arrays([data.id, data.amount])
- expected = pd.Series([1] * 5, name='name', index=index)
- tm.assert_series_equal(result, expected)
-
- def test_nunique_with_empty_series(self):
- # GH 12553
- data = pd.Series(name='name')
- result = data.groupby(level=0).nunique()
- expected = pd.Series(name='name', dtype='int64')
- tm.assert_series_equal(result, expected)
-
- def test_nunique_with_timegrouper(self):
- # GH 13453
- test = pd.DataFrame({
- 'time': [Timestamp('2016-06-28 09:35:35'),
- Timestamp('2016-06-28 16:09:30'),
- Timestamp('2016-06-28 16:46:28')],
- 'data': ['1', '2', '3']}).set_index('time')
- result = test.groupby(pd.Grouper(freq='h'))['data'].nunique()
- expected = test.groupby(
- pd.Grouper(freq='h')
- )['data'].apply(pd.Series.nunique)
- tm.assert_series_equal(result, expected)
-
-
-# count
-# --------------------------------
-
-class TestCount(MixIn):
-
- def test_groupby_timedelta_cython_count(self):
- df = DataFrame({'g': list('ab' * 2),
- 'delt': np.arange(4).astype('timedelta64[ns]')})
- expected = Series([
- 2, 2
- ], index=pd.Index(['a', 'b'], name='g'), name='delt')
- result = df.groupby('g').delt.count()
- tm.assert_series_equal(expected, result)
-
- def test_count(self):
- n = 1 << 15
- dr = date_range('2015-08-30', periods=n // 10, freq='T')
-
- df = DataFrame({
- '1st': np.random.choice(
- list(ascii_lowercase), n),
- '2nd': np.random.randint(0, 5, n),
- '3rd': np.random.randn(n).round(3),
- '4th': np.random.randint(-10, 10, n),
- '5th': np.random.choice(dr, n),
- '6th': np.random.randn(n).round(3),
- '7th': np.random.randn(n).round(3),
- '8th': np.random.choice(dr, n) - np.random.choice(dr, 1),
- '9th': np.random.choice(
- list(ascii_lowercase), n)
- })
-
- for col in df.columns.drop(['1st', '2nd', '4th']):
- df.loc[np.random.choice(n, n // 10), col] = np.nan
-
- df['9th'] = df['9th'].astype('category')
-
- for key in '1st', '2nd', ['1st', '2nd']:
- left = df.groupby(key).count()
- right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1)
- assert_frame_equal(left, right)
-
- # GH5610
- # count counts non-nulls
- df = pd.DataFrame([[1, 2, 'foo'],
- [1, np.nan, 'bar'],
- [3, np.nan, np.nan]],
- columns=['A', 'B', 'C'])
-
- count_as = df.groupby('A').count()
- count_not_as = df.groupby('A', as_index=False).count()
-
- expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'],
- index=[1, 3])
- expected.index.name = 'A'
- assert_frame_equal(count_not_as, expected.reset_index())
- assert_frame_equal(count_as, expected)
-
- count_B = df.groupby('A')['B'].count()
- assert_series_equal(count_B, expected['B'])
-
- def test_count_object(self):
- df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3})
- result = df.groupby('c').a.count()
- expected = pd.Series([
- 3, 3
- ], index=pd.Index([2, 3], name='c'), name='a')
- tm.assert_series_equal(result, expected)
-
- df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3,
- 'c': [2] * 3 + [3] * 3})
- result = df.groupby('c').a.count()
- expected = pd.Series([
- 1, 3
- ], index=pd.Index([2, 3], name='c'), name='a')
- tm.assert_series_equal(result, expected)
-
- def test_count_cross_type(self): # GH8169
- vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint(
- 0, 2, (100, 2))))
-
- df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd'])
- df[df == 2] = np.nan
- expected = df.groupby(['c', 'd']).count()
-
- for t in ['float32', 'object']:
- df['a'] = df['a'].astype(t)
- df['b'] = df['b'].astype(t)
- result = df.groupby(['c', 'd']).count()
- tm.assert_frame_equal(result, expected)
-
- def test_lower_int_prec_count(self):
- df = DataFrame({'a': np.array(
- [0, 1, 2, 100], np.int8),
- 'b': np.array(
- [1, 2, 3, 6], np.uint32),
- 'c': np.array(
- [4, 5, 6, 8], np.int16),
- 'grp': list('ab' * 2)})
- result = df.groupby('grp').count()
- expected = DataFrame({'a': [2, 2],
- 'b': [2, 2],
- 'c': [2, 2]}, index=pd.Index(list('ab'),
- name='grp'))
- tm.assert_frame_equal(result, expected)
-
- def test_count_uses_size_on_exception(self):
- class RaisingObjectException(Exception):
- pass
-
- class RaisingObject(object):
-
- def __init__(self, msg='I will raise inside Cython'):
- super(RaisingObject, self).__init__()
- self.msg = msg
-
- def __eq__(self, other):
- # gets called in Cython to check that raising calls the method
- raise RaisingObjectException(self.msg)
-
- df = DataFrame({'a': [RaisingObject() for _ in range(4)],
- 'grp': list('ab' * 2)})
- result = df.groupby('grp').count()
- expected = DataFrame({'a': [2, 2]}, index=pd.Index(
- list('ab'), name='grp'))
- tm.assert_frame_equal(result, expected)
-
-
-# size
-# --------------------------------
-
-class TestSize(MixIn):
-
- def test_size(self):
- grouped = self.df.groupby(['A', 'B'])
- result = grouped.size()
- for key, group in grouped:
- assert result[key] == len(group)
-
- grouped = self.df.groupby('A')
- result = grouped.size()
- for key, group in grouped:
- assert result[key] == len(group)
-
- grouped = self.df.groupby('B')
- result = grouped.size()
- for key, group in grouped:
- assert result[key] == len(group)
-
- df = DataFrame(np.random.choice(20, (1000, 3)), columns=list('abc'))
- for sort, key in cart_product((False, True), ('a', 'b', ['a', 'b'])):
- left = df.groupby(key, sort=sort).size()
- right = df.groupby(key, sort=sort)['c'].apply(lambda a: a.shape[0])
- assert_series_equal(left, right, check_names=False)
-
- # GH11699
- df = DataFrame([], columns=['A', 'B'])
- out = Series([], dtype='int64', index=Index([], name='A'))
- assert_series_equal(df.groupby('A').size(), out)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index cdb4e3072c65d..bb892f92f213e 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -5,3041 +5,1672 @@
from warnings import catch_warnings
from datetime import datetime
+from decimal import Decimal
-from pandas import (date_range, bdate_range, Timestamp,
+from pandas import (date_range, Timestamp,
Index, MultiIndex, DataFrame, Series,
- concat, Panel, DatetimeIndex, read_csv)
-from pandas.core.dtypes.missing import isna
-from pandas.errors import UnsupportedFunctionCall, PerformanceWarning
-from pandas.util.testing import (assert_frame_equal, assert_index_equal,
+ Panel, DatetimeIndex, read_csv)
+from pandas.errors import PerformanceWarning
+from pandas.util.testing import (assert_frame_equal,
assert_series_equal, assert_almost_equal)
from pandas.compat import (range, lrange, StringIO, lmap, lzip, map, zip,
- builtins, OrderedDict)
+ OrderedDict)
from pandas import compat
from collections import defaultdict
import pandas.core.common as com
import numpy as np
-import pandas.core.nanops as nanops
import pandas.util.testing as tm
import pandas as pd
-from .common import MixIn
-class TestGrouper(object):
+def test_repr():
+ # GH18203
+ result = repr(pd.Grouper(key='A', level='B'))
+ expected = "Grouper(key='A', level='B', axis=0, sort=False)"
+ assert result == expected
- def test_repr(self):
- # GH18203
- result = repr(pd.Grouper(key='A', level='B'))
- expected = "Grouper(key='A', level='B', axis=0, sort=False)"
- assert result == expected
+@pytest.mark.parametrize('dtype', ['int64', 'int32', 'float64', 'float32'])
+def test_basic(dtype):
-class TestGroupBy(MixIn):
+ data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype)
- def test_basic(self):
- def checkit(dtype):
- data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype)
+ index = np.arange(9)
+ np.random.shuffle(index)
+ data = data.reindex(index)
- index = np.arange(9)
- np.random.shuffle(index)
- data = data.reindex(index)
+ grouped = data.groupby(lambda x: x // 3)
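+ # np.arange(9) // 3 yields three groups (0, 1, 2) of three elements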
- grouped = data.groupby(lambda x: x // 3)
+ for k, v in grouped:
+ assert len(v) == 3
- for k, v in grouped:
- assert len(v) == 3
+ agged = grouped.aggregate(np.mean)
+ assert agged[1] == 1
- agged = grouped.aggregate(np.mean)
- assert agged[1] == 1
+ assert_series_equal(agged, grouped.agg(np.mean)) # shorthand
+ assert_series_equal(agged, grouped.mean())
+ assert_series_equal(grouped.agg(np.sum), grouped.sum())
- assert_series_equal(agged, grouped.agg(np.mean)) # shorthand
- assert_series_equal(agged, grouped.mean())
- assert_series_equal(grouped.agg(np.sum), grouped.sum())
+ expected = grouped.apply(lambda x: x * x.sum())
+ transformed = grouped.transform(lambda x: x * x.sum())
+ assert transformed[7] == 12
+ assert_series_equal(transformed, expected)
- expected = grouped.apply(lambda x: x * x.sum())
- transformed = grouped.transform(lambda x: x * x.sum())
- assert transformed[7] == 12
- assert_series_equal(transformed, expected)
+ value_grouped = data.groupby(data)
+ assert_series_equal(value_grouped.aggregate(np.mean), agged,
+ check_index_type=False)
- value_grouped = data.groupby(data)
- assert_series_equal(value_grouped.aggregate(np.mean), agged,
- check_index_type=False)
+ # complex agg
+ agged = grouped.aggregate([np.mean, np.std])
- # complex agg
- agged = grouped.aggregate([np.mean, np.std])
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
+ agged = grouped.aggregate({'one': np.mean, 'two': np.std})
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=False):
- agged = grouped.aggregate({'one': np.mean, 'two': np.std})
+ group_constants = {0: 10, 1: 20, 2: 30}
+ agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
+ assert agged[1] == 21
- group_constants = {0: 10, 1: 20, 2: 30}
- agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
- assert agged[1] == 21
+ # corner cases
+ pytest.raises(Exception, grouped.aggregate, lambda x: x * 2)
- # corner cases
- pytest.raises(Exception, grouped.aggregate, lambda x: x * 2)
- for dtype in ['int64', 'int32', 'float64', 'float32']:
- checkit(dtype)
+def test_groupby_nonobject_dtype(mframe, df_mixed_floats):
+ key = mframe.index.labels[0]
+ grouped = mframe.groupby(key)
+ result = grouped.sum()
- def test_groupby_nonobject_dtype(self):
- key = self.mframe.index.labels[0]
- grouped = self.mframe.groupby(key)
- result = grouped.sum()
+ expected = mframe.groupby(key.astype('O')).sum()
+ assert_frame_equal(result, expected)
- expected = self.mframe.groupby(key.astype('O')).sum()
- assert_frame_equal(result, expected)
+ # GH 3911, mixed frame non-conversion
+ df = df_mixed_floats.copy()
+ df['value'] = lrange(len(df))
- # GH 3911, mixed frame non-conversion
- df = self.df_mixed_floats.copy()
- df['value'] = lrange(len(df))
+ def max_value(group):
+ return group.loc[group['value'].idxmax()]
- def max_value(group):
- return group.loc[group['value'].idxmax()]
+ applied = df.groupby('A').apply(max_value)
+ result = applied.get_dtype_counts().sort_values()
+ expected = Series({'float64': 2,
+ 'int64': 1,
+ 'object': 2}).sort_values()
+ assert_series_equal(result, expected)
- applied = df.groupby('A').apply(max_value)
- result = applied.get_dtype_counts().sort_values()
- expected = Series({'float64': 2,
- 'int64': 1,
- 'object': 2}).sort_values()
- assert_series_equal(result, expected)
- def test_groupby_return_type(self):
+def test_groupby_return_type():
- # GH2893, return a reduced type
- df1 = DataFrame(
- [{"val1": 1, "val2": 20},
- {"val1": 1, "val2": 19},
- {"val1": 2, "val2": 27},
- {"val1": 2, "val2": 12}
- ])
+ # GH2893, return a reduced type
+ df1 = DataFrame(
+ [{"val1": 1, "val2": 20},
+ {"val1": 1, "val2": 19},
+ {"val1": 2, "val2": 27},
+ {"val1": 2, "val2": 12}
+ ])
- def func(dataf):
- return dataf["val2"] - dataf["val2"].mean()
+ def func(dataf):
+ return dataf["val2"] - dataf["val2"].mean()
- result = df1.groupby("val1", squeeze=True).apply(func)
- assert isinstance(result, Series)
+ result = df1.groupby("val1", squeeze=True).apply(func)
+ assert isinstance(result, Series)
- df2 = DataFrame(
- [{"val1": 1, "val2": 20},
- {"val1": 1, "val2": 19},
- {"val1": 1, "val2": 27},
- {"val1": 1, "val2": 12}
- ])
+ df2 = DataFrame(
+ [{"val1": 1, "val2": 20},
+ {"val1": 1, "val2": 19},
+ {"val1": 1, "val2": 27},
+ {"val1": 1, "val2": 12}
+ ])
- def func(dataf):
- return dataf["val2"] - dataf["val2"].mean()
+ def func(dataf):
+ return dataf["val2"] - dataf["val2"].mean()
+
+ result = df2.groupby("val1", squeeze=True).apply(func)
+ assert isinstance(result, Series)
- result = df2.groupby("val1", squeeze=True).apply(func)
- assert isinstance(result, Series)
+ # GH3596, return a consistent type (regression in 0.11 from 0.10.1)
+ df = DataFrame([[1, 1], [1, 1]], columns=['X', 'Y'])
+ result = df.groupby('X', squeeze=False).count()
+ assert isinstance(result, DataFrame)
+
+ # GH5592
+ # inconsistent return type
+ df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb',
+ 'Pony', 'Pony'], B=Series(
+ np.arange(7), dtype='int64'), C=date_range(
+ '20130101', periods=7)))
+
+ def f(grp):
+ return grp.iloc[0]
+
+ expected = df.groupby('A').first()[['B']]
+ result = df.groupby('A').apply(f)[['B']]
+ assert_frame_equal(result, expected)
+
+ def f(grp):
+ if grp.name == 'Tiger':
+ return None
+ return grp.iloc[0]
+
+ result = df.groupby('A').apply(f)[['B']]
+ e = expected.copy()
+ e.loc['Tiger'] = np.nan
+ assert_frame_equal(result, e)
+
+ def f(grp):
+ if grp.name == 'Pony':
+ return None
+ return grp.iloc[0]
+
+ result = df.groupby('A').apply(f)[['B']]
+ e = expected.copy()
+ e.loc['Pony'] = np.nan
+ assert_frame_equal(result, e)
+
+ # 5592 revisited, with datetimes
+ def f(grp):
+ if grp.name == 'Pony':
+ return None
+ return grp.iloc[0]
+
+ result = df.groupby('A').apply(f)[['C']]
+ e = df.groupby('A').first()[['C']]
+ e.loc['Pony'] = pd.NaT
+ assert_frame_equal(result, e)
+
+ # scalar outputs
+ def f(grp):
+ if grp.name == 'Pony':
+ return None
+ return grp.iloc[0].loc['C']
+
+ result = df.groupby('A').apply(f)
+ e = df.groupby('A').first()['C'].copy()
+ e.loc['Pony'] = np.nan
+ e.name = None
+ assert_series_equal(result, e)
- # GH3596, return a consistent type (regression in 0.11 from 0.10.1)
- df = DataFrame([[1, 1], [1, 1]], columns=['X', 'Y'])
- result = df.groupby('X', squeeze=False).count()
- assert isinstance(result, DataFrame)
- # GH5592
- # inconcistent return type
- df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb',
- 'Pony', 'Pony'], B=Series(
- np.arange(7), dtype='int64'), C=date_range(
- '20130101', periods=7)))
+def test_pass_args_kwargs(ts, tsframe):
- def f(grp):
- return grp.iloc[0]
+ def f(x, q=None, axis=0):
+ return np.percentile(x, q, axis=axis)
- expected = df.groupby('A').first()[['B']]
- result = df.groupby('A').apply(f)[['B']]
- assert_frame_equal(result, expected)
+ g = lambda x: np.percentile(x, 80, axis=0)
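+ # agg, apply and transform should all forward extra positional and
+ # keyword arguments to the passed function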
- def f(grp):
- if grp.name == 'Tiger':
- return None
- return grp.iloc[0]
+ # Series
+ ts_grouped = ts.groupby(lambda x: x.month)
+ agg_result = ts_grouped.agg(np.percentile, 80, axis=0)
+ apply_result = ts_grouped.apply(np.percentile, 80, axis=0)
+ trans_result = ts_grouped.transform(np.percentile, 80, axis=0)
+
+ agg_expected = ts_grouped.quantile(.8)
+ trans_expected = ts_grouped.transform(g)
+
+ assert_series_equal(apply_result, agg_expected)
+ assert_series_equal(agg_result, agg_expected, check_names=False)
+ assert_series_equal(trans_result, trans_expected)
+
+ agg_result = ts_grouped.agg(f, q=80)
+ apply_result = ts_grouped.apply(f, q=80)
+ trans_result = ts_grouped.transform(f, q=80)
+ assert_series_equal(agg_result, agg_expected)
+ assert_series_equal(apply_result, agg_expected)
+ assert_series_equal(trans_result, trans_expected)
+
+ # DataFrame
+ df_grouped = tsframe.groupby(lambda x: x.month)
+ agg_result = df_grouped.agg(np.percentile, 80, axis=0)
+ apply_result = df_grouped.apply(DataFrame.quantile, .8)
+ expected = df_grouped.quantile(.8)
+ assert_frame_equal(apply_result, expected)
+ assert_frame_equal(agg_result, expected, check_names=False)
+
+ agg_result = df_grouped.agg(f, q=80)
+ apply_result = df_grouped.apply(DataFrame.quantile, q=.8)
+ assert_frame_equal(agg_result, expected, check_names=False)
+ assert_frame_equal(apply_result, expected)
+
+
+def test_len():
+ df = tm.makeTimeDataFrame()
+ grouped = df.groupby([lambda x: x.year, lambda x: x.month,
+ lambda x: x.day])
+ assert len(grouped) == len(df)
- result = df.groupby('A').apply(f)[['B']]
- e = expected.copy()
- e.loc['Tiger'] = np.nan
- assert_frame_equal(result, e)
+ grouped = df.groupby([lambda x: x.year, lambda x: x.month])
+ expected = len({(x.year, x.month) for x in df.index})
+ assert len(grouped) == expected
- def f(grp):
- if grp.name == 'Pony':
- return None
- return grp.iloc[0]
+ # issue 11016
+ df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3]))
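+ # NaN group keys are dropped, so grouping on 'a' yields no groups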
+ assert len(df.groupby(('a'))) == 0
+ assert len(df.groupby(('b'))) == 3
+ assert len(df.groupby(['a', 'b'])) == 3
+
+
+def test_basic_regression():
+ # regression
+ T = [1.0 * x for x in lrange(1, 10) * 10][:1095]
+ result = Series(T, lrange(0, len(T)))
- result = df.groupby('A').apply(f)[['B']]
- e = expected.copy()
- e.loc['Pony'] = np.nan
- assert_frame_equal(result, e)
+ groupings = np.random.random((1100, ))
+ groupings = Series(groupings, lrange(0, len(groupings))) * 10.
- # 5592 revisited, with datetimes
- def f(grp):
- if grp.name == 'Pony':
- return None
- return grp.iloc[0]
+ grouped = result.groupby(groupings)
+ grouped.mean()
- result = df.groupby('A').apply(f)[['C']]
- e = df.groupby('A').first()[['C']]
- e.loc['Pony'] = pd.NaT
- assert_frame_equal(result, e)
- # scalar outputs
- def f(grp):
- if grp.name == 'Pony':
- return None
- return grp.iloc[0].loc['C']
-
- result = df.groupby('A').apply(f)
- e = df.groupby('A').first()['C'].copy()
- e.loc['Pony'] = np.nan
- e.name = None
- assert_series_equal(result, e)
-
- def test_apply_issues(self):
- # GH 5788
-
- s = """2011.05.16,00:00,1.40893
-2011.05.16,01:00,1.40760
-2011.05.16,02:00,1.40750
-2011.05.16,03:00,1.40649
-2011.05.17,02:00,1.40893
-2011.05.17,03:00,1.40760
-2011.05.17,04:00,1.40750
-2011.05.17,05:00,1.40649
-2011.05.18,02:00,1.40893
-2011.05.18,03:00,1.40760
-2011.05.18,04:00,1.40750
-2011.05.18,05:00,1.40649"""
-
- df = pd.read_csv(
- StringIO(s), header=None, names=['date', 'time', 'value'],
- parse_dates=[['date', 'time']])
- df = df.set_index('date_time')
-
- expected = df.groupby(df.index.date).idxmax()
- result = df.groupby(df.index.date).apply(lambda x: x.idxmax())
- assert_frame_equal(result, expected)
-
- # GH 5789
- # don't auto coerce dates
- df = pd.read_csv(
- StringIO(s), header=None, names=['date', 'time', 'value'])
- exp_idx = pd.Index(
- ['2011.05.16', '2011.05.17', '2011.05.18'
- ], dtype=object, name='date')
- expected = Series(['00:00', '02:00', '02:00'], index=exp_idx)
- result = df.groupby('date').apply(
- lambda x: x['time'][x['value'].idxmax()])
- assert_series_equal(result, expected)
-
- def test_apply_trivial(self):
- # GH 20066
- # trivial apply: ignore input and return a constant dataframe.
- df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
- 'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
- columns=['key', 'data'])
- expected = pd.concat([df.iloc[1:], df.iloc[1:]],
- axis=1, keys=['float64', 'object'])
- result = df.groupby([str(x) for x in df.dtypes],
- axis=1).apply(lambda x: df.iloc[1:])
-
- assert_frame_equal(result, expected)
-
- @pytest.mark.xfail(reason=("GH 20066; function passed into apply "
- "returns a DataFrame with the same index "
- "as the one to create GroupBy object."))
- def test_apply_trivial_fail(self):
- # GH 20066
- # trivial apply fails if the constant dataframe has the same index
- # with the one used to create GroupBy object.
- df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'],
- 'data': [1.0, 2.0, 3.0, 4.0, 5.0]},
- columns=['key', 'data'])
- expected = pd.concat([df, df],
- axis=1, keys=['float64', 'object'])
- result = df.groupby([str(x) for x in df.dtypes],
- axis=1).apply(lambda x: df)
-
- assert_frame_equal(result, expected)
-
- def test_time_field_bug(self):
- # Test a fix for the following error related to GH issue 11324 When
- # non-key fields in a group-by dataframe contained time-based fields
- # that were not returned by the apply function, an exception would be
- # raised.
-
- df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]})
-
- def func_with_no_date(batch):
- return pd.Series({'c': 2})
-
- def func_with_date(batch):
- return pd.Series({'b': datetime(2015, 1, 1), 'c': 2})
-
- dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)
- dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])
- dfg_no_conversion_expected.index.name = 'a'
-
- dfg_conversion = df.groupby(by=['a']).apply(func_with_date)
- dfg_conversion_expected = pd.DataFrame(
- {'b': datetime(2015, 1, 1),
- 'c': 2}, index=[1])
- dfg_conversion_expected.index.name = 'a'
-
- tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected)
- tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected)
-
- def test_len(self):
- df = tm.makeTimeDataFrame()
- grouped = df.groupby([lambda x: x.year, lambda x: x.month,
- lambda x: x.day])
- assert len(grouped) == len(df)
-
- grouped = df.groupby([lambda x: x.year, lambda x: x.month])
- expected = len({(x.year, x.month) for x in df.index})
- assert len(grouped) == expected
-
- # issue 11016
- df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3]))
- assert len(df.groupby(('a'))) == 0
- assert len(df.groupby(('b'))) == 3
- assert len(df.groupby(['a', 'b'])) == 3
-
- def test_basic_regression(self):
- # regression
- T = [1.0 * x for x in lrange(1, 10) * 10][:1095]
- result = Series(T, lrange(0, len(T)))
-
- groupings = np.random.random((1100, ))
- groupings = Series(groupings, lrange(0, len(groupings))) * 10.
-
- grouped = result.groupby(groupings)
- grouped.mean()
-
- def test_with_na_groups(self):
- index = Index(np.arange(10))
-
- for dtype in ['float64', 'float32', 'int64', 'int32', 'int16', 'int8']:
- values = Series(np.ones(10), index, dtype=dtype)
- labels = Series([np.nan, 'foo', 'bar', 'bar', np.nan, np.nan,
- 'bar', 'bar', np.nan, 'foo'], index=index)
-
- # this SHOULD be an int
- grouped = values.groupby(labels)
- agged = grouped.agg(len)
- expected = Series([4, 2], index=['bar', 'foo'])
-
- assert_series_equal(agged, expected, check_dtype=False)
-
- # assert issubclass(agged.dtype.type, np.integer)
-
- # explicitly return a float from my function
- def f(x):
- return float(len(x))
-
- agged = grouped.agg(f)
- expected = Series([4, 2], index=['bar', 'foo'])
-
- assert_series_equal(agged, expected, check_dtype=False)
- assert issubclass(agged.dtype.type, np.dtype(dtype).type)
-
- def test_indices_concatenation_order(self):
-
- # GH 2808
-
- def f1(x):
- y = x[(x.b % 2) == 1] ** 2
- if y.empty:
- multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2,
- names=['b', 'c'])
- res = DataFrame(None, columns=['a'], index=multiindex)
- return res
- else:
- y = y.set_index(['b', 'c'])
- return y
-
- def f2(x):
- y = x[(x.b % 2) == 1] ** 2
- if y.empty:
- return DataFrame()
- else:
- y = y.set_index(['b', 'c'])
- return y
-
- def f3(x):
- y = x[(x.b % 2) == 1] ** 2
- if y.empty:
- multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2,
- names=['foo', 'bar'])
- res = DataFrame(None, columns=['a', 'b'], index=multiindex)
- return res
- else:
- return y
-
- df = DataFrame({'a': [1, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)})
-
- df2 = DataFrame({'a': [3, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)})
-
- # correct result
- result1 = df.groupby('a').apply(f1)
- result2 = df2.groupby('a').apply(f1)
- assert_frame_equal(result1, result2)
-
- # should fail (not the same number of levels)
- pytest.raises(AssertionError, df.groupby('a').apply, f2)
- pytest.raises(AssertionError, df2.groupby('a').apply, f2)
-
- # should fail (incorrect shape)
- pytest.raises(AssertionError, df.groupby('a').apply, f3)
- pytest.raises(AssertionError, df2.groupby('a').apply, f3)
-
- def test_attr_wrapper(self):
- grouped = self.ts.groupby(lambda x: x.weekday())
-
- result = grouped.std()
- expected = grouped.agg(lambda x: np.std(x, ddof=1))
- assert_series_equal(result, expected)
-
- # this is pretty cool
- result = grouped.describe()
- expected = {}
- for name, gp in grouped:
- expected[name] = gp.describe()
- expected = DataFrame(expected).T
- assert_frame_equal(result, expected)
-
- # get attribute
- result = grouped.dtype
- expected = grouped.agg(lambda x: x.dtype)
-
- # make sure raises error
- pytest.raises(AttributeError, getattr, grouped, 'foo')
-
- def test_frame_groupby(self):
- grouped = self.tsframe.groupby(lambda x: x.weekday())
-
- # aggregate
- aggregated = grouped.aggregate(np.mean)
- assert len(aggregated) == 5
- assert len(aggregated.columns) == 4
-
- # by string
- tscopy = self.tsframe.copy()
- tscopy['weekday'] = [x.weekday() for x in tscopy.index]
- stragged = tscopy.groupby('weekday').aggregate(np.mean)
- assert_frame_equal(stragged, aggregated, check_names=False)
-
- # transform
- grouped = self.tsframe.head(30).groupby(lambda x: x.weekday())
- transformed = grouped.transform(lambda x: x - x.mean())
- assert len(transformed) == 30
- assert len(transformed.columns) == 4
-
- # transform propagate
- transformed = grouped.transform(lambda x: x.mean())
- for name, group in grouped:
- mean = group.mean()
- for idx in group.index:
- tm.assert_series_equal(transformed.xs(idx), mean,
- check_names=False)
-
- # iterate
- for weekday, group in grouped:
- assert group.index[0].weekday() == weekday
-
- # groups / group_indices
- groups = grouped.groups
- indices = grouped.indices
-
- for k, v in compat.iteritems(groups):
- samething = self.tsframe.index.take(indices[k])
- assert (samething == v).all()
-
- def test_frame_groupby_columns(self):
- mapping = {'A': 0, 'B': 0, 'C': 1, 'D': 1}
- grouped = self.tsframe.groupby(mapping, axis=1)
-
- # aggregate
- aggregated = grouped.aggregate(np.mean)
- assert len(aggregated) == len(self.tsframe)
- assert len(aggregated.columns) == 2
-
- # transform
- tf = lambda x: x - x.mean()
- groupedT = self.tsframe.T.groupby(mapping, axis=0)
- assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf))
-
- # iterate
- for k, v in grouped:
- assert len(v.columns) == 2
-
- def test_frame_set_name_single(self):
- grouped = self.df.groupby('A')
-
- result = grouped.mean()
- assert result.index.name == 'A'
-
- result = self.df.groupby('A', as_index=False).mean()
- assert result.index.name != 'A'
-
- result = grouped.agg(np.mean)
- assert result.index.name == 'A'
-
- result = grouped.agg({'C': np.mean, 'D': np.std})
- assert result.index.name == 'A'
-
- result = grouped['C'].mean()
- assert result.index.name == 'A'
- result = grouped['C'].agg(np.mean)
- assert result.index.name == 'A'
- result = grouped['C'].agg([np.mean, np.std])
- assert result.index.name == 'A'
-
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=False):
- result = grouped['C'].agg({'foo': np.mean, 'bar': np.std})
- assert result.index.name == 'A'
-
- def test_multi_func(self):
- col1 = self.df['A']
- col2 = self.df['B']
-
- grouped = self.df.groupby([col1.get, col2.get])
- agged = grouped.mean()
- expected = self.df.groupby(['A', 'B']).mean()
-
- # TODO groupby get drops names
- assert_frame_equal(agged.loc[:, ['C', 'D']],
- expected.loc[:, ['C', 'D']],
- check_names=False)
-
- # some "groups" with no data
- df = DataFrame({'v1': np.random.randn(6),
- 'v2': np.random.randn(6),
- 'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']),
- 'k2': np.array(['1', '1', '1', '2', '2', '2'])},
- index=['one', 'two', 'three', 'four', 'five', 'six'])
- # only verify that it works for now
- grouped = df.groupby(['k1', 'k2'])
- grouped.agg(np.sum)
-
- def test_multi_key_multiple_functions(self):
- grouped = self.df.groupby(['A', 'B'])['C']
-
- agged = grouped.agg([np.mean, np.std])
- expected = DataFrame({'mean': grouped.agg(np.mean),
- 'std': grouped.agg(np.std)})
- assert_frame_equal(agged, expected)
-
- def test_frame_multi_key_function_list(self):
- data = DataFrame(
- {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
- 'foo', 'foo', 'foo'],
- 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
- 'two', 'two', 'one'],
- 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
- 'dull', 'shiny', 'shiny', 'shiny'],
- 'D': np.random.randn(11),
- 'E': np.random.randn(11),
- 'F': np.random.randn(11)})
-
- grouped = data.groupby(['A', 'B'])
- funcs = [np.mean, np.std]
- agged = grouped.agg(funcs)
- expected = concat([grouped['D'].agg(funcs), grouped['E'].agg(funcs),
- grouped['F'].agg(funcs)],
- keys=['D', 'E', 'F'], axis=1)
- assert (isinstance(agged.index, MultiIndex))
- assert (isinstance(expected.index, MultiIndex))
- assert_frame_equal(agged, expected)
-
- def test_groupby_multiple_columns(self):
- data = self.df
- grouped = data.groupby(['A', 'B'])
-
- def _check_op(op):
-
- with catch_warnings(record=True):
- result1 = op(grouped)
-
- expected = defaultdict(dict)
- for n1, gp1 in data.groupby('A'):
- for n2, gp2 in gp1.groupby('B'):
- expected[n1][n2] = op(gp2.loc[:, ['C', 'D']])
- expected = dict((k, DataFrame(v))
- for k, v in compat.iteritems(expected))
- expected = Panel.fromDict(expected).swapaxes(0, 1)
- expected.major_axis.name, expected.minor_axis.name = 'A', 'B'
-
- # a little bit crude
- for col in ['C', 'D']:
- result_col = op(grouped[col])
- exp = expected[col]
- pivoted = result1[col].unstack()
- pivoted2 = result_col.unstack()
- assert_frame_equal(pivoted.reindex_like(exp), exp)
- assert_frame_equal(pivoted2.reindex_like(exp), exp)
-
- _check_op(lambda x: x.sum())
- _check_op(lambda x: x.mean())
-
- # test single series works the same
- result = data['C'].groupby([data['A'], data['B']]).mean()
- expected = data.groupby(['A', 'B']).mean()['C']
-
- assert_series_equal(result, expected)
-
- def test_groupby_as_index_agg(self):
- grouped = self.df.groupby('A', as_index=False)
-
- # single-key
-
- result = grouped.agg(np.mean)
- expected = grouped.mean()
- assert_frame_equal(result, expected)
-
- result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
- expected2 = grouped.mean()
- expected2['D'] = grouped.sum()['D']
- assert_frame_equal(result2, expected2)
-
- grouped = self.df.groupby('A', as_index=True)
- expected3 = grouped['C'].sum()
- expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
-
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=False):
- result3 = grouped['C'].agg({'Q': np.sum})
- assert_frame_equal(result3, expected3)
-
- # multi-key
-
- grouped = self.df.groupby(['A', 'B'], as_index=False)
-
- result = grouped.agg(np.mean)
- expected = grouped.mean()
- assert_frame_equal(result, expected)
-
- result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
- expected2 = grouped.mean()
- expected2['D'] = grouped.sum()['D']
- assert_frame_equal(result2, expected2)
-
- expected3 = grouped['C'].sum()
- expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
+    # issue 11016
+    df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3]))
+    assert len(df.groupby(('a'))) == 0
+    assert len(df.groupby(('b'))) == 3
+    assert len(df.groupby(['a', 'b'])) == 3
+
+
+def test_basic_regression():
+    # regression
+    T = [1.0 * x for x in lrange(1, 10) * 10][:1095]
+    result = Series(T, lrange(0, len(T)))
+
+    groupings = np.random.random((1100, ))
+    groupings = Series(groupings, lrange(0, len(groupings))) * 10.
+
+    grouped = result.groupby(groupings)
+    grouped.mean()
+
+
+@pytest.mark.parametrize('dtype', ['float64', 'float32', 'int64',
+ 'int32', 'int16', 'int8'])
+def test_with_na_groups(dtype):
+ index = Index(np.arange(10))
+ values = Series(np.ones(10), index, dtype=dtype)
+ labels = Series([np.nan, 'foo', 'bar', 'bar', np.nan, np.nan,
+ 'bar', 'bar', np.nan, 'foo'], index=index)
+
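+    # rows whose label is NaN are dropped from the groupby entirely,
+    # leaving only the 'bar' (4 rows) and 'foo' (2 rows) groups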
+ # this SHOULD be an int
+ grouped = values.groupby(labels)
+ agged = grouped.agg(len)
+ expected = Series([4, 2], index=['bar', 'foo'])
+
+ assert_series_equal(agged, expected, check_dtype=False)
+
+ # assert issubclass(agged.dtype.type, np.integer)
+
+ # explicitly return a float from my function
+ def f(x):
+ return float(len(x))
+
+ agged = grouped.agg(f)
+ expected = Series([4, 2], index=['bar', 'foo'])
+
+ assert_series_equal(agged, expected, check_dtype=False)
+ assert issubclass(agged.dtype.type, np.dtype(dtype).type)
+
+
+def test_indices_concatenation_order():
+
+ # GH 2808
+
+ def f1(x):
+ y = x[(x.b % 2) == 1] ** 2
+ if y.empty:
+ multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2,
+ names=['b', 'c'])
+ res = DataFrame(None, columns=['a'], index=multiindex)
+ return res
+ else:
+ y = y.set_index(['b', 'c'])
+ return y
+
+ def f2(x):
+ y = x[(x.b % 2) == 1] ** 2
+ if y.empty:
+ return DataFrame()
+ else:
+ y = y.set_index(['b', 'c'])
+ return y
+
+ def f3(x):
+ y = x[(x.b % 2) == 1] ** 2
+ if y.empty:
+ multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2,
+ names=['foo', 'bar'])
+ res = DataFrame(None, columns=['a', 'b'], index=multiindex)
+ return res
+ else:
+ return y
+
+ df = DataFrame({'a': [1, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)})
+
+ df2 = DataFrame({'a': [3, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)})
+
+ # correct result
+ result1 = df.groupby('a').apply(f1)
+ result2 = df2.groupby('a').apply(f1)
+ assert_frame_equal(result1, result2)
+
+ # should fail (not the same number of levels)
+ pytest.raises(AssertionError, df.groupby('a').apply, f2)
+ pytest.raises(AssertionError, df2.groupby('a').apply, f2)
+
+ # should fail (incorrect shape)
+ pytest.raises(AssertionError, df.groupby('a').apply, f3)
+ pytest.raises(AssertionError, df2.groupby('a').apply, f3)
+
+
+def test_attr_wrapper(ts):
+ grouped = ts.groupby(lambda x: x.weekday())
+
+ result = grouped.std()
+ expected = grouped.agg(lambda x: np.std(x, ddof=1))
+ assert_series_equal(result, expected)
+
+ # this is pretty cool
+ result = grouped.describe()
+ expected = {}
+ for name, gp in grouped:
+ expected[name] = gp.describe()
+ expected = DataFrame(expected).T
+ assert_frame_equal(result, expected)
+
+ # get attribute
+ result = grouped.dtype
+ expected = grouped.agg(lambda x: x.dtype)
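+    # GroupBy has no .dtype of its own; the attribute is dispatched to
+    # the underlying Series and evaluated per group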
+
+ # make sure raises error
+ pytest.raises(AttributeError, getattr, grouped, 'foo')
+
+
+def test_frame_groupby(tsframe):
+ grouped = tsframe.groupby(lambda x: x.weekday())
+
+ # aggregate
+ aggregated = grouped.aggregate(np.mean)
+ assert len(aggregated) == 5
+ assert len(aggregated.columns) == 4
+
+ # by string
+ tscopy = tsframe.copy()
+ tscopy['weekday'] = [x.weekday() for x in tscopy.index]
+ stragged = tscopy.groupby('weekday').aggregate(np.mean)
+ assert_frame_equal(stragged, aggregated, check_names=False)
+
+ # transform
+ grouped = tsframe.head(30).groupby(lambda x: x.weekday())
+ transformed = grouped.transform(lambda x: x - x.mean())
+ assert len(transformed) == 30
+ assert len(transformed.columns) == 4
+
+ # transform propagate
+ transformed = grouped.transform(lambda x: x.mean())
+ for name, group in grouped:
+ mean = group.mean()
+ for idx in group.index:
+ tm.assert_series_equal(transformed.xs(idx), mean,
+ check_names=False)
+
+ # iterate
+ for weekday, group in grouped:
+ assert group.index[0].weekday() == weekday
+
+ # groups / group_indices
+ groups = grouped.groups
+ indices = grouped.indices
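+    # .groups maps each key to the index labels in that group, while
+    # .indices maps each key to integer positions, so taking those
+    # positions from the original index must reproduce the labels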
+
+ for k, v in compat.iteritems(groups):
+ samething = tsframe.index.take(indices[k])
+ assert (samething == v).all()
+
+
+def test_frame_groupby_columns(tsframe):
+ mapping = {'A': 0, 'B': 0, 'C': 1, 'D': 1}
+ grouped = tsframe.groupby(mapping, axis=1)
+
+ # aggregate
+ aggregated = grouped.aggregate(np.mean)
+ assert len(aggregated) == len(tsframe)
+ assert len(aggregated.columns) == 2
+
+ # transform
+ tf = lambda x: x - x.mean()
+ groupedT = tsframe.T.groupby(mapping, axis=0)
+ assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf))
+
+ # iterate
+ for k, v in grouped:
+ assert len(v.columns) == 2
+
+
+def test_frame_set_name_single(df):
+ grouped = df.groupby('A')
+
+ result = grouped.mean()
+ assert result.index.name == 'A'
+
+ result = df.groupby('A', as_index=False).mean()
+ assert result.index.name != 'A'
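+    # with as_index=False the group key becomes a regular column and
+    # the result keeps a default integer index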
+
+ result = grouped.agg(np.mean)
+ assert result.index.name == 'A'
+
+ result = grouped.agg({'C': np.mean, 'D': np.std})
+ assert result.index.name == 'A'
+
+ result = grouped['C'].mean()
+ assert result.index.name == 'A'
+ result = grouped['C'].agg(np.mean)
+ assert result.index.name == 'A'
+ result = grouped['C'].agg([np.mean, np.std])
+ assert result.index.name == 'A'
+
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
+ result = grouped['C'].agg({'foo': np.mean, 'bar': np.std})
+ assert result.index.name == 'A'
+
+
+def test_multi_func(df):
+ col1 = df['A']
+ col2 = df['B']
+
+ grouped = df.groupby([col1.get, col2.get])
+ agged = grouped.mean()
+ expected = df.groupby(['A', 'B']).mean()
+
+ # TODO groupby get drops names
+ assert_frame_equal(agged.loc[:, ['C', 'D']],
+ expected.loc[:, ['C', 'D']],
+ check_names=False)
+
+ # some "groups" with no data
+ df = DataFrame({'v1': np.random.randn(6),
+ 'v2': np.random.randn(6),
+ 'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']),
+ 'k2': np.array(['1', '1', '1', '2', '2', '2'])},
+ index=['one', 'two', 'three', 'four', 'five', 'six'])
+ # only verify that it works for now
+ grouped = df.groupby(['k1', 'k2'])
+ grouped.agg(np.sum)
+
+
+def test_multi_key_multiple_functions(df):
+ grouped = df.groupby(['A', 'B'])['C']
+
+ agged = grouped.agg([np.mean, np.std])
+ expected = DataFrame({'mean': grouped.agg(np.mean),
+ 'std': grouped.agg(np.std)})
+ assert_frame_equal(agged, expected)
+
+
+def test_frame_multi_key_function_list():
+ data = DataFrame(
+ {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
+ 'foo', 'foo', 'foo'],
+ 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
+ 'two', 'two', 'one'],
+ 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
+ 'dull', 'shiny', 'shiny', 'shiny'],
+ 'D': np.random.randn(11),
+ 'E': np.random.randn(11),
+ 'F': np.random.randn(11)})
+
+ grouped = data.groupby(['A', 'B'])
+ funcs = [np.mean, np.std]
+ agged = grouped.agg(funcs)
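+    # aggregating with a list of functions produces a column MultiIndex
+    # of (original column, function name) pairs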
+ expected = pd.concat([grouped['D'].agg(funcs), grouped['E'].agg(funcs),
+ grouped['F'].agg(funcs)],
+ keys=['D', 'E', 'F'], axis=1)
+ assert (isinstance(agged.index, MultiIndex))
+ assert (isinstance(expected.index, MultiIndex))
+ assert_frame_equal(agged, expected)
+
+
+@pytest.mark.parametrize('op', [lambda x: x.sum(), lambda x: x.mean()])
+def test_groupby_multiple_columns(df, op):
+ data = df
+ grouped = data.groupby(['A', 'B'])
+
+ with catch_warnings(record=True):
+ result1 = op(grouped)
+
+ expected = defaultdict(dict)
+ for n1, gp1 in data.groupby('A'):
+ for n2, gp2 in gp1.groupby('B'):
+ expected[n1][n2] = op(gp2.loc[:, ['C', 'D']])
+ expected = dict((k, DataFrame(v))
+ for k, v in compat.iteritems(expected))
+ expected = Panel.fromDict(expected).swapaxes(0, 1)
+ expected.major_axis.name, expected.minor_axis.name = 'A', 'B'
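+    # Panel is deprecated; it is only used here to assemble the
+    # expected, unstacked result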
+
+ # a little bit crude
+ for col in ['C', 'D']:
+ result_col = op(grouped[col])
+ exp = expected[col]
+ pivoted = result1[col].unstack()
+ pivoted2 = result_col.unstack()
+ assert_frame_equal(pivoted.reindex_like(exp), exp)
+ assert_frame_equal(pivoted2.reindex_like(exp), exp)
+
+ # test single series works the same
+ result = data['C'].groupby([data['A'], data['B']]).mean()
+ expected = data.groupby(['A', 'B']).mean()['C']
+
+ assert_series_equal(result, expected)
+
+
+def test_groupby_as_index_agg(df):
+ grouped = df.groupby('A', as_index=False)
+
+ # single-key
+
+ result = grouped.agg(np.mean)
+ expected = grouped.mean()
+ assert_frame_equal(result, expected)
+
+ result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
+ expected2 = grouped.mean()
+ expected2['D'] = grouped.sum()['D']
+ assert_frame_equal(result2, expected2)
+
+ grouped = df.groupby('A', as_index=True)
+ expected3 = grouped['C'].sum()
+ expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
+
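+    # renaming a SeriesGroupBy aggregation through a dict is
+    # deprecated, hence the FutureWarning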
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
result3 = grouped['C'].agg({'Q': np.sum})
- assert_frame_equal(result3, expected3)
-
- # GH7115 & GH8112 & GH8582
- df = DataFrame(np.random.randint(0, 100, (50, 3)),
- columns=['jim', 'joe', 'jolie'])
- ts = Series(np.random.randint(5, 10, 50), name='jim')
-
- gr = df.groupby(ts)
- gr.nth(0) # invokes set_selection_from_grouper internally
- assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum))
-
- for attr in ['mean', 'max', 'count', 'idxmax', 'cumsum', 'all']:
- gr = df.groupby(ts, as_index=False)
- left = getattr(gr, attr)()
-
- gr = df.groupby(ts.values, as_index=True)
- right = getattr(gr, attr)().reset_index(drop=True)
-
- assert_frame_equal(left, right)
-
- def test_as_index_series_return_frame(self):
- grouped = self.df.groupby('A', as_index=False)
- grouped2 = self.df.groupby(['A', 'B'], as_index=False)
-
- result = grouped['C'].agg(np.sum)
- expected = grouped.agg(np.sum).loc[:, ['A', 'C']]
- assert isinstance(result, DataFrame)
- assert_frame_equal(result, expected)
-
- result2 = grouped2['C'].agg(np.sum)
- expected2 = grouped2.agg(np.sum).loc[:, ['A', 'B', 'C']]
- assert isinstance(result2, DataFrame)
- assert_frame_equal(result2, expected2)
-
- result = grouped['C'].sum()
- expected = grouped.sum().loc[:, ['A', 'C']]
- assert isinstance(result, DataFrame)
- assert_frame_equal(result, expected)
-
- result2 = grouped2['C'].sum()
- expected2 = grouped2.sum().loc[:, ['A', 'B', 'C']]
- assert isinstance(result2, DataFrame)
- assert_frame_equal(result2, expected2)
-
- # corner case
- pytest.raises(Exception, grouped['C'].__getitem__, 'D')
-
- def test_groupby_as_index_cython(self):
- data = self.df
-
- # single-key
- grouped = data.groupby('A', as_index=False)
- result = grouped.mean()
- expected = data.groupby(['A']).mean()
- expected.insert(0, 'A', expected.index)
- expected.index = np.arange(len(expected))
- assert_frame_equal(result, expected)
-
- # multi-key
- grouped = data.groupby(['A', 'B'], as_index=False)
- result = grouped.mean()
- expected = data.groupby(['A', 'B']).mean()
-
- arrays = lzip(*expected.index.values)
- expected.insert(0, 'A', arrays[0])
- expected.insert(1, 'B', arrays[1])
- expected.index = np.arange(len(expected))
- assert_frame_equal(result, expected)
-
- def test_groupby_as_index_series_scalar(self):
- grouped = self.df.groupby(['A', 'B'], as_index=False)
-
- # GH #421
-
- result = grouped['C'].agg(len)
- expected = grouped.agg(len).loc[:, ['A', 'B', 'C']]
- assert_frame_equal(result, expected)
-
- def test_groupby_as_index_corner(self):
- pytest.raises(TypeError, self.ts.groupby, lambda x: x.weekday(),
- as_index=False)
-
- pytest.raises(ValueError, self.df.groupby, lambda x: x.lower(),
- as_index=False, axis=1)
-
- def test_groupby_as_index_apply(self):
- # GH #4648 and #3417
- df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'],
- 'user_id': [1, 2, 1, 1, 3, 1],
- 'time': range(6)})
-
- g_as = df.groupby('user_id', as_index=True)
- g_not_as = df.groupby('user_id', as_index=False)
-
- res_as = g_as.head(2).index
- res_not_as = g_not_as.head(2).index
- exp = Index([0, 1, 2, 4])
- assert_index_equal(res_as, exp)
- assert_index_equal(res_not_as, exp)
-
- res_as_apply = g_as.apply(lambda x: x.head(2)).index
- res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
-
- # apply doesn't maintain the original ordering
- # changed in GH5610 as the as_index=False returns a MI here
- exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (
- 2, 4)])
- tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
- exp_as_apply = MultiIndex.from_tuples(tp, names=['user_id', None])
-
- assert_index_equal(res_as_apply, exp_as_apply)
- assert_index_equal(res_not_as_apply, exp_not_as_apply)
-
- ind = Index(list('abcde'))
- df = DataFrame([[1, 2], [2, 3], [1, 4], [1, 5], [2, 6]], index=ind)
- res = df.groupby(0, as_index=False).apply(lambda x: x).index
- assert_index_equal(res, ind)
-
- def test_groupby_multiple_key(self):
- df = tm.makeTimeDataFrame()
- grouped = df.groupby([lambda x: x.year, lambda x: x.month,
- lambda x: x.day])
- agged = grouped.sum()
- assert_almost_equal(df.values, agged.values)
-
- grouped = df.T.groupby([lambda x: x.year,
- lambda x: x.month,
- lambda x: x.day], axis=1)
-
- agged = grouped.agg(lambda x: x.sum())
- tm.assert_index_equal(agged.index, df.columns)
- assert_almost_equal(df.T.values, agged.values)
-
- agged = grouped.agg(lambda x: x.sum())
- assert_almost_equal(df.T.values, agged.values)
-
- def test_groupby_multi_corner(self):
- # test that having an all-NA column doesn't mess you up
- df = self.df.copy()
- df['bad'] = np.nan
- agged = df.groupby(['A', 'B']).mean()
-
- expected = self.df.groupby(['A', 'B']).mean()
- expected['bad'] = np.nan
-
- assert_frame_equal(agged, expected)
-
- def test_omit_nuisance(self):
- grouped = self.df.groupby('A')
-
- result = grouped.mean()
- expected = self.df.loc[:, ['A', 'C', 'D']].groupby('A').mean()
- assert_frame_equal(result, expected)
-
- agged = grouped.agg(np.mean)
- exp = grouped.mean()
- assert_frame_equal(agged, exp)
-
- df = self.df.loc[:, ['A', 'C', 'D']]
- df['E'] = datetime.now()
- grouped = df.groupby('A')
- result = grouped.agg(np.sum)
- expected = grouped.sum()
- assert_frame_equal(result, expected)
-
- # won't work with axis = 1
- grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1)
- result = pytest.raises(TypeError, grouped.agg,
- lambda x: x.sum(0, numeric_only=False))
-
- def test_omit_nuisance_python_multiple(self):
- grouped = self.three_group.groupby(['A', 'B'])
-
- agged = grouped.agg(np.mean)
- exp = grouped.mean()
- assert_frame_equal(agged, exp)
-
- def test_empty_groups_corner(self):
- # handle empty groups
- df = DataFrame({'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']),
- 'k2': np.array(['1', '1', '1', '2', '2', '2']),
- 'k3': ['foo', 'bar'] * 3,
- 'v1': np.random.randn(6),
- 'v2': np.random.randn(6)})
-
- grouped = df.groupby(['k1', 'k2'])
- result = grouped.agg(np.mean)
- expected = grouped.mean()
- assert_frame_equal(result, expected)
-
- grouped = self.mframe[3:5].groupby(level=0)
- agged = grouped.apply(lambda x: x.mean())
- agged_A = grouped['A'].apply(np.mean)
- assert_series_equal(agged['A'], agged_A)
- assert agged.index.name == 'first'
-
- def test_apply_concat_preserve_names(self):
- grouped = self.three_group.groupby(['A', 'B'])
-
- def desc(group):
- result = group.describe()
- result.index.name = 'stat'
- return result
-
- def desc2(group):
- result = group.describe()
- result.index.name = 'stat'
- result = result[:len(group)]
- # weirdo
- return result
-
- def desc3(group):
- result = group.describe()
-
- # names are different
- result.index.name = 'stat_%d' % len(group)
-
- result = result[:len(group)]
- # weirdo
- return result
-
- result = grouped.apply(desc)
- assert result.index.names == ('A', 'B', 'stat')
-
- result2 = grouped.apply(desc2)
- assert result2.index.names == ('A', 'B', 'stat')
-
- result3 = grouped.apply(desc3)
- assert result3.index.names == ('A', 'B', None)
-
- def test_nonsense_func(self):
- df = DataFrame([0])
- pytest.raises(Exception, df.groupby, lambda x: x + 'foo')
-
- def test_builtins_apply(self): # GH8155
- df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)),
- columns=['jim', 'joe'])
- df['jolie'] = np.random.randn(1000)
-
- for keys in ['jim', ['jim', 'joe']]: # single key & multi-key
- if keys == 'jim':
- continue
- for f in [max, min, sum]:
- fname = f.__name__
- result = df.groupby(keys).apply(f)
- result.shape
- ngroups = len(df.drop_duplicates(subset=keys))
- assert result.shape == (ngroups, 3), 'invalid frame shape: '\
- '{} (expected ({}, 3))'.format(result.shape, ngroups)
-
- assert_frame_equal(result, # numpy's equivalent function
- df.groupby(keys).apply(getattr(np, fname)))
-
- if f != sum:
- expected = df.groupby(keys).agg(fname).reset_index()
- expected.set_index(keys, inplace=True, drop=False)
- assert_frame_equal(result, expected, check_dtype=False)
-
- assert_series_equal(getattr(result, fname)(),
- getattr(df, fname)())
-
- def test_max_min_non_numeric(self):
- # #2700
- aa = DataFrame({'nn': [11, 11, 22, 22],
- 'ii': [1, 2, 3, 4],
- 'ss': 4 * ['mama']})
-
- result = aa.groupby('nn').max()
- assert 'ss' in result
-
- result = aa.groupby('nn').max(numeric_only=False)
- assert 'ss' in result
-
- result = aa.groupby('nn').min()
- assert 'ss' in result
-
- result = aa.groupby('nn').min(numeric_only=False)
- assert 'ss' in result
-
- def test_arg_passthru(self):
- # make sure that we are passing thru kwargs
- # to our agg functions
-
- # GH3668
- # GH5724
- df = pd.DataFrame(
- {'group': [1, 1, 2],
- 'int': [1, 2, 3],
- 'float': [4., 5., 6.],
- 'string': list('abc'),
- 'category_string': pd.Series(list('abc')).astype('category'),
- 'category_int': [7, 8, 9],
- 'datetime': pd.date_range('20130101', periods=3),
- 'datetimetz': pd.date_range('20130101',
- periods=3,
- tz='US/Eastern'),
- 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')},
- columns=['group', 'int', 'float', 'string',
- 'category_string', 'category_int',
- 'datetime', 'datetimetz',
- 'timedelta'])
-
- expected_columns_numeric = Index(['int', 'float', 'category_int'])
-
- # mean / median
- expected = pd.DataFrame(
- {'category_int': [7.5, 9],
- 'float': [4.5, 6.],
- 'timedelta': [pd.Timedelta('1.5s'),
- pd.Timedelta('3s')],
- 'int': [1.5, 3],
- 'datetime': [pd.Timestamp('2013-01-01 12:00:00'),
- pd.Timestamp('2013-01-03 00:00:00')],
- 'datetimetz': [
- pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'),
- pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]},
- index=Index([1, 2], name='group'),
- columns=['int', 'float', 'category_int',
- 'datetime', 'datetimetz', 'timedelta'])
- for attr in ['mean', 'median']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- tm.assert_index_equal(result.columns, expected_columns_numeric)
-
- result = f(numeric_only=False)
- assert_frame_equal(result.reindex_like(expected), expected)
-
- # TODO: min, max *should* handle
- # categorical (ordered) dtype
- expected_columns = Index(['int', 'float', 'string',
- 'category_int',
- 'datetime', 'datetimetz',
- 'timedelta'])
- for attr in ['min', 'max']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- tm.assert_index_equal(result.columns, expected_columns)
-
- result = f(numeric_only=False)
- tm.assert_index_equal(result.columns, expected_columns)
-
- expected_columns = Index(['int', 'float', 'string',
- 'category_string', 'category_int',
- 'datetime', 'datetimetz',
- 'timedelta'])
- for attr in ['first', 'last']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- tm.assert_index_equal(result.columns, expected_columns)
-
- result = f(numeric_only=False)
- tm.assert_index_equal(result.columns, expected_columns)
-
- expected_columns = Index(['int', 'float', 'string',
- 'category_int', 'timedelta'])
- for attr in ['sum']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- tm.assert_index_equal(result.columns, expected_columns_numeric)
-
- result = f(numeric_only=False)
- tm.assert_index_equal(result.columns, expected_columns)
-
- expected_columns = Index(['int', 'float', 'category_int'])
- for attr in ['prod', 'cumprod']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- tm.assert_index_equal(result.columns, expected_columns_numeric)
-
- result = f(numeric_only=False)
- tm.assert_index_equal(result.columns, expected_columns)
-
- # like min, max, but don't include strings
- expected_columns = Index(['int', 'float',
- 'category_int',
- 'datetime', 'datetimetz',
- 'timedelta'])
- for attr in ['cummin', 'cummax']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- # GH 15561: numeric_only=False set by default like min/max
- tm.assert_index_equal(result.columns, expected_columns)
-
- result = f(numeric_only=False)
- tm.assert_index_equal(result.columns, expected_columns)
-
- expected_columns = Index(['int', 'float', 'category_int',
- 'timedelta'])
- for attr in ['cumsum']:
- f = getattr(df.groupby('group'), attr)
- result = f()
- tm.assert_index_equal(result.columns, expected_columns_numeric)
-
- result = f(numeric_only=False)
- tm.assert_index_equal(result.columns, expected_columns)
-
- def test_wrap_aggregated_output_multindex(self):
- df = self.mframe.T
- df['baz', 'two'] = 'peekaboo'
-
- keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
- agged = df.groupby(keys).agg(np.mean)
- assert isinstance(agged.columns, MultiIndex)
-
- def aggfun(ser):
- if ser.name == ('foo', 'one'):
- raise TypeError
- else:
- return ser.sum()
-
- agged2 = df.groupby(keys).aggregate(aggfun)
- assert len(agged2.columns) + 1 == len(df.columns)
-
- def test_groupby_level_apply(self):
- frame = self.mframe
-
- result = frame.groupby(level=0).count()
- assert result.index.name == 'first'
- result = frame.groupby(level=1).count()
- assert result.index.name == 'second'
-
- result = frame['A'].groupby(level=0).count()
- assert result.index.name == 'first'
-
- def test_groupby_level_mapper(self):
- frame = self.mframe
- deleveled = frame.reset_index()
-
- mapper0 = {'foo': 0, 'bar': 0, 'baz': 1, 'qux': 1}
- mapper1 = {'one': 0, 'two': 0, 'three': 1}
-
- result0 = frame.groupby(mapper0, level=0).sum()
- result1 = frame.groupby(mapper1, level=1).sum()
-
- mapped_level0 = np.array([mapper0.get(x) for x in deleveled['first']])
- mapped_level1 = np.array([mapper1.get(x) for x in deleveled['second']])
- expected0 = frame.groupby(mapped_level0).sum()
- expected1 = frame.groupby(mapped_level1).sum()
- expected0.index.name, expected1.index.name = 'first', 'second'
-
- assert_frame_equal(result0, expected0)
- assert_frame_equal(result1, expected1)
-
- def test_groupby_level_nonmulti(self):
- # GH 1313, GH 13901
- s = Series([1, 2, 3, 10, 4, 5, 20, 6],
- Index([1, 2, 3, 1, 4, 5, 2, 6], name='foo'))
- expected = Series([11, 22, 3, 4, 5, 6],
- Index(range(1, 7), name='foo'))
-
- result = s.groupby(level=0).sum()
- tm.assert_series_equal(result, expected)
- result = s.groupby(level=[0]).sum()
- tm.assert_series_equal(result, expected)
- result = s.groupby(level=-1).sum()
- tm.assert_series_equal(result, expected)
- result = s.groupby(level=[-1]).sum()
- tm.assert_series_equal(result, expected)
-
- pytest.raises(ValueError, s.groupby, level=1)
- pytest.raises(ValueError, s.groupby, level=-2)
- pytest.raises(ValueError, s.groupby, level=[])
- pytest.raises(ValueError, s.groupby, level=[0, 0])
- pytest.raises(ValueError, s.groupby, level=[0, 1])
- pytest.raises(ValueError, s.groupby, level=[1])
-
- def test_groupby_complex(self):
- # GH 12902
- a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1])
- expected = Series((1 + 2j, 5 + 10j))
-
- result = a.groupby(level=0).sum()
- assert_series_equal(result, expected)
-
- result = a.sum(level=0)
- assert_series_equal(result, expected)
-
- def test_apply_series_to_frame(self):
- def f(piece):
- with np.errstate(invalid='ignore'):
- logged = np.log(piece)
- return DataFrame({'value': piece,
- 'demeaned': piece - piece.mean(),
- 'logged': logged})
-
- dr = bdate_range('1/1/2000', periods=100)
- ts = Series(np.random.randn(100), index=dr)
-
- grouped = ts.groupby(lambda x: x.month)
- result = grouped.apply(f)
-
- assert isinstance(result, DataFrame)
- tm.assert_index_equal(result.index, ts.index)
-
- def test_apply_series_yield_constant(self):
- result = self.df.groupby(['A', 'B'])['C'].apply(len)
- assert result.index.names[:2] == ('A', 'B')
-
- def test_apply_frame_yield_constant(self):
- # GH13568
- result = self.df.groupby(['A', 'B']).apply(len)
- assert isinstance(result, Series)
- assert result.name is None
-
- result = self.df.groupby(['A', 'B'])[['C', 'D']].apply(len)
- assert isinstance(result, Series)
- assert result.name is None
-
- def test_apply_frame_to_series(self):
- grouped = self.df.groupby(['A', 'B'])
- result = grouped.apply(len)
- expected = grouped.count()['C']
- tm.assert_index_equal(result.index, expected.index)
- tm.assert_numpy_array_equal(result.values, expected.values)
-
- def test_apply_frame_concat_series(self):
- def trans(group):
- return group.groupby('B')['C'].sum().sort_values()[:2]
-
- def trans2(group):
- grouped = group.groupby(df.reindex(group.index)['B'])
- return grouped.sum().sort_values()[:2]
-
- df = DataFrame({'A': np.random.randint(0, 5, 1000),
- 'B': np.random.randint(0, 5, 1000),
- 'C': np.random.randn(1000)})
-
- result = df.groupby('A').apply(trans)
- exp = df.groupby('A')['C'].apply(trans2)
- assert_series_equal(result, exp, check_names=False)
- assert result.name == 'C'
-
- def test_apply_transform(self):
- grouped = self.ts.groupby(lambda x: x.month)
- result = grouped.apply(lambda x: x * 2)
- expected = grouped.transform(lambda x: x * 2)
- assert_series_equal(result, expected)
-
- def test_apply_multikey_corner(self):
- grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
-
- def f(group):
- return group.sort_values('A')[-5:]
-
- result = grouped.apply(f)
- for key, group in grouped:
- assert_frame_equal(result.loc[key], f(group))
-
- def test_mutate_groups(self):
-
- # GH3380
-
- mydf = DataFrame({
- 'cat1': ['a'] * 8 + ['b'] * 6,
- 'cat2': ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 +
- ['d'] * 2 + ['e'] * 2,
- 'cat3': lmap(lambda x: 'g%s' % x, lrange(1, 15)),
- 'val': np.random.randint(100, size=14),
- })
-
- def f_copy(x):
- x = x.copy()
- x['rank'] = x.val.rank(method='min')
- return x.groupby('cat2')['rank'].min()
-
- def f_no_copy(x):
- x['rank'] = x.val.rank(method='min')
- return x.groupby('cat2')['rank'].min()
-
- grpby_copy = mydf.groupby('cat1').apply(f_copy)
- grpby_no_copy = mydf.groupby('cat1').apply(f_no_copy)
- assert_series_equal(grpby_copy, grpby_no_copy)
-
- def test_no_mutate_but_looks_like(self):
-
- # GH 8467
- # first show's mutation indicator
- # second does not, but should yield the same results
- df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'value': range(9)})
-
- result1 = df.groupby('key', group_keys=True).apply(lambda x: x[:].key)
- result2 = df.groupby('key', group_keys=True).apply(lambda x: x.key)
- assert_series_equal(result1, result2)
-
- def test_apply_chunk_view(self):
- # Low level tinkering could be unsafe, make sure not
- df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],
- 'value': lrange(9)})
-
- # return view
- f = lambda x: x[:2]
-
- result = df.groupby('key', group_keys=False).apply(f)
- expected = df.take([0, 1, 3, 4, 6, 7])
- assert_frame_equal(result, expected)
-
- def test_apply_no_name_column_conflict(self):
- df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
- 'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1],
- 'value': lrange(10)[::-1]})
-
- # it works! #2605
- grouped = df.groupby(['name', 'name2'])
- grouped.apply(lambda x: x.sort_values('value', inplace=True))
-
- def test_groupby_series_indexed_differently(self):
- s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7],
- index=Index(['a', 'b', 'c', 'd', 'e', 'f', 'g']))
- s2 = Series([1.0, 1.0, 4.0, 5.0, 5.0, 7.0],
- index=Index(['a', 'b', 'd', 'f', 'g', 'h']))
-
- grouped = s1.groupby(s2)
- agged = grouped.mean()
- exp = s1.groupby(s2.reindex(s1.index).get).mean()
- assert_series_equal(agged, exp)
-
- def test_groupby_with_hier_columns(self):
- tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux',
- 'qux'], ['one', 'two', 'one', 'two', 'one', 'two',
- 'one', 'two']]))
- index = MultiIndex.from_tuples(tuples)
- columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), (
- 'B', 'cat'), ('A', 'dog')])
- df = DataFrame(np.random.randn(8, 4), index=index, columns=columns)
-
- result = df.groupby(level=0).mean()
- tm.assert_index_equal(result.columns, columns)
-
- result = df.groupby(level=0, axis=1).mean()
- tm.assert_index_equal(result.index, df.index)
-
- result = df.groupby(level=0).agg(np.mean)
- tm.assert_index_equal(result.columns, columns)
-
- result = df.groupby(level=0).apply(lambda x: x.mean())
- tm.assert_index_equal(result.columns, columns)
-
- result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1))
- tm.assert_index_equal(result.columns, Index(['A', 'B']))
- tm.assert_index_equal(result.index, df.index)
-
- # add a nuisance column
- sorted_columns, _ = columns.sortlevel(0)
- df['A', 'foo'] = 'bar'
- result = df.groupby(level=0).mean()
- tm.assert_index_equal(result.columns, df.columns[:-1])
-
- def test_pass_args_kwargs(self):
- from numpy import percentile
-
- def f(x, q=None, axis=0):
- return percentile(x, q, axis=axis)
-
- g = lambda x: percentile(x, 80, axis=0)
-
- # Series
- ts_grouped = self.ts.groupby(lambda x: x.month)
- agg_result = ts_grouped.agg(percentile, 80, axis=0)
- apply_result = ts_grouped.apply(percentile, 80, axis=0)
- trans_result = ts_grouped.transform(percentile, 80, axis=0)
-
- agg_expected = ts_grouped.quantile(.8)
- trans_expected = ts_grouped.transform(g)
-
- assert_series_equal(apply_result, agg_expected)
- assert_series_equal(agg_result, agg_expected, check_names=False)
- assert_series_equal(trans_result, trans_expected)
-
- agg_result = ts_grouped.agg(f, q=80)
- apply_result = ts_grouped.apply(f, q=80)
- trans_result = ts_grouped.transform(f, q=80)
- assert_series_equal(agg_result, agg_expected)
- assert_series_equal(apply_result, agg_expected)
- assert_series_equal(trans_result, trans_expected)
-
- # DataFrame
- df_grouped = self.tsframe.groupby(lambda x: x.month)
- agg_result = df_grouped.agg(percentile, 80, axis=0)
- apply_result = df_grouped.apply(DataFrame.quantile, .8)
- expected = df_grouped.quantile(.8)
- assert_frame_equal(apply_result, expected)
- assert_frame_equal(agg_result, expected, check_names=False)
-
- agg_result = df_grouped.agg(f, q=80)
- apply_result = df_grouped.apply(DataFrame.quantile, q=.8)
- assert_frame_equal(agg_result, expected, check_names=False)
- assert_frame_equal(apply_result, expected)
-
- def test_non_cython_api(self):
-
- # GH5610
- # non-cython calls should not include the grouper
-
- df = DataFrame(
- [[1, 2, 'foo'],
- [1, np.nan, 'bar'],
- [3, np.nan, 'baz']],
- columns=['A', 'B', 'C'])
- g = df.groupby('A')
- gni = df.groupby('A', as_index=False)
-
- # mad
- expected = DataFrame([[0], [np.nan]], columns=['B'], index=[1, 3])
- expected.index.name = 'A'
- result = g.mad()
- assert_frame_equal(result, expected)
-
- expected = DataFrame([[0., 0.], [0, np.nan]], columns=['A', 'B'],
- index=[0, 1])
- result = gni.mad()
- assert_frame_equal(result, expected)
-
- # describe
- expected_index = pd.Index([1, 3], name='A')
- expected_col = pd.MultiIndex(levels=[['B'],
- ['count', 'mean', 'std', 'min',
- '25%', '50%', '75%', 'max']],
- labels=[[0] * 8, list(range(8))])
- expected = pd.DataFrame([[1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0],
- [0.0, np.nan, np.nan, np.nan, np.nan, np.nan,
- np.nan, np.nan]],
- index=expected_index,
- columns=expected_col)
- result = g.describe()
- assert_frame_equal(result, expected)
-
- expected = pd.concat([df[df.A == 1].describe().unstack().to_frame().T,
- df[df.A == 3].describe().unstack().to_frame().T])
- expected.index = pd.Index([0, 1])
- result = gni.describe()
- assert_frame_equal(result, expected)
-
- # any
- expected = DataFrame([[True, True], [False, True]], columns=['B', 'C'],
- index=[1, 3])
- expected.index.name = 'A'
- result = g.any()
- assert_frame_equal(result, expected)
-
- # idxmax
- expected = DataFrame([[0.0], [np.nan]], columns=['B'], index=[1, 3])
- expected.index.name = 'A'
- result = g.idxmax()
- assert_frame_equal(result, expected)
-
- def test_cython_api2(self):
-
- # this takes the fast apply path
-
- # cumsum (GH5614)
- df = DataFrame(
- [[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]
- ], columns=['A', 'B', 'C'])
- expected = DataFrame(
- [[2, np.nan], [np.nan, 9], [4, 9]], columns=['B', 'C'])
- result = df.groupby('A').cumsum()
- assert_frame_equal(result, expected)
-
- # GH 5755 - cumsum is a transformer and should ignore as_index
- result = df.groupby('A', as_index=False).cumsum()
- assert_frame_equal(result, expected)
-
- # GH 13994
- result = df.groupby('A').cumsum(axis=1)
- expected = df.cumsum(axis=1)
- assert_frame_equal(result, expected)
- result = df.groupby('A').cumprod(axis=1)
- expected = df.cumprod(axis=1)
- assert_frame_equal(result, expected)
-
- def test_grouping_ndarray(self):
- grouped = self.df.groupby(self.df['A'].values)
-
- result = grouped.sum()
- expected = self.df.groupby('A').sum()
- assert_frame_equal(result, expected, check_names=False
- ) # Note: no names when grouping by value
-
- def test_apply_typecast_fail(self):
- df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
- 'c': np.tile(
- ['a', 'b', 'c'], 2),
- 'v': np.arange(1., 7.)})
-
- def f(group):
- v = group['v']
- group['v2'] = (v - v.min()) / (v.max() - v.min())
- return group
-
- result = df.groupby('d').apply(f)
-
- expected = df.copy()
- expected['v2'] = np.tile([0., 0.5, 1], 2)
-
- assert_frame_equal(result, expected)
-
- def test_apply_multiindex_fail(self):
- index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
- ])
- df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
- 'c': np.tile(['a', 'b', 'c'], 2),
- 'v': np.arange(1., 7.)}, index=index)
-
- def f(group):
- v = group['v']
- group['v2'] = (v - v.min()) / (v.max() - v.min())
- return group
-
- result = df.groupby('d').apply(f)
-
- expected = df.copy()
- expected['v2'] = np.tile([0., 0.5, 1], 2)
-
- assert_frame_equal(result, expected)
-
- def test_apply_corner(self):
- result = self.tsframe.groupby(lambda x: x.year).apply(lambda x: x * 2)
- expected = self.tsframe * 2
- assert_frame_equal(result, expected)
-
- def test_apply_without_copy(self):
- # GH 5545
- # returning a non-copy in an applied function fails
-
- data = DataFrame({'id_field': [100, 100, 200, 300],
- 'category': ['a', 'b', 'c', 'c'],
- 'value': [1, 2, 3, 4]})
-
- def filt1(x):
- if x.shape[0] == 1:
- return x.copy()
- else:
- return x[x.category == 'c']
-
- def filt2(x):
- if x.shape[0] == 1:
- return x
- else:
- return x[x.category == 'c']
-
- expected = data.groupby('id_field').apply(filt1)
- result = data.groupby('id_field').apply(filt2)
- assert_frame_equal(result, expected)
-
- def test_apply_corner_cases(self):
- # #535, can't use sliding iterator
-
- N = 1000
- labels = np.random.randint(0, 100, size=N)
- df = DataFrame({'key': labels,
- 'value1': np.random.randn(N),
- 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})
-
- grouped = df.groupby('key')
-
- def f(g):
- g['value3'] = g['value1'] * 2
- return g
-
- result = grouped.apply(f)
- assert 'value3' in result
-    def test_groupby_wrong_multi_labels(self):
-        data = """index,foo,bar,baz,spam,data
+    assert_frame_equal(result3, expected3)
+
+ # multi-key
+
+ grouped = df.groupby(['A', 'B'], as_index=False)
+
+ result = grouped.agg(np.mean)
+ expected = grouped.mean()
+ assert_frame_equal(result, expected)
+
+ result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
+ expected2 = grouped.mean()
+ expected2['D'] = grouped.sum()['D']
+ assert_frame_equal(result2, expected2)
+
+ expected3 = grouped['C'].sum()
+ expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
+ result3 = grouped['C'].agg({'Q': np.sum})
+ assert_frame_equal(result3, expected3)
+
+ # GH7115 & GH8112 & GH8582
+ df = DataFrame(np.random.randint(0, 100, (50, 3)),
+ columns=['jim', 'joe', 'jolie'])
+ ts = Series(np.random.randint(5, 10, 50), name='jim')
+
+ gr = df.groupby(ts)
+ gr.nth(0) # invokes set_selection_from_grouper internally
+ assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum))
+
+ for attr in ['mean', 'max', 'count', 'idxmax', 'cumsum', 'all']:
+ gr = df.groupby(ts, as_index=False)
+ left = getattr(gr, attr)()
+
+ gr = df.groupby(ts.values, as_index=True)
+ right = getattr(gr, attr)().reset_index(drop=True)
+
+ assert_frame_equal(left, right)
+
+
+def test_as_index_series_return_frame(df):
+ grouped = df.groupby('A', as_index=False)
+ grouped2 = df.groupby(['A', 'B'], as_index=False)
+
+ result = grouped['C'].agg(np.sum)
+ expected = grouped.agg(np.sum).loc[:, ['A', 'C']]
+ assert isinstance(result, DataFrame)
+ assert_frame_equal(result, expected)
+
+ result2 = grouped2['C'].agg(np.sum)
+ expected2 = grouped2.agg(np.sum).loc[:, ['A', 'B', 'C']]
+ assert isinstance(result2, DataFrame)
+ assert_frame_equal(result2, expected2)
+
+ result = grouped['C'].sum()
+ expected = grouped.sum().loc[:, ['A', 'C']]
+ assert isinstance(result, DataFrame)
+ assert_frame_equal(result, expected)
+
+ result2 = grouped2['C'].sum()
+ expected2 = grouped2.sum().loc[:, ['A', 'B', 'C']]
+ assert isinstance(result2, DataFrame)
+ assert_frame_equal(result2, expected2)
+
+    # corner case: cannot select a second column
+    # from an already column-selected groupby
+ pytest.raises(Exception, grouped['C'].__getitem__, 'D')
+
+
+def test_groupby_as_index_cython(df):
+ data = df
+
+ # single-key
+ grouped = data.groupby('A', as_index=False)
+ result = grouped.mean()
+ expected = data.groupby(['A']).mean()
+ expected.insert(0, 'A', expected.index)
+ expected.index = np.arange(len(expected))
+ assert_frame_equal(result, expected)
+
+ # multi-key
+ grouped = data.groupby(['A', 'B'], as_index=False)
+ result = grouped.mean()
+ expected = data.groupby(['A', 'B']).mean()
+
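+    # unzip the MultiIndex tuples into parallel arrays so the group
+    # keys can be re-inserted as flat columns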
+ arrays = lzip(*expected.index.values)
+ expected.insert(0, 'A', arrays[0])
+ expected.insert(1, 'B', arrays[1])
+ expected.index = np.arange(len(expected))
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_as_index_series_scalar(df):
+ grouped = df.groupby(['A', 'B'], as_index=False)
+
+ # GH #421
+
+ result = grouped['C'].agg(len)
+ expected = grouped.agg(len).loc[:, ['A', 'B', 'C']]
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_as_index_corner(df, ts):
+ pytest.raises(TypeError, ts.groupby, lambda x: x.weekday(),
+ as_index=False)
+
+ pytest.raises(ValueError, df.groupby, lambda x: x.lower(),
+ as_index=False, axis=1)
+
+
+def test_groupby_multiple_key():
+    df = tm.makeTimeDataFrame()
+ grouped = df.groupby([lambda x: x.year, lambda x: x.month,
+ lambda x: x.day])
+ agged = grouped.sum()
+ assert_almost_equal(df.values, agged.values)
+
+ grouped = df.T.groupby([lambda x: x.year,
+ lambda x: x.month,
+ lambda x: x.day], axis=1)
+
+ agged = grouped.agg(lambda x: x.sum())
+ tm.assert_index_equal(agged.index, df.columns)
+ assert_almost_equal(df.T.values, agged.values)
+
+ agged = grouped.agg(lambda x: x.sum())
+ assert_almost_equal(df.T.values, agged.values)
+
+
+def test_groupby_multi_corner(df):
+ # test that having an all-NA column doesn't mess you up
+ df = df.copy()
+ df['bad'] = np.nan
+ agged = df.groupby(['A', 'B']).mean()
+
+ expected = df.groupby(['A', 'B']).mean()
+ expected['bad'] = np.nan
+
+ assert_frame_equal(agged, expected)
+
+
+def test_omit_nuisance(df):
+ grouped = df.groupby('A')
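+    # 'B' holds strings, so mean() silently drops it as a nuisance
+    # column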
+
+ result = grouped.mean()
+ expected = df.loc[:, ['A', 'C', 'D']].groupby('A').mean()
+ assert_frame_equal(result, expected)
+
+ agged = grouped.agg(np.mean)
+ exp = grouped.mean()
+ assert_frame_equal(agged, exp)
+
+ df = df.loc[:, ['A', 'C', 'D']]
+ df['E'] = datetime.now()
+ grouped = df.groupby('A')
+ result = grouped.agg(np.sum)
+ expected = grouped.sum()
+ assert_frame_equal(result, expected)
+
+ # won't work with axis = 1
+ grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1)
+ result = pytest.raises(TypeError, grouped.agg,
+ lambda x: x.sum(0, numeric_only=False))
+
+
+def test_omit_nuisance_python_multiple(three_group):
+ grouped = three_group.groupby(['A', 'B'])
+
+ agged = grouped.agg(np.mean)
+ exp = grouped.mean()
+ assert_frame_equal(agged, exp)
+
+
+def test_empty_groups_corner(mframe):
+ # handle empty groups
+ df = DataFrame({'k1': np.array(['b', 'b', 'b', 'a', 'a', 'a']),
+ 'k2': np.array(['1', '1', '1', '2', '2', '2']),
+ 'k3': ['foo', 'bar'] * 3,
+ 'v1': np.random.randn(6),
+ 'v2': np.random.randn(6)})
+
+ grouped = df.groupby(['k1', 'k2'])
+ result = grouped.agg(np.mean)
+ expected = grouped.mean()
+ assert_frame_equal(result, expected)
+
+ grouped = mframe[3:5].groupby(level=0)
+ agged = grouped.apply(lambda x: x.mean())
+ agged_A = grouped['A'].apply(np.mean)
+ assert_series_equal(agged['A'], agged_A)
+ assert agged.index.name == 'first'
+
+
+def test_nonsense_func():
+ df = DataFrame([0])
+ pytest.raises(Exception, df.groupby, lambda x: x + 'foo')
+
+
+def test_wrap_aggregated_output_multindex(mframe):
+ df = mframe.T
+ df['baz', 'two'] = 'peekaboo'
+
+ keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
+ agged = df.groupby(keys).agg(np.mean)
+ assert isinstance(agged.columns, MultiIndex)
+
+ def aggfun(ser):
+ if ser.name == ('foo', 'one'):
+ raise TypeError
+ else:
+ return ser.sum()
+
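+    # a column whose aggregation raises TypeError is dropped from the
+    # result rather than failing the whole aggregation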
+ agged2 = df.groupby(keys).aggregate(aggfun)
+ assert len(agged2.columns) + 1 == len(df.columns)
+
+
+def test_groupby_level_apply(mframe):
+
+ result = mframe.groupby(level=0).count()
+ assert result.index.name == 'first'
+ result = mframe.groupby(level=1).count()
+ assert result.index.name == 'second'
+
+ result = mframe['A'].groupby(level=0).count()
+ assert result.index.name == 'first'
+
+
+def test_groupby_level_mapper(mframe):
+ deleveled = mframe.reset_index()
+
+ mapper0 = {'foo': 0, 'bar': 0, 'baz': 1, 'qux': 1}
+ mapper1 = {'one': 0, 'two': 0, 'three': 1}
+
+ result0 = mframe.groupby(mapper0, level=0).sum()
+ result1 = mframe.groupby(mapper1, level=1).sum()
+
+ mapped_level0 = np.array([mapper0.get(x) for x in deleveled['first']])
+ mapped_level1 = np.array([mapper1.get(x) for x in deleveled['second']])
+ expected0 = mframe.groupby(mapped_level0).sum()
+ expected1 = mframe.groupby(mapped_level1).sum()
+ expected0.index.name, expected1.index.name = 'first', 'second'
+
+ assert_frame_equal(result0, expected0)
+ assert_frame_equal(result1, expected1)
+
+
+def test_groupby_level_nonmulti():
+ # GH 1313, GH 13901
+ s = Series([1, 2, 3, 10, 4, 5, 20, 6],
+ Index([1, 2, 3, 1, 4, 5, 2, 6], name='foo'))
+ expected = Series([11, 22, 3, 4, 5, 6],
+ Index(range(1, 7), name='foo'))
+
+ result = s.groupby(level=0).sum()
+ tm.assert_series_equal(result, expected)
+ result = s.groupby(level=[0]).sum()
+ tm.assert_series_equal(result, expected)
+ result = s.groupby(level=-1).sum()
+ tm.assert_series_equal(result, expected)
+ result = s.groupby(level=[-1]).sum()
+ tm.assert_series_equal(result, expected)
+
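+    # only a single, existing level may be referenced on a
+    # non-MultiIndex axis; anything else raises ValueError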
+ pytest.raises(ValueError, s.groupby, level=1)
+ pytest.raises(ValueError, s.groupby, level=-2)
+ pytest.raises(ValueError, s.groupby, level=[])
+ pytest.raises(ValueError, s.groupby, level=[0, 0])
+ pytest.raises(ValueError, s.groupby, level=[0, 1])
+ pytest.raises(ValueError, s.groupby, level=[1])
+
+
+def test_groupby_complex():
+ # GH 12902
+ a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1])
+ expected = Series((1 + 2j, 5 + 10j))
+
+ result = a.groupby(level=0).sum()
+ assert_series_equal(result, expected)
+
+ result = a.sum(level=0)
+ assert_series_equal(result, expected)
+
+
+def test_mutate_groups():
+
+ # GH3380
+
+ df = DataFrame({
+ 'cat1': ['a'] * 8 + ['b'] * 6,
+ 'cat2': ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 +
+ ['d'] * 2 + ['e'] * 2,
+ 'cat3': lmap(lambda x: 'g%s' % x, lrange(1, 15)),
+ 'val': np.random.randint(100, size=14),
+ })
+
+ def f_copy(x):
+ x = x.copy()
+ x['rank'] = x.val.rank(method='min')
+ return x.groupby('cat2')['rank'].min()
+
+ def f_no_copy(x):
+ x['rank'] = x.val.rank(method='min')
+ return x.groupby('cat2')['rank'].min()
+
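+    # whether the applied function mutates its input group or works on
+    # a copy, the results must agree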
+ grpby_copy = df.groupby('cat1').apply(f_copy)
+ grpby_no_copy = df.groupby('cat1').apply(f_no_copy)
+ assert_series_equal(grpby_copy, grpby_no_copy)
+
+
+def test_no_mutate_but_looks_like():
+
+ # GH 8467
+    # the first apply shows the mutation indicator,
+    # the second does not, but both should yield the same results
+ df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'value': range(9)})
+
+ result1 = df.groupby('key', group_keys=True).apply(lambda x: x[:].key)
+ result2 = df.groupby('key', group_keys=True).apply(lambda x: x.key)
+ assert_series_equal(result1, result2)
+
+
+def test_groupby_series_indexed_differently():
+ s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7],
+ index=Index(['a', 'b', 'c', 'd', 'e', 'f', 'g']))
+ s2 = Series([1.0, 1.0, 4.0, 5.0, 5.0, 7.0],
+ index=Index(['a', 'b', 'd', 'f', 'g', 'h']))
+
+ grouped = s1.groupby(s2)
+ agged = grouped.mean()
+ exp = s1.groupby(s2.reindex(s1.index).get).mean()
+ assert_series_equal(agged, exp)
+
+
+def test_groupby_with_hier_columns():
+ tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux',
+ 'qux'], ['one', 'two', 'one', 'two', 'one', 'two',
+ 'one', 'two']]))
+ index = MultiIndex.from_tuples(tuples)
+ columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), (
+ 'B', 'cat'), ('A', 'dog')])
+ df = DataFrame(np.random.randn(8, 4), index=index, columns=columns)
+
+ result = df.groupby(level=0).mean()
+ tm.assert_index_equal(result.columns, columns)
+
+ result = df.groupby(level=0, axis=1).mean()
+ tm.assert_index_equal(result.index, df.index)
+
+ result = df.groupby(level=0).agg(np.mean)
+ tm.assert_index_equal(result.columns, columns)
+
+ result = df.groupby(level=0).apply(lambda x: x.mean())
+ tm.assert_index_equal(result.columns, columns)
+
+ result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1))
+ tm.assert_index_equal(result.columns, Index(['A', 'B']))
+ tm.assert_index_equal(result.index, df.index)
+
+ # add a nuisance column
+ sorted_columns, _ = columns.sortlevel(0)
+ df['A', 'foo'] = 'bar'
+ result = df.groupby(level=0).mean()
+ tm.assert_index_equal(result.columns, df.columns[:-1])
+
+def test_grouping_ndarray(df):
+ grouped = df.groupby(df['A'].values)
+
+ result = grouped.sum()
+ expected = df.groupby('A').sum()
+    # note: no names when grouping by value
+    assert_frame_equal(result, expected, check_names=False)
+
+
+def test_groupby_wrong_multi_labels():
+ data = """index,foo,bar,baz,spam,data
0,foo1,bar1,baz1,spam2,20
1,foo1,bar2,baz1,spam3,30
2,foo2,bar2,baz1,spam2,40
3,foo1,bar1,baz2,spam1,50
4,foo3,bar1,baz2,spam1,60"""
- data = read_csv(StringIO(data), index_col=0)
-
- grouped = data.groupby(['foo', 'bar', 'baz', 'spam'])
-
- result = grouped.agg(np.mean)
- expected = grouped.mean()
- assert_frame_equal(result, expected)
-
- def test_groupby_series_with_name(self):
- result = self.df.groupby(self.df['A']).mean()
- result2 = self.df.groupby(self.df['A'], as_index=False).mean()
- assert result.index.name == 'A'
- assert 'A' in result2
-
- result = self.df.groupby([self.df['A'], self.df['B']]).mean()
- result2 = self.df.groupby([self.df['A'], self.df['B']],
- as_index=False).mean()
- assert result.index.names == ('A', 'B')
- assert 'A' in result2
- assert 'B' in result2
-
- def test_seriesgroupby_name_attr(self):
- # GH 6265
- result = self.df.groupby('A')['C']
- assert result.count().name == 'C'
- assert result.mean().name == 'C'
-
- testFunc = lambda x: np.sum(x) * 2
- assert result.agg(testFunc).name == 'C'
-
- def test_consistency_name(self):
- # GH 12363
-
- df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
- 'foo', 'bar', 'foo', 'foo'],
- 'B': ['one', 'one', 'two', 'two',
- 'two', 'two', 'one', 'two'],
- 'C': np.random.randn(8) + 1.0,
- 'D': np.arange(8)})
-
- expected = df.groupby(['A']).B.count()
- result = df.B.groupby(df.A).count()
- assert_series_equal(result, expected)
-
- def test_groupby_name_propagation(self):
- # GH 6124
- def summarize(df, name=None):
- return Series({'count': 1, 'mean': 2, 'omissions': 3, }, name=name)
-
- def summarize_random_name(df):
- # Provide a different name for each Series. In this case, groupby
- # should not attempt to propagate the Series name since they are
- # inconsistent.
- return Series({
- 'count': 1,
- 'mean': 2,
- 'omissions': 3,
- }, name=df.iloc[0]['A'])
-
- metrics = self.df.groupby('A').apply(summarize)
- assert metrics.columns.name is None
- metrics = self.df.groupby('A').apply(summarize, 'metrics')
- assert metrics.columns.name == 'metrics'
- metrics = self.df.groupby('A').apply(summarize_random_name)
- assert metrics.columns.name is None
-
- def test_groupby_nonstring_columns(self):
- df = DataFrame([np.arange(10) for x in range(10)])
- grouped = df.groupby(0)
- result = grouped.mean()
- expected = df.groupby(df[0]).mean()
- assert_frame_equal(result, expected)
-
- def test_groupby_mixed_type_columns(self):
- # GH 13432, unorderable types in py3
- df = DataFrame([[0, 1, 2]], columns=['A', 'B', 0])
- expected = DataFrame([[1, 2]], columns=['B', 0],
- index=Index([0], name='A'))
-
- result = df.groupby('A').first()
- tm.assert_frame_equal(result, expected)
-
- result = df.groupby('A').sum()
- tm.assert_frame_equal(result, expected)
-
- def test_cython_grouper_series_bug_noncontig(self):
- arr = np.empty((100, 100))
- arr.fill(np.nan)
- obj = Series(arr[:, 0], index=lrange(100))
- inds = np.tile(lrange(10), 10)
-
- result = obj.groupby(inds).agg(Series.median)
- assert result.isna().all()
-
- def test_series_grouper_noncontig_index(self):
- index = Index(tm.rands_array(10, 100))
-
- values = Series(np.random.randn(50), index=index[::2])
- labels = np.random.randint(0, 5, 50)
-
- # it works!
- grouped = values.groupby(labels)
-
- # accessing the index elements causes segfault
- f = lambda x: len(set(map(id, x.index)))
- grouped.agg(f)
-
- def test_convert_objects_leave_decimal_alone(self):
-
- from decimal import Decimal
-
- s = Series(lrange(5))
- labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')
-
- def convert_fast(x):
- return Decimal(str(x.mean()))
-
- def convert_force_pure(x):
- # base will be length 0
- assert (len(x.base) > 0)
- return Decimal(str(x.mean()))
-
- grouped = s.groupby(labels)
-
- result = grouped.agg(convert_fast)
- assert result.dtype == np.object_
- assert isinstance(result[0], Decimal)
-
- result = grouped.agg(convert_force_pure)
- assert result.dtype == np.object_
- assert isinstance(result[0], Decimal)
-
- def test_fast_apply(self):
- # make sure that fast apply is correctly called
- # rather than raising any kind of error
- # otherwise the python path will be callsed
- # which slows things down
- N = 1000
- labels = np.random.randint(0, 2000, size=N)
- labels2 = np.random.randint(0, 3, size=N)
- df = DataFrame({'key': labels,
- 'key2': labels2,
- 'value1': np.random.randn(N),
- 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)})
-
- def f(g):
- return 1
-
- g = df.groupby(['key', 'key2'])
-
- grouper = g.grouper
-
- splitter = grouper._get_splitter(g._selected_obj, axis=g.axis)
- group_keys = grouper._get_group_keys()
-
- values, mutated = splitter.fast_apply(f, group_keys)
- assert not mutated
-
- def test_apply_with_mixed_dtype(self):
- # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
- df = DataFrame({'foo1': np.random.randn(6),
- 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']})
- result = df.apply(lambda x: x, axis=1)
- assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts())
-
- # GH 3610 incorrect dtype conversion with as_index=False
- df = DataFrame({"c1": [1, 2, 6, 6, 8]})
- df["c2"] = df.c1 / 2.0
- result1 = df.groupby("c2").mean().reset_index().c2
- result2 = df.groupby("c2", as_index=False).mean().c2
- assert_series_equal(result1, result2)
-
- def test_groupby_aggregation_mixed_dtype(self):
-
- # GH 6212
- expected = DataFrame({
- 'v1': [5, 5, 7, np.nan, 3, 3, 4, 1],
- 'v2': [55, 55, 77, np.nan, 33, 33, 44, 11]},
- index=MultiIndex.from_tuples([(1, 95), (1, 99), (2, 95), (2, 99),
- ('big', 'damp'),
- ('blue', 'dry'),
- ('red', 'red'), ('red', 'wet')],
- names=['by1', 'by2']))
-
- df = DataFrame({
- 'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9],
- 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99],
- 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan,
- 12],
- 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99,
- np.nan, np.nan]
- })
-
- g = df.groupby(['by1', 'by2'])
- result = g[['v1', 'v2']].mean()
- assert_frame_equal(result, expected)
-
- def test_groupby_dtype_inference_empty(self):
- # GH 6733
- df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')})
- assert df['x'].dtype == np.float64
-
- result = df.groupby('x').first()
- exp_index = Index([], name='x', dtype=np.float64)
- expected = DataFrame({'range': Series(
- [], index=exp_index, dtype='int64')})
- assert_frame_equal(result, expected, by_blocks=True)
-
- def test_groupby_list_infer_array_like(self):
- result = self.df.groupby(list(self.df['A'])).mean()
- expected = self.df.groupby(self.df['A']).mean()
- assert_frame_equal(result, expected, check_names=False)
-
- pytest.raises(Exception, self.df.groupby, list(self.df['A'][:-1]))
-
- # pathological case of ambiguity
- df = DataFrame({'foo': [0, 1],
- 'bar': [3, 4],
- 'val': np.random.randn(2)})
-
- result = df.groupby(['foo', 'bar']).mean()
- expected = df.groupby([df['foo'], df['bar']]).mean()[['val']]
-
- def test_groupby_keys_same_size_as_index(self):
- # GH 11185
- freq = 's'
- index = pd.date_range(start=pd.Timestamp('2015-09-29T11:34:44-0700'),
- periods=2, freq=freq)
- df = pd.DataFrame([['A', 10], ['B', 15]], columns=[
- 'metric', 'values'
- ], index=index)
- result = df.groupby([pd.Grouper(level=0, freq=freq), 'metric']).mean()
- expected = df.set_index([df.index, 'metric'])
-
- assert_frame_equal(result, expected)
-
- def test_groupby_one_row(self):
- # GH 11741
- df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD'))
- pytest.raises(KeyError, df1.groupby, 'Z')
- df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD'))
- pytest.raises(KeyError, df2.groupby, 'Z')
-
- def test_groupby_nat_exclude(self):
- # GH 6992
- df = pd.DataFrame(
- {'values': np.random.randn(8),
- 'dt': [np.nan, pd.Timestamp('2013-01-01'), np.nan, pd.Timestamp(
- '2013-02-01'), np.nan, pd.Timestamp('2013-02-01'), np.nan,
- pd.Timestamp('2013-01-01')],
- 'str': [np.nan, 'a', np.nan, 'a', np.nan, 'a', np.nan, 'b']})
- grouped = df.groupby('dt')
-
- expected = [pd.Index([1, 7]), pd.Index([3, 5])]
- keys = sorted(grouped.groups.keys())
- assert len(keys) == 2
- for k, e in zip(keys, expected):
- # grouped.groups keys are np.datetime64 with system tz
- # not to be affected by tz, only compare values
- tm.assert_index_equal(grouped.groups[k], e)
-
- # confirm obj is not filtered
- tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df)
- assert grouped.ngroups == 2
-
- expected = {
- Timestamp('2013-01-01 00:00:00'): np.array([1, 7], dtype=np.int64),
- Timestamp('2013-02-01 00:00:00'): np.array([3, 5], dtype=np.int64)
- }
-
- for k in grouped.indices:
- tm.assert_numpy_array_equal(grouped.indices[k], expected[k])
-
- tm.assert_frame_equal(
- grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]])
- tm.assert_frame_equal(
- grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]])
+ data = read_csv(StringIO(data), index_col=0)
+
+ grouped = data.groupby(['foo', 'bar', 'baz', 'spam'])
+
+ result = grouped.agg(np.mean)
+ expected = grouped.mean()
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_series_with_name(df):
+ result = df.groupby(df['A']).mean()
+ result2 = df.groupby(df['A'], as_index=False).mean()
+ assert result.index.name == 'A'
+ assert 'A' in result2
+
+ result = df.groupby([df['A'], df['B']]).mean()
+ result2 = df.groupby([df['A'], df['B']],
+ as_index=False).mean()
+ assert result.index.names == ('A', 'B')
+ assert 'A' in result2
+ assert 'B' in result2
+
+
+def test_seriesgroupby_name_attr(df):
+ # GH 6265
+ result = df.groupby('A')['C']
+ assert result.count().name == 'C'
+ assert result.mean().name == 'C'
+
+ testFunc = lambda x: np.sum(x) * 2
+ assert result.agg(testFunc).name == 'C'
+
+
+def test_consistency_name():
+ # GH 12363
+
+ df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+ 'foo', 'bar', 'foo', 'foo'],
+ 'B': ['one', 'one', 'two', 'two',
+ 'two', 'two', 'one', 'two'],
+ 'C': np.random.randn(8) + 1.0,
+ 'D': np.arange(8)})
+
+ expected = df.groupby(['A']).B.count()
+ result = df.B.groupby(df.A).count()
+ assert_series_equal(result, expected)
+
+
+def test_groupby_name_propagation(df):
+ # GH 6124
+ def summarize(df, name=None):
+ return Series({'count': 1, 'mean': 2, 'omissions': 3}, name=name)
+
+ def summarize_random_name(df):
+ # Provide a different name for each Series. In this case, groupby
+ # should not attempt to propagate the Series name since they are
+ # inconsistent.
+ return Series({
+ 'count': 1,
+ 'mean': 2,
+ 'omissions': 3,
+ }, name=df.iloc[0]['A'])
+
+ metrics = df.groupby('A').apply(summarize)
+ assert metrics.columns.name is None
+ metrics = df.groupby('A').apply(summarize, 'metrics')
+ assert metrics.columns.name == 'metrics'
+ metrics = df.groupby('A').apply(summarize_random_name)
+ assert metrics.columns.name is None
+
+
+def test_groupby_nonstring_columns():
+ df = DataFrame([np.arange(10) for x in range(10)])
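+ # integer column labels: groupby(0) must resolve 0 as a column key,
+ # matching an explicit groupby on that column's values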
+ grouped = df.groupby(0)
+ result = grouped.mean()
+ expected = df.groupby(df[0]).mean()
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_mixed_type_columns():
+ # GH 13432, unorderable types in py3
+ df = DataFrame([[0, 1, 2]], columns=['A', 'B', 0])
+ expected = DataFrame([[1, 2]], columns=['B', 0],
+ index=Index([0], name='A'))
+
+ result = df.groupby('A').first()
+ tm.assert_frame_equal(result, expected)
+
+ result = df.groupby('A').sum()
+ tm.assert_frame_equal(result, expected)
+
+
+def test_cython_grouper_series_bug_noncontig():
+ arr = np.empty((100, 100))
+ arr.fill(np.nan)
+ obj = Series(arr[:, 0], index=lrange(100))
+ inds = np.tile(lrange(10), 10)
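+ # arr[:, 0] is a strided, non-contiguous view; the Cython grouper must
+ # handle it, and with all-NaN input every group median is NaN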
+
+ result = obj.groupby(inds).agg(Series.median)
+ assert result.isna().all()
+
+
+def test_series_grouper_noncontig_index():
+ index = Index(tm.rands_array(10, 100))
+
+ values = Series(np.random.randn(50), index=index[::2])
+ labels = np.random.randint(0, 5, 50)
+
+ # it works!
+ grouped = values.groupby(labels)
+
+ # accessing the index elements used to cause a segfault
+ f = lambda x: len(set(map(id, x.index)))
+ grouped.agg(f)
+
+
+def test_convert_objects_leave_decimal_alone():
+
+ s = Series(lrange(5))
+ labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')
+
+ def convert_fast(x):
+ return Decimal(str(x.mean()))
+
+ def convert_force_pure(x):
+ # x.base is non-empty, i.e. x is a view on a base array,
+ # which forces the slower pure-Python aggregation path
+ assert len(x.base) > 0
+ return Decimal(str(x.mean()))
+
+ grouped = s.groupby(labels)
+
+ result = grouped.agg(convert_fast)
+ assert result.dtype == np.object_
+ assert isinstance(result[0], Decimal)
+
+ result = grouped.agg(convert_force_pure)
+ assert result.dtype == np.object_
+ assert isinstance(result[0], Decimal)
+
+
+def test_groupby_dtype_inference_empty():
+ # GH 6733
+ df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')})
+ assert df['x'].dtype == np.float64
+
+ result = df.groupby('x').first()
+ exp_index = Index([], name='x', dtype=np.float64)
+ expected = DataFrame({'range': Series(
+ [], index=exp_index, dtype='int64')})
+ assert_frame_equal(result, expected, by_blocks=True)
+
+
+def test_groupby_list_infer_array_like(df):
+ result = df.groupby(list(df['A'])).mean()
+ expected = df.groupby(df['A']).mean()
+ assert_frame_equal(result, expected, check_names=False)
+
+ pytest.raises(Exception, df.groupby, list(df['A'][:-1]))
+
+ # pathological case of ambiguity
+ df = DataFrame({'foo': [0, 1],
+ 'bar': [3, 4],
+ 'val': np.random.randn(2)})
+
+ result = df.groupby(['foo', 'bar']).mean()
+ expected = df.groupby([df['foo'], df['bar']]).mean()[['val']]
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_keys_same_size_as_index():
+ # GH 11185
+ freq = 's'
+ index = pd.date_range(start=pd.Timestamp('2015-09-29T11:34:44-0700'),
+ periods=2, freq=freq)
+ df = pd.DataFrame([['A', 10], ['B', 15]],
+ columns=['metric', 'values'], index=index)
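+ # a Grouper at the index's own freq puts each row in its own group,
+ # so grouping by [Grouper, 'metric'] is equivalent to set_index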
+ result = df.groupby([pd.Grouper(level=0, freq=freq), 'metric']).mean()
+ expected = df.set_index([df.index, 'metric'])
+
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_one_row():
+ # GH 11741
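+ # a missing key must raise KeyError even for a single-row frame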
+ df1 = pd.DataFrame(np.random.randn(1, 4), columns=list('ABCD'))
+ pytest.raises(KeyError, df1.groupby, 'Z')
+ df2 = pd.DataFrame(np.random.randn(2, 4), columns=list('ABCD'))
+ pytest.raises(KeyError, df2.groupby, 'Z')
+
+
+def test_groupby_nat_exclude():
+ # GH 6992
+ df = pd.DataFrame(
+ {'values': np.random.randn(8),
+ 'dt': [np.nan, pd.Timestamp('2013-01-01'), np.nan, pd.Timestamp(
+ '2013-02-01'), np.nan, pd.Timestamp('2013-02-01'), np.nan,
+ pd.Timestamp('2013-01-01')],
+ 'str': [np.nan, 'a', np.nan, 'a', np.nan, 'a', np.nan, 'b']})
+ grouped = df.groupby('dt')
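+ # NaT keys are excluded: only rows 1/7 (2013-01-01) and 3/5
+ # (2013-02-01) form groups; the NaN/NaT rows are dropped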
+
+ expected = [pd.Index([1, 7]), pd.Index([3, 5])]
+ keys = sorted(grouped.groups.keys())
+ assert len(keys) == 2
+ for k, e in zip(keys, expected):
+ # grouped.groups keys are np.datetime64 in the system tz;
+ # to avoid tz effects, compare only the index values
+ tm.assert_index_equal(grouped.groups[k], e)
+
+ # confirm obj is not filtered
+ tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df)
+ assert grouped.ngroups == 2
+
+ expected = {
+ Timestamp('2013-01-01 00:00:00'): np.array([1, 7], dtype=np.int64),
+ Timestamp('2013-02-01 00:00:00'): np.array([3, 5], dtype=np.int64)
+ }
+
+ for k in grouped.indices:
+ tm.assert_numpy_array_equal(grouped.indices[k], expected[k])
+
+ tm.assert_frame_equal(
+ grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]])
+ tm.assert_frame_equal(
+ grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]])
+
+ pytest.raises(KeyError, grouped.get_group, pd.NaT)
+
+ nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan],
+ 'nat': [pd.NaT, pd.NaT, pd.NaT]})
+ assert nan_df['nan'].dtype == 'float64'
+ assert nan_df['nat'].dtype == 'datetime64[ns]'
+
+ for key in ['nan', 'nat']:
+ grouped = nan_df.groupby(key)
+ assert grouped.groups == {}
+ assert grouped.ngroups == 0
+ assert grouped.indices == {}
+ pytest.raises(KeyError, grouped.get_group, np.nan)
pytest.raises(KeyError, grouped.get_group, pd.NaT)
- nan_df = DataFrame({'nan': [np.nan, np.nan, np.nan],
- 'nat': [pd.NaT, pd.NaT, pd.NaT]})
- assert nan_df['nan'].dtype == 'float64'
- assert nan_df['nat'].dtype == 'datetime64[ns]'
-
- for key in ['nan', 'nat']:
- grouped = nan_df.groupby(key)
- assert grouped.groups == {}
- assert grouped.ngroups == 0
- assert grouped.indices == {}
- pytest.raises(KeyError, grouped.get_group, np.nan)
- pytest.raises(KeyError, grouped.get_group, pd.NaT)
-
- def test_sparse_friendly(self):
- sdf = self.df[['C', 'D']].to_sparse()
- with catch_warnings(record=True):
- panel = tm.makePanel()
- tm.add_nans(panel)
-
- def _check_work(gp):
- gp.mean()
- gp.agg(np.mean)
- dict(iter(gp))
-
- # it works!
- _check_work(sdf.groupby(lambda x: x // 2))
- _check_work(sdf['C'].groupby(lambda x: x // 2))
- _check_work(sdf.groupby(self.df['A']))
-
- # do this someday
- # _check_work(panel.groupby(lambda x: x.month, axis=1))
-
- def test_panel_groupby(self):
- with catch_warnings(record=True):
- self.panel = tm.makePanel()
- tm.add_nans(self.panel)
- grouped = self.panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1},
- axis='items')
- agged = grouped.mean()
- agged2 = grouped.agg(lambda x: x.mean('items'))
-
- tm.assert_panel_equal(agged, agged2)
-
- tm.assert_index_equal(agged.items, Index([0, 1]))
-
- grouped = self.panel.groupby(lambda x: x.month, axis='major')
- agged = grouped.mean()
-
- exp = Index(sorted(list(set(self.panel.major_axis.month))))
- tm.assert_index_equal(agged.major_axis, exp)
-
- grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
- axis='minor')
- agged = grouped.mean()
- tm.assert_index_equal(agged.minor_axis, Index([0, 1]))
-
- def test_groupby_2d_malformed(self):
- d = DataFrame(index=lrange(2))
- d['group'] = ['g1', 'g2']
- d['zeros'] = [0, 0]
- d['ones'] = [1, 1]
- d['label'] = ['l1', 'l2']
- tmp = d.groupby(['group']).mean()
- res_values = np.array([[0, 1], [0, 1]], dtype=np.int64)
- tm.assert_index_equal(tmp.columns, Index(['zeros', 'ones']))
- tm.assert_numpy_array_equal(tmp.values, res_values)
-
- def test_int32_overflow(self):
- B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000)
- ))
- A = np.arange(25000)
- df = DataFrame({'A': A,
- 'B': B,
- 'C': A,
- 'D': B,
- 'E': np.random.randn(25000)})
-
- left = df.groupby(['A', 'B', 'C', 'D']).sum()
- right = df.groupby(['D', 'C', 'B', 'A']).sum()
- assert len(left) == len(right)
-
- def test_groupby_sort_multi(self):
- df = DataFrame({'a': ['foo', 'bar', 'baz'],
- 'b': [3, 2, 1],
- 'c': [0, 1, 2],
- 'd': np.random.randn(3)})
-
- tups = lmap(tuple, df[['a', 'b', 'c']].values)
- tups = com._asarray_tuplesafe(tups)
- result = df.groupby(['a', 'b', 'c'], sort=True).sum()
- tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]])
- tups = lmap(tuple, df[['c', 'a', 'b']].values)
- tups = com._asarray_tuplesafe(tups)
- result = df.groupby(['c', 'a', 'b'], sort=True).sum()
- tm.assert_numpy_array_equal(result.index.values, tups)
+def test_sparse_friendly(df):
+ sdf = df[['C', 'D']].to_sparse()
+ with catch_warnings(record=True):
+ panel = tm.makePanel()
+ tm.add_nans(panel)
+
+ def _check_work(gp):
+ gp.mean()
+ gp.agg(np.mean)
+ dict(iter(gp))
+
+ # it works!
+ _check_work(sdf.groupby(lambda x: x // 2))
+ _check_work(sdf['C'].groupby(lambda x: x // 2))
+ _check_work(sdf.groupby(df['A']))
+
+ # do this someday
+ # _check_work(panel.groupby(lambda x: x.month, axis=1))
+
+
+def test_panel_groupby():
+ with catch_warnings(record=True):
+ panel = tm.makePanel()
+ tm.add_nans(panel)
+ grouped = panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1},
+ axis='items')
+ agged = grouped.mean()
+ agged2 = grouped.agg(lambda x: x.mean('items'))
+
+ tm.assert_panel_equal(agged, agged2)
+
+ tm.assert_index_equal(agged.items, Index([0, 1]))
- tups = lmap(tuple, df[['b', 'c', 'a']].values)
+ grouped = panel.groupby(lambda x: x.month, axis='major')
+ agged = grouped.mean()
+
+ exp = Index(sorted(list(set(panel.major_axis.month))))
+ tm.assert_index_equal(agged.major_axis, exp)
+
+ grouped = panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
+ axis='minor')
+ agged = grouped.mean()
+ tm.assert_index_equal(agged.minor_axis, Index([0, 1]))
+
+
+def test_groupby_2d_malformed():
+ d = DataFrame(index=lrange(2))
+ d['group'] = ['g1', 'g2']
+ d['zeros'] = [0, 0]
+ d['ones'] = [1, 1]
+ d['label'] = ['l1', 'l2']
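+ # the object-dtype 'label' column is dropped as a nuisance column,
+ # so only the numeric 'zeros'/'ones' columns survive the mean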
+ tmp = d.groupby(['group']).mean()
+ res_values = np.array([[0, 1], [0, 1]], dtype=np.int64)
+ tm.assert_index_equal(tmp.columns, Index(['zeros', 'ones']))
+ tm.assert_numpy_array_equal(tmp.values, res_values)
+
+
+def test_int32_overflow():
+ B = np.concatenate((np.arange(10000), np.arange(10000),
+ np.arange(5000)))
+ A = np.arange(25000)
+ df = DataFrame({'A': A,
+ 'B': B,
+ 'C': A,
+ 'D': B,
+ 'E': np.random.randn(25000)})
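+ # four 25000-valued keys make the intermediate compressed group index
+ # far exceed int32; both key orders must still yield the same groups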
+
+ left = df.groupby(['A', 'B', 'C', 'D']).sum()
+ right = df.groupby(['D', 'C', 'B', 'A']).sum()
+ assert len(left) == len(right)
+
+
+def test_groupby_sort_multi():
+ df = DataFrame({'a': ['foo', 'bar', 'baz'],
+ 'b': [3, 2, 1],
+ 'c': [0, 1, 2],
+ 'd': np.random.randn(3)})
+
+ tups = lmap(tuple, df[['a', 'b', 'c']].values)
+ tups = com._asarray_tuplesafe(tups)
+ result = df.groupby(['a', 'b', 'c'], sort=True).sum()
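+ # lexicographic sort of the (a, b, c) tuples orders
+ # 'bar' < 'baz' < 'foo', i.e. original rows 1, 2, 0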
+ tm.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]])
+
+ tups = lmap(tuple, df[['c', 'a', 'b']].values)
+ tups = com._asarray_tuplesafe(tups)
+ result = df.groupby(['c', 'a', 'b'], sort=True).sum()
+ tm.assert_numpy_array_equal(result.index.values, tups)
+
+ tups = lmap(tuple, df[['b', 'c', 'a']].values)
+ tups = com._asarray_tuplesafe(tups)
+ result = df.groupby(['b', 'c', 'a'], sort=True).sum()
+ tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]])
+
+ df = DataFrame({'a': [0, 1, 2, 0, 1, 2],
+ 'b': [0, 0, 0, 1, 1, 1],
+ 'd': np.random.randn(6)})
+ grouped = df.groupby(['a', 'b'])['d']
+ result = grouped.sum()
+
+ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
+ tups = lmap(tuple, df[keys].values)
tups = com._asarray_tuplesafe(tups)
- result = df.groupby(['b', 'c', 'a'], sort=True).sum()
- tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]])
-
- df = DataFrame({'a': [0, 1, 2, 0, 1, 2],
- 'b': [0, 0, 0, 1, 1, 1],
- 'd': np.random.randn(6)})
- grouped = df.groupby(['a', 'b'])['d']
- result = grouped.sum()
- _check_groupby(df, result, ['a', 'b'], 'd')
-
- def test_intercept_builtin_sum(self):
- s = Series([1., 2., np.nan, 3.])
- grouped = s.groupby([0, 1, 2, 2])
-
- result = grouped.agg(builtins.sum)
- result2 = grouped.apply(builtins.sum)
- expected = grouped.sum()
- assert_series_equal(result, expected)
- assert_series_equal(result2, expected)
-
- def test_rank_apply(self):
- lev1 = tm.rands_array(10, 100)
- lev2 = tm.rands_array(10, 130)
- lab1 = np.random.randint(0, 100, size=500)
- lab2 = np.random.randint(0, 130, size=500)
-
- df = DataFrame({'value': np.random.randn(500),
- 'key1': lev1.take(lab1),
- 'key2': lev2.take(lab2)})
-
- result = df.groupby(['key1', 'key2']).value.rank()
-
- expected = []
- for key, piece in df.groupby(['key1', 'key2']):
- expected.append(piece.value.rank())
- expected = concat(expected, axis=0)
- expected = expected.reindex(result.index)
- assert_series_equal(result, expected)
-
- result = df.groupby(['key1', 'key2']).value.rank(pct=True)
-
- expected = []
- for key, piece in df.groupby(['key1', 'key2']):
- expected.append(piece.value.rank(pct=True))
- expected = concat(expected, axis=0)
- expected = expected.reindex(result.index)
- assert_series_equal(result, expected)
-
- @pytest.mark.parametrize("grps", [
- ['qux'], ['qux', 'quux']])
- @pytest.mark.parametrize("vals", [
- [2, 2, 8, 2, 6],
- [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'),
- pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
- pd.Timestamp('2018-01-06')]])
- @pytest.mark.parametrize("ties_method,ascending,pct,exp", [
- ('average', True, False, [2., 2., 5., 2., 4.]),
- ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]),
- ('average', False, False, [4., 4., 1., 4., 2.]),
- ('average', False, True, [.8, .8, .2, .8, .4]),
- ('min', True, False, [1., 1., 5., 1., 4.]),
- ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]),
- ('min', False, False, [3., 3., 1., 3., 2.]),
- ('min', False, True, [.6, .6, .2, .6, .4]),
- ('max', True, False, [3., 3., 5., 3., 4.]),
- ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]),
- ('max', False, False, [5., 5., 1., 5., 2.]),
- ('max', False, True, [1., 1., .2, 1., .4]),
- ('first', True, False, [1., 2., 5., 3., 4.]),
- ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]),
- ('first', False, False, [3., 4., 1., 5., 2.]),
- ('first', False, True, [.6, .8, .2, 1., .4]),
- ('dense', True, False, [1., 1., 3., 1., 2.]),
- ('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]),
- ('dense', False, False, [3., 3., 1., 3., 2.]),
- ('dense', False, True, [.6, .6, .2, .6, .4]),
- ])
- def test_rank_args(self, grps, vals, ties_method, ascending, pct, exp):
- key = np.repeat(grps, len(vals))
- vals = vals * len(grps)
- df = DataFrame({'key': key, 'val': vals})
- result = df.groupby('key').rank(method=ties_method,
- ascending=ascending, pct=pct)
-
- exp_df = DataFrame(exp * len(grps), columns=['val'])
- assert_frame_equal(result, exp_df)
-
- @pytest.mark.parametrize("grps", [
- ['qux'], ['qux', 'quux']])
- @pytest.mark.parametrize("vals", [
- [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats
- [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan,
- pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
- pd.Timestamp('2018-01-06'), np.nan, np.nan]
- ])
- @pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [
- ('average', True, 'keep', False,
- [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]),
- ('average', True, 'keep', True,
- [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]),
- ('average', False, 'keep', False,
- [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]),
- ('average', False, 'keep', True,
- [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]),
- ('min', True, 'keep', False,
- [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]),
- ('min', True, 'keep', True,
- [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]),
- ('min', False, 'keep', False,
- [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
- ('min', False, 'keep', True,
- [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
- ('max', True, 'keep', False,
- [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]),
- ('max', True, 'keep', True,
- [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
- ('max', False, 'keep', False,
- [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]),
- ('max', False, 'keep', True,
- [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
- ('first', True, 'keep', False,
- [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]),
- ('first', True, 'keep', True,
- [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
- ('first', False, 'keep', False,
- [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]),
- ('first', False, 'keep', True,
- [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
- ('dense', True, 'keep', False,
- [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]),
- ('dense', True, 'keep', True,
- [0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]),
- ('dense', False, 'keep', False,
- [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
- ('dense', False, 'keep', True,
- [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
- ('average', True, 'no_na', False, [2., 2., 7., 5., 2., 4., 7., 7.]),
- ('average', True, 'no_na', True,
- [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]),
- ('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]),
- ('average', False, 'no_na', True,
- [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]),
- ('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]),
- ('min', True, 'no_na', True,
- [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]),
- ('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]),
- ('min', False, 'no_na', True,
- [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]),
- ('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]),
- ('max', True, 'no_na', True,
- [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]),
- ('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]),
- ('max', False, 'no_na', True,
- [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]),
- ('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]),
- ('first', True, 'no_na', True,
- [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]),
- ('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]),
- ('first', False, 'no_na', True,
- [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]),
- ('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]),
- ('dense', True, 'no_na', True,
- [0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]),
- ('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]),
- ('dense', False, 'no_na', True,
- [0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5])
- ])
- def test_rank_args_missing(self, grps, vals, ties_method, ascending,
- na_option, pct, exp):
- key = np.repeat(grps, len(vals))
- vals = vals * len(grps)
- df = DataFrame({'key': key, 'val': vals})
- result = df.groupby('key').rank(method=ties_method,
- ascending=ascending,
- na_option=na_option, pct=pct)
-
- exp_df = DataFrame(exp * len(grps), columns=['val'])
- assert_frame_equal(result, exp_df)
-
- @pytest.mark.parametrize("pct,exp", [
- (False, [3., 3., 3., 3., 3.]),
- (True, [.6, .6, .6, .6, .6])])
- def test_rank_resets_each_group(self, pct, exp):
- df = DataFrame(
- {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
- 'val': [1] * 10}
- )
- result = df.groupby('key').rank(pct=pct)
- exp_df = DataFrame(exp * 2, columns=['val'])
- assert_frame_equal(result, exp_df)
-
- def test_rank_avg_even_vals(self):
- df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4})
- result = df.groupby('key').rank()
- exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val'])
- assert_frame_equal(result, exp_df)
-
- @pytest.mark.parametrize("ties_method", [
- 'average', 'min', 'max', 'first', 'dense'])
- @pytest.mark.parametrize("ascending", [True, False])
- @pytest.mark.parametrize("na_option", ["keep", "top", "bottom"])
- @pytest.mark.parametrize("pct", [True, False])
- @pytest.mark.parametrize("vals", [
- ['bar', 'bar', 'foo', 'bar', 'baz'],
- ['bar', np.nan, 'foo', np.nan, 'baz']
- ])
- def test_rank_object_raises(self, ties_method, ascending, na_option,
- pct, vals):
- df = DataFrame({'key': ['foo'] * 5, 'val': vals})
- with tm.assert_raises_regex(TypeError, "not callable"):
- df.groupby('key').rank(method=ties_method,
- ascending=ascending,
- na_option=na_option, pct=pct)
-
- @pytest.mark.parametrize("agg_func", ['any', 'all'])
- @pytest.mark.parametrize("skipna", [True, False])
- @pytest.mark.parametrize("vals", [
- ['foo', 'bar', 'baz'], ['foo', '', ''], ['', '', ''],
- [1, 2, 3], [1, 0, 0], [0, 0, 0],
- [1., 2., 3.], [1., 0., 0.], [0., 0., 0.],
- [True, True, True], [True, False, False], [False, False, False],
- [np.nan, np.nan, np.nan]
- ])
- def test_groupby_bool_aggs(self, agg_func, skipna, vals):
- df = DataFrame({'key': ['a'] * 3 + ['b'] * 3, 'val': vals * 2})
-
- # Figure out expectation using Python builtin
- exp = getattr(compat.builtins, agg_func)(vals)
-
- # edge case for missing data with skipna and 'any'
- if skipna and all(isna(vals)) and agg_func == 'any':
- exp = False
-
- exp_df = DataFrame([exp] * 2, columns=['val'], index=pd.Index(
- ['a', 'b'], name='key'))
- result = getattr(df.groupby('key'), agg_func)(skipna=skipna)
- assert_frame_equal(result, exp_df)
-
- def test_dont_clobber_name_column(self):
- df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],
- 'name': ['foo', 'bar', 'baz'] * 2})
-
- result = df.groupby('key').apply(lambda x: x)
- assert_frame_equal(result, df)
-
- def test_skip_group_keys(self):
- from pandas import concat
-
- tsf = tm.makeTimeDataFrame()
-
- grouped = tsf.groupby(lambda x: x.month, group_keys=False)
- result = grouped.apply(lambda x: x.sort_values(by='A')[:3])
-
- pieces = []
- for key, group in grouped:
- pieces.append(group.sort_values(by='A')[:3])
-
- expected = concat(pieces)
- assert_frame_equal(result, expected)
-
- grouped = tsf['A'].groupby(lambda x: x.month, group_keys=False)
- result = grouped.apply(lambda x: x.sort_values()[:3])
-
- pieces = []
- for key, group in grouped:
- pieces.append(group.sort_values()[:3])
-
- expected = concat(pieces)
- assert_series_equal(result, expected)
-
- def test_no_nonsense_name(self):
- # GH #995
- s = self.frame['C'].copy()
- s.name = None
-
- result = s.groupby(self.frame['A']).agg(np.sum)
- assert result.name is None
-
- def test_multifunc_sum_bug(self):
- # GH #1065
- x = DataFrame(np.arange(9).reshape(3, 3))
- x['test'] = 0
- x['fl'] = [1.3, 1.5, 1.6]
-
- grouped = x.groupby('test')
- result = grouped.agg({'fl': 'sum', 2: 'size'})
- assert result['fl'].dtype == np.float64
-
- def test_handle_dict_return_value(self):
- def f(group):
- return {'max': group.max(), 'min': group.min()}
-
- def g(group):
- return Series({'max': group.max(), 'min': group.min()})
-
- result = self.df.groupby('A')['C'].apply(f)
- expected = self.df.groupby('A')['C'].apply(g)
-
- assert isinstance(result, Series)
- assert_series_equal(result, expected)
-
- def test_set_group_name(self):
- def f(group):
- assert group.name is not None
- return group
-
- def freduce(group):
- assert group.name is not None
- return group.sum()
-
- def foo(x):
- return freduce(x)
-
- def _check_all(grouped):
- # make sure all these work
- grouped.apply(f)
- grouped.aggregate(freduce)
- grouped.aggregate({'C': freduce, 'D': freduce})
- grouped.transform(f)
-
- grouped['C'].apply(f)
- grouped['C'].aggregate(freduce)
- grouped['C'].aggregate([freduce, foo])
- grouped['C'].transform(f)
+ expected = f(df.groupby(tups)[field])
+ for k, v in compat.iteritems(expected):
+ assert result[k] == v
- _check_all(self.df.groupby('A'))
- _check_all(self.df.groupby(['A', 'B']))
-
- def test_group_name_available_in_inference_pass(self):
- # gh-15062
- df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
-
- names = []
-
- def f(group):
- names.append(group.name)
- return group.copy()
-
- df.groupby('a', sort=False, group_keys=False).apply(f)
- # we expect 2 zeros because we call ``f`` once to see if a faster route
- # can be used.
- expected_names = [0, 0, 1, 2]
- assert names == expected_names
-
- def test_no_dummy_key_names(self):
- # see gh-1291
- result = self.df.groupby(self.df['A'].values).sum()
- assert result.index.name is None
-
- result = self.df.groupby([self.df['A'].values, self.df['B'].values
- ]).sum()
- assert result.index.names == (None, None)
-
- def test_groupby_sort_multiindex_series(self):
- # series multiindex groupby sort argument was not being passed through
- # _compress_group_index
- # GH 9444
- index = MultiIndex(levels=[[1, 2], [1, 2]],
- labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]],
- names=['a', 'b'])
- mseries = Series([0, 1, 2, 3, 4, 5], index=index)
- index = MultiIndex(levels=[[1, 2], [1, 2]],
- labels=[[0, 0, 1], [1, 0, 0]], names=['a', 'b'])
- mseries_result = Series([0, 2, 4], index=index)
-
- result = mseries.groupby(level=['a', 'b'], sort=False).first()
- assert_series_equal(result, mseries_result)
- result = mseries.groupby(level=['a', 'b'], sort=True).first()
- assert_series_equal(result, mseries_result.sort_index())
-
- def test_groupby_reindex_inside_function(self):
-
- periods = 1000
- ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods)
- df = DataFrame({'high': np.arange(
- periods), 'low': np.arange(periods)}, index=ind)
-
- def agg_before(hour, func, fix=False):
- """
- Run an aggregate func on the subset of data.
- """
-
- def _func(data):
- d = data.loc[data.index.map(
- lambda x: x.hour < 11)].dropna()
- if fix:
- data[data.index[0]]
- if len(d) == 0:
- return None
- return func(d)
-
- return _func
-
- def afunc(data):
- d = data.select(lambda x: x.hour < 11).dropna()
- return np.max(d)
-
- grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))
- closure_bad = grouped.agg({'high': agg_before(11, np.max)})
- closure_good = grouped.agg({'high': agg_before(11, np.max, True)})
-
- assert_frame_equal(closure_bad, closure_good)
-
- def test_cython_median(self):
- df = DataFrame(np.random.randn(1000))
- df.values[::2] = np.nan
-
- labels = np.random.randint(0, 50, size=1000).astype(float)
- labels[::17] = np.nan
-
- result = df.groupby(labels).median()
- exp = df.groupby(labels).agg(nanops.nanmedian)
- assert_frame_equal(result, exp)
-
- df = DataFrame(np.random.randn(1000, 5))
- rs = df.groupby(labels).agg(np.median)
- xp = df.groupby(labels).median()
- assert_frame_equal(rs, xp)
-
- def test_median_empty_bins(self):
- df = pd.DataFrame(np.random.randint(0, 44, 500))
-
- grps = range(0, 55, 5)
- bins = pd.cut(df[0], grps)
-
- result = df.groupby(bins).median()
- expected = df.groupby(bins).agg(lambda x: x.median())
- assert_frame_equal(result, expected)
-
- @pytest.mark.parametrize("dtype", [
- 'int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
- @pytest.mark.parametrize("method,data", [
- ('first', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}),
- ('last', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}),
- ('min', {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}),
- ('max', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}),
- ('nth', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}],
- 'args': [1]}),
- ('count', {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}],
- 'out_type': 'int64'})
- ])
- def test_groupby_non_arithmetic_agg_types(self, dtype, method, data):
- # GH9311, GH6620
- df = pd.DataFrame(
- [{'a': 1, 'b': 1},
- {'a': 1, 'b': 2},
- {'a': 2, 'b': 3},
- {'a': 2, 'b': 4}])
-
- df['b'] = df.b.astype(dtype)
-
- if 'args' not in data:
- data['args'] = []
-
- if 'out_type' in data:
- out_type = data['out_type']
- else:
- out_type = dtype
-
- exp = data['df']
- df_out = pd.DataFrame(exp)
-
- df_out['b'] = df_out.b.astype(out_type)
- df_out.set_index('a', inplace=True)
-
- grpd = df.groupby('a')
- t = getattr(grpd, method)(*data['args'])
- assert_frame_equal(t, df_out)
-
- def test_groupby_non_arithmetic_agg_intlike_precision(self):
- # GH9311, GH6620
- c = 24650000000000000
-
- inputs = ((Timestamp('2011-01-15 12:50:28.502376'),
- Timestamp('2011-01-20 12:50:28.593448')), (1 + c, 2 + c))
-
- for i in inputs:
- df = pd.DataFrame([{'a': 1, 'b': i[0]}, {'a': 1, 'b': i[1]}])
-
- grp_exp = {'first': {'expected': i[0]},
- 'last': {'expected': i[1]},
- 'min': {'expected': i[0]},
- 'max': {'expected': i[1]},
- 'nth': {'expected': i[1],
- 'args': [1]},
- 'count': {'expected': 2}}
-
- for method, data in compat.iteritems(grp_exp):
- if 'args' not in data:
- data['args'] = []
-
- grpd = df.groupby('a')
- res = getattr(grpd, method)(*data['args'])
- assert res.iloc[0].b == data['expected']
-
- def test_groupby_multiindex_missing_pair(self):
- # GH9049
- df = DataFrame({'group1': ['a', 'a', 'a', 'b'],
- 'group2': ['c', 'c', 'd', 'c'],
- 'value': [1, 1, 1, 5]})
- df = df.set_index(['group1', 'group2'])
- df_grouped = df.groupby(level=['group1', 'group2'], sort=True)
-
- res = df_grouped.agg('sum')
- idx = MultiIndex.from_tuples(
- [('a', 'c'), ('a', 'd'), ('b', 'c')], names=['group1', 'group2'])
- exp = DataFrame([[2], [1], [5]], index=idx, columns=['value'])
-
- tm.assert_frame_equal(res, exp)
-
- def test_groupby_multiindex_not_lexsorted(self):
- # GH 11640
-
- # define the lexsorted version
- lexsorted_mi = MultiIndex.from_tuples(
- [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
- lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
- assert lexsorted_df.columns.is_lexsorted()
-
- # define the non-lexsorted version
- not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'],
- data=[[1, 'b1', 'c1', 3],
- [1, 'b2', 'c2', 4]])
- not_lexsorted_df = not_lexsorted_df.pivot_table(
- index='a', columns=['b', 'c'], values='d')
- not_lexsorted_df = not_lexsorted_df.reset_index()
- assert not not_lexsorted_df.columns.is_lexsorted()
-
- # compare the results
- tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
-
- expected = lexsorted_df.groupby('a').mean()
- with tm.assert_produces_warning(PerformanceWarning):
- result = not_lexsorted_df.groupby('a').mean()
- tm.assert_frame_equal(expected, result)
-
- # a transforming function should work regardless of sort
- # GH 14776
- df = DataFrame({'x': ['a', 'a', 'b', 'a'],
- 'y': [1, 1, 2, 2],
- 'z': [1, 2, 3, 4]}).set_index(['x', 'y'])
- assert not df.index.is_lexsorted()
-
- for level in [0, 1, [0, 1]]:
- for sort in [False, True]:
- result = df.groupby(level=level, sort=sort).apply(
- DataFrame.drop_duplicates)
- expected = df
- tm.assert_frame_equal(expected, result)
-
- result = df.sort_index().groupby(level=level, sort=sort).apply(
- DataFrame.drop_duplicates)
- expected = df.sort_index()
- tm.assert_frame_equal(expected, result)
-
- def test_gb_apply_list_of_unequal_len_arrays(self):
-
- # GH1738
- df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a',
- 'b', 'b', 'b'],
- 'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd',
- 'd', 'd', 'e'],
- 'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2],
- 'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]})
- df = df.set_index(['group1', 'group2'])
- df_grouped = df.groupby(level=['group1', 'group2'], sort=True)
-
- def noddy(value, weight):
- out = np.array(value * weight).repeat(3)
- return out
-
- # the kernel function returns arrays of unequal length
- # pandas sniffs the first one, sees it's an array and not
- # a list, and assumed the rest are of equal length
- # and so tries a vstack
-
- # don't die
- df_grouped.apply(lambda x: noddy(x.value, x.weight))
-
- def test_fill_constistency(self):
-
- # GH9221
- # pass thru keyword arguments to the generated wrapper
- # are set if the passed kw is None (only)
- df = DataFrame(index=pd.MultiIndex.from_product(
- [['value1', 'value2'], date_range('2014-01-01', '2014-01-06')]),
- columns=Index(
- ['1', '2'], name='id'))
- df['1'] = [np.nan, 1, np.nan, np.nan, 11, np.nan, np.nan, 2, np.nan,
- np.nan, 22, np.nan]
- df['2'] = [np.nan, 3, np.nan, np.nan, 33, np.nan, np.nan, 4, np.nan,
- np.nan, 44, np.nan]
-
- expected = df.groupby(level=0, axis=0).fillna(method='ffill')
- result = df.T.groupby(level=0, axis=1).fillna(method='ffill').T
- assert_frame_equal(result, expected)
-
- def test_index_label_overlaps_location(self):
- # checking we don't have any label/location confusion in the
- # the wake of GH5375
- df = DataFrame(list('ABCDE'), index=[2, 0, 2, 1, 1])
- g = df.groupby(list('ababb'))
- actual = g.filter(lambda x: len(x) > 2)
- expected = df.iloc[[1, 3, 4]]
- assert_frame_equal(actual, expected)
-
- ser = df[0]
- g = ser.groupby(list('ababb'))
- actual = g.filter(lambda x: len(x) > 2)
- expected = ser.take([1, 3, 4])
- assert_series_equal(actual, expected)
-
- # ... and again, with a generic Index of floats
- df.index = df.index.astype(float)
- g = df.groupby(list('ababb'))
- actual = g.filter(lambda x: len(x) > 2)
- expected = df.iloc[[1, 3, 4]]
- assert_frame_equal(actual, expected)
-
- ser = df[0]
- g = ser.groupby(list('ababb'))
- actual = g.filter(lambda x: len(x) > 2)
- expected = ser.take([1, 3, 4])
- assert_series_equal(actual, expected)
-
- def test_groupby_cumprod(self):
- # GH 4095
- df = pd.DataFrame({'key': ['b'] * 10, 'value': 2})
-
- actual = df.groupby('key')['value'].cumprod()
- expected = df.groupby('key')['value'].apply(lambda x: x.cumprod())
- expected.name = 'value'
- tm.assert_series_equal(actual, expected)
-
- df = pd.DataFrame({'key': ['b'] * 100, 'value': 2})
- actual = df.groupby('key')['value'].cumprod()
- # if overflows, groupby product casts to float
- # while numpy passes back invalid values
- df['value'] = df['value'].astype(float)
- expected = df.groupby('key')['value'].apply(lambda x: x.cumprod())
- expected.name = 'value'
- tm.assert_series_equal(actual, expected)
-
- def test_ops_general(self):
- ops = [('mean', np.mean),
- ('median', np.median),
- ('std', np.std),
- ('var', np.var),
- ('sum', np.sum),
- ('prod', np.prod),
- ('min', np.min),
- ('max', np.max),
- ('first', lambda x: x.iloc[0]),
- ('last', lambda x: x.iloc[-1]),
- ('count', np.size), ]
- try:
- from scipy.stats import sem
- except ImportError:
- pass
- else:
- ops.append(('sem', sem))
- df = DataFrame(np.random.randn(1000))
- labels = np.random.randint(0, 50, size=1000).astype(float)
-
- for op, targop in ops:
- result = getattr(df.groupby(labels), op)().astype(float)
- expected = df.groupby(labels).agg(targop)
- try:
- tm.assert_frame_equal(result, expected)
- except BaseException as exc:
- exc.args += ('operation: %s' % op, )
- raise
-
- def test_max_nan_bug(self):
- raw = """,Date,app,File
-2013-04-23,2013-04-23 00:00:00,,log080001.log
-2013-05-06,2013-05-06 00:00:00,,log.log
-2013-05-07,2013-05-07 00:00:00,OE,xlsx"""
-
- df = pd.read_csv(StringIO(raw), parse_dates=[0])
- gb = df.groupby('Date')
- r = gb[['File']].max()
- e = gb['File'].max().to_frame()
- tm.assert_frame_equal(r, e)
- assert not r['File'].isna().any()
-
- def test_nlargest(self):
- a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
- b = Series(list('a' * 5 + 'b' * 5))
- gb = a.groupby(b)
- r = gb.nlargest(3)
- e = Series([
- 7, 5, 3, 10, 9, 6
- ], index=MultiIndex.from_arrays([list('aaabbb'), [3, 2, 1, 9, 5, 8]]))
- tm.assert_series_equal(r, e)
-
- a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0])
- gb = a.groupby(b)
- e = Series([
- 3, 2, 1, 3, 3, 2
- ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]]))
- assert_series_equal(gb.nlargest(3, keep='last'), e)
-
- def test_nsmallest(self):
- a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
- b = Series(list('a' * 5 + 'b' * 5))
- gb = a.groupby(b)
- r = gb.nsmallest(3)
- e = Series([
- 1, 2, 3, 0, 4, 6
- ], index=MultiIndex.from_arrays([list('aaabbb'), [0, 4, 1, 6, 7, 8]]))
- tm.assert_series_equal(r, e)
-
- a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0])
- gb = a.groupby(b)
- e = Series([
- 0, 1, 1, 0, 1, 2
- ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]]))
- assert_series_equal(gb.nsmallest(3, keep='last'), e)
-
- def test_transform_doesnt_clobber_ints(self):
- # GH 7972
- n = 6
- x = np.arange(n)
- df = DataFrame({'a': x // 2, 'b': 2.0 * x, 'c': 3.0 * x})
- df2 = DataFrame({'a': x // 2 * 1.0, 'b': 2.0 * x, 'c': 3.0 * x})
-
- gb = df.groupby('a')
- result = gb.transform('mean')
-
- gb2 = df2.groupby('a')
- expected = gb2.transform('mean')
- tm.assert_frame_equal(result, expected)
-
- def test_groupby_apply_all_none(self):
- # Tests to make sure no errors if apply function returns all None
- # values. Issue 9684.
- test_df = DataFrame({'groups': [0, 0, 1, 1],
- 'random_vars': [8, 7, 4, 5]})
-
- def test_func(x):
- pass
-
- result = test_df.groupby('groups').apply(test_func)
- expected = DataFrame()
- tm.assert_frame_equal(result, expected)
-
- def test_groupby_apply_none_first(self):
- # GH 12824. Tests if apply returns None first.
- test_df1 = DataFrame({'groups': [1, 1, 1, 2], 'vars': [0, 1, 2, 3]})
- test_df2 = DataFrame({'groups': [1, 2, 2, 2], 'vars': [0, 1, 2, 3]})
-
- def test_func(x):
- if x.shape[0] < 2:
+ _check_groupby(df, result, ['a', 'b'], 'd')
+
+
+def test_dont_clobber_name_column():
+ df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],
+ 'name': ['foo', 'bar', 'baz'] * 2})
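+ # a column literally named 'name' can collide with the Series.name
+ # attribute; an identity apply must still return it untouched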
+
+ result = df.groupby('key').apply(lambda x: x)
+ assert_frame_equal(result, df)
+
+
+def test_skip_group_keys():
+
+ tsf = tm.makeTimeDataFrame()
+
+ grouped = tsf.groupby(lambda x: x.month, group_keys=False)
+ result = grouped.apply(lambda x: x.sort_values(by='A')[:3])
+
+ pieces = []
+ for key, group in grouped:
+ pieces.append(group.sort_values(by='A')[:3])
+
+ expected = pd.concat(pieces)
+ assert_frame_equal(result, expected)
+
+ grouped = tsf['A'].groupby(lambda x: x.month, group_keys=False)
+ result = grouped.apply(lambda x: x.sort_values()[:3])
+
+ pieces = []
+ for key, group in grouped:
+ pieces.append(group.sort_values()[:3])
+
+ expected = pd.concat(pieces)
+ assert_series_equal(result, expected)
+
+
+def test_no_nonsense_name(frame):
+ # GH #995
+ s = frame['C'].copy()
+ s.name = None
+
+ result = s.groupby(frame['A']).agg(np.sum)
+ assert result.name is None
+
+
+def test_multifunc_sum_bug():
+ # GH #1065
+ x = DataFrame(np.arange(9).reshape(3, 3))
+ x['test'] = 0
+ x['fl'] = [1.3, 1.5, 1.6]
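+ # agg dict with mixed str/int column keys ('fl' and 2); the float
+ # column must keep float64 rather than being cast during aggregation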
+
+ grouped = x.groupby('test')
+ result = grouped.agg({'fl': 'sum', 2: 'size'})
+ assert result['fl'].dtype == np.float64
+
+
+def test_handle_dict_return_value(df):
+ def f(group):
+ return {'max': group.max(), 'min': group.min()}
+
+ def g(group):
+ return Series({'max': group.max(), 'min': group.min()})
+
+ result = df.groupby('A')['C'].apply(f)
+ expected = df.groupby('A')['C'].apply(g)
+
+ assert isinstance(result, Series)
+ assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize('grouper', ['A', ['A', 'B']])
+def test_set_group_name(df, grouper):
+ def f(group):
+ assert group.name is not None
+ return group
+
+ def freduce(group):
+ assert group.name is not None
+ return group.sum()
+
+ def foo(x):
+ return freduce(x)
+
+ grouped = df.groupby(grouper)
+
+ # make sure all these work
+ grouped.apply(f)
+ grouped.aggregate(freduce)
+ grouped.aggregate({'C': freduce, 'D': freduce})
+ grouped.transform(f)
+
+ grouped['C'].apply(f)
+ grouped['C'].aggregate(freduce)
+ grouped['C'].aggregate([freduce, foo])
+ grouped['C'].transform(f)
+
+
+def test_group_name_available_in_inference_pass():
+ # gh-15062
+ df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
+
+ names = []
+
+ def f(group):
+ names.append(group.name)
+ return group.copy()
+
+ df.groupby('a', sort=False, group_keys=False).apply(f)
+ # we expect 2 zeros because we call ``f`` once to see if a faster route
+ # can be used.
+ expected_names = [0, 0, 1, 2]
+ assert names == expected_names
+
+
+def test_no_dummy_key_names(df):
+ # see gh-1291
+ result = df.groupby(df['A'].values).sum()
+ assert result.index.name is None
+
+ result = df.groupby([df['A'].values, df['B'].values]).sum()
+ assert result.index.names == (None, None)
+
+
+def test_groupby_sort_multiindex_series():
+ # series multiindex groupby sort argument was not being passed through
+ # _compress_group_index
+ # GH 9444
+ index = MultiIndex(levels=[[1, 2], [1, 2]],
+ labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]],
+ names=['a', 'b'])
+ mseries = Series([0, 1, 2, 3, 4, 5], index=index)
+ index = MultiIndex(levels=[[1, 2], [1, 2]],
+ labels=[[0, 0, 1], [1, 0, 0]], names=['a', 'b'])
+ mseries_result = Series([0, 2, 4], index=index)
+
+ result = mseries.groupby(level=['a', 'b'], sort=False).first()
+ assert_series_equal(result, mseries_result)
+ result = mseries.groupby(level=['a', 'b'], sort=True).first()
+ assert_series_equal(result, mseries_result.sort_index())
+
+
+def test_groupby_reindex_inside_function():
+
+ periods = 1000
+ ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods)
+ df = DataFrame({'high': np.arange(
+ periods), 'low': np.arange(periods)}, index=ind)
+
+ def agg_before(hour, func, fix=False):
+ """
+ Run an aggregate func on the subset of data.
+ """
+
+ def _func(data):
+ d = data.loc[data.index.map(
+ lambda x: x.hour < hour)].dropna()
+ if fix:
+ data[data.index[0]]
+ if len(d) == 0:
return None
- return x.iloc[[0, -1]]
-
- result1 = test_df1.groupby('groups').apply(test_func)
- result2 = test_df2.groupby('groups').apply(test_func)
- index1 = MultiIndex.from_arrays([[1, 1], [0, 2]],
- names=['groups', None])
- index2 = MultiIndex.from_arrays([[2, 2], [1, 3]],
- names=['groups', None])
- expected1 = DataFrame({'groups': [1, 1], 'vars': [0, 2]},
- index=index1)
- expected2 = DataFrame({'groups': [2, 2], 'vars': [1, 3]},
- index=index2)
- tm.assert_frame_equal(result1, expected1)
- tm.assert_frame_equal(result2, expected2)
-
- def test_groupby_preserves_sort(self):
- # Test to ensure that groupby always preserves sort order of original
- # object. Issue #8588 and #9651
-
- df = DataFrame(
- {'int_groups': [3, 1, 0, 1, 0, 3, 3, 3],
- 'string_groups': ['z', 'a', 'z', 'a', 'a', 'g', 'g', 'g'],
- 'ints': [8, 7, 4, 5, 2, 9, 1, 1],
- 'floats': [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5],
- 'strings': ['z', 'd', 'a', 'e', 'word', 'word2', '42', '47']})
-
- # Try sorting on different types and with different group types
- for sort_column in ['ints', 'floats', 'strings', ['ints', 'floats'],
- ['ints', 'strings']]:
- for group_column in ['int_groups', 'string_groups',
- ['int_groups', 'string_groups']]:
-
- df = df.sort_values(by=sort_column)
-
- g = df.groupby(group_column)
-
- def test_sort(x):
- assert_frame_equal(x, x.sort_values(by=sort_column))
-
- g.apply(test_sort)
-
- def test_numpy_compat(self):
- # see gh-12811
- df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]})
- g = df.groupby('A')
-
- msg = "numpy operations are not valid with groupby"
-
- for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'):
- tm.assert_raises_regex(UnsupportedFunctionCall, msg,
- getattr(g, func), 1, 2, 3)
- tm.assert_raises_regex(UnsupportedFunctionCall, msg,
- getattr(g, func), foo=1)
-
- def test_group_shift_with_null_key(self):
- # This test is designed to replicate the segfault in issue #13813.
- n_rows = 1200
-
- # Generate a moderately large dataframe with occasional missing
- # values in column `B`, and then group by [`A`, `B`]. This should
- # force `-1` in `labels` array of `g.grouper.group_info` exactly
- # at those places, where the group-by key is partially missing.
- df = DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i)
- for i in range(n_rows)], dtype=float,
- columns=["A", "B", "Z"], index=None)
- g = df.groupby(["A", "B"])
-
- expected = DataFrame([(i + 12 if i % 3 and i < n_rows - 12
- else np.nan)
- for i in range(n_rows)], dtype=float,
- columns=["Z"], index=None)
- result = g.shift(-1)
-
- assert_frame_equal(result, expected)
-
- def test_pivot_table_values_key_error(self):
- # This test is designed to replicate the error in issue #14938
- df = pd.DataFrame({'eventDate':
- pd.date_range(pd.datetime.today(),
- periods=20, freq='M').tolist(),
- 'thename': range(0, 20)})
-
- df['year'] = df.set_index('eventDate').index.year
- df['month'] = df.set_index('eventDate').index.month
-
- with pytest.raises(KeyError):
- df.reset_index().pivot_table(index='year', columns='month',
- values='badname', aggfunc='count')
-
- def test_cummin_cummax(self):
- # GH 15048
- num_types = [np.int32, np.int64, np.float32, np.float64]
- num_mins = [np.iinfo(np.int32).min, np.iinfo(np.int64).min,
- np.finfo(np.float32).min, np.finfo(np.float64).min]
- num_max = [np.iinfo(np.int32).max, np.iinfo(np.int64).max,
- np.finfo(np.float32).max, np.finfo(np.float64).max]
- base_df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 2, 2],
- 'B': [3, 4, 3, 2, 2, 3, 2, 1]})
- expected_mins = [3, 3, 3, 2, 2, 2, 2, 1]
- expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]
-
- for dtype, min_val, max_val in zip(num_types, num_mins, num_max):
- df = base_df.astype(dtype)
-
- # cummin
- expected = pd.DataFrame({'B': expected_mins}).astype(dtype)
- result = df.groupby('A').cummin()
- tm.assert_frame_equal(result, expected)
- result = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
- tm.assert_frame_equal(result, expected)
-
- # Test cummin w/ min value for dtype
- df.loc[[2, 6], 'B'] = min_val
- expected.loc[[2, 3, 6, 7], 'B'] = min_val
- result = df.groupby('A').cummin()
- tm.assert_frame_equal(result, expected)
- expected = df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
- tm.assert_frame_equal(result, expected)
-
- # cummax
- expected = pd.DataFrame({'B': expected_maxs}).astype(dtype)
- result = df.groupby('A').cummax()
- tm.assert_frame_equal(result, expected)
- result = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
- tm.assert_frame_equal(result, expected)
-
- # Test cummax w/ max value for dtype
- df.loc[[2, 6], 'B'] = max_val
- expected.loc[[2, 3, 6, 7], 'B'] = max_val
- result = df.groupby('A').cummax()
- tm.assert_frame_equal(result, expected)
- expected = df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
- tm.assert_frame_equal(result, expected)
-
- # Test nan in some values
- base_df.loc[[0, 2, 4, 6], 'B'] = np.nan
- expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 2,
- np.nan, 3, np.nan, 1]})
- result = base_df.groupby('A').cummin()
- tm.assert_frame_equal(result, expected)
- expected = (base_df.groupby('A')
- .B
- .apply(lambda x: x.cummin())
- .to_frame())
- tm.assert_frame_equal(result, expected)
-
- expected = pd.DataFrame({'B': [np.nan, 4, np.nan, 4,
- np.nan, 3, np.nan, 3]})
- result = base_df.groupby('A').cummax()
- tm.assert_frame_equal(result, expected)
- expected = (base_df.groupby('A')
- .B
- .apply(lambda x: x.cummax())
- .to_frame())
- tm.assert_frame_equal(result, expected)
-
- # Test nan in entire column
- base_df['B'] = np.nan
- expected = pd.DataFrame({'B': [np.nan] * 8})
- result = base_df.groupby('A').cummin()
- tm.assert_frame_equal(expected, result)
- result = base_df.groupby('A').B.apply(lambda x: x.cummin()).to_frame()
- tm.assert_frame_equal(expected, result)
- result = base_df.groupby('A').cummax()
- tm.assert_frame_equal(expected, result)
- result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
- tm.assert_frame_equal(expected, result)
-
- # GH 15561
- df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001'])))
- expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b')
- for method in ['cummax', 'cummin']:
- result = getattr(df.groupby('a')['b'], method)()
- tm.assert_series_equal(expected, result)
-
- # GH 15635
- df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1]))
- result = df.groupby('a').b.cummax()
- expected = pd.Series([2, 1, 2], name='b')
- tm.assert_series_equal(result, expected)
-
- df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
- result = df.groupby('a').b.cummin()
- expected = pd.Series([1, 2, 1], name='b')
- tm.assert_series_equal(result, expected)
-
- @pytest.mark.parametrize('in_vals, out_vals', [
-
- # Basics: strictly increasing (T), strictly decreasing (F),
- # abs val increasing (F), non-strictly increasing (T)
- ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1],
- [True, False, False, True]),
-
- # Test with inf vals
- ([1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf],
- [True, False, True, False]),
-
- # Test with nan vals; should always be False
- ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
- [False, False, False, False]),
- ])
- def test_is_monotonic_increasing(self, in_vals, out_vals):
- # GH 17015
- source_dict = {
- 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'],
- 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'],
- 'C': in_vals}
- df = pd.DataFrame(source_dict)
- result = df.groupby('B').C.is_monotonic_increasing
- index = Index(list('abcd'), name='B')
- expected = pd.Series(index=index, data=out_vals, name='C')
- tm.assert_series_equal(result, expected)
-
- # Also check result equal to manually taking x.is_monotonic_increasing.
- expected = (
- df.groupby(['B']).C.apply(lambda x: x.is_monotonic_increasing))
- tm.assert_series_equal(result, expected)
-
- @pytest.mark.parametrize('in_vals, out_vals', [
- # Basics: strictly decreasing (T), strictly increasing (F),
- # abs val decreasing (F), non-strictly increasing (T)
- ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1],
- [True, False, False, True]),
-
- # Test with inf vals
- ([np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf],
- [True, True, False, True]),
-
- # Test with nan vals; should always be False
- ([1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
- [False, False, False, False]),
- ])
- def test_is_monotonic_decreasing(self, in_vals, out_vals):
- # GH 17015
- source_dict = {
- 'A': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'],
- 'B': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd'],
- 'C': in_vals}
-
- df = pd.DataFrame(source_dict)
- result = df.groupby('B').C.is_monotonic_decreasing
- index = Index(list('abcd'), name='B')
- expected = pd.Series(index=index, data=out_vals, name='C')
- tm.assert_series_equal(result, expected)
-
- def test_apply_numeric_coercion_when_datetime(self):
- # In the past, group-by/apply operations have been over-eager
- # in converting dtypes to numeric, in the presence of datetime
- # columns. Various GH issues were filed, the reproductions
- # for which are here.
-
- # GH 15670
- df = pd.DataFrame({'Number': [1, 2],
- 'Date': ["2017-03-02"] * 2,
- 'Str': ["foo", "inf"]})
- expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
- df.Date = pd.to_datetime(df.Date)
- result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
- tm.assert_series_equal(result['Str'], expected['Str'])
-
- # GH 15421
- df = pd.DataFrame({'A': [10, 20, 30],
- 'B': ['foo', '3', '4'],
- 'T': [pd.Timestamp("12:31:22")] * 3})
-
- def get_B(g):
- return g.iloc[0][['B']]
- result = df.groupby('A').apply(get_B)['B']
- expected = df.B
- expected.index = df.A
- tm.assert_series_equal(result, expected)
-
- # GH 14423
- def predictions(tool):
- out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
- if 'step1' in list(tool.State):
- out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
- if 'step2' in list(tool.State):
- out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
- out['useTime'] = str(
- tool[tool.State == 'step2'].oTime.values[0])
- return out
- df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
- 'State': ['step1', 'step2', 'step1', 'step2'],
- 'oTime': ['', '2016-09-19 05:24:33',
- '', '2016-09-19 23:59:04'],
- 'Machine': ['23', '36L', '36R', '36R']})
- df2 = df1.copy()
- df2.oTime = pd.to_datetime(df2.oTime)
- expected = df1.groupby('Key').apply(predictions).p1
- result = df2.groupby('Key').apply(predictions).p1
- tm.assert_series_equal(expected, result)
-
- def test_pipe(self):
- # Test the pipe method of DataFrameGroupBy.
- # Issue #17871
-
- random_state = np.random.RandomState(1234567890)
-
- df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
- 'foo', 'bar', 'foo', 'foo'],
- 'B': random_state.randn(8),
- 'C': random_state.randn(8)})
-
- def f(dfgb):
- return dfgb.B.max() - dfgb.C.min().min()
-
- def square(srs):
- return srs ** 2
-
- # Note that the transformations are
- # GroupBy -> Series
- # Series -> Series
- # This then chains the GroupBy.pipe and the
- # NDFrame.pipe methods
- result = df.groupby('A').pipe(f).pipe(square)
-
- index = Index([u'bar', u'foo'], dtype='object', name=u'A')
- expected = pd.Series([8.99110003361, 8.17516964785], name='B',
- index=index)
-
- assert_series_equal(expected, result)
-
- def test_pipe_args(self):
- # Test passing args to the pipe method of DataFrameGroupBy.
- # Issue #17871
-
- df = pd.DataFrame({'group': ['A', 'A', 'B', 'B', 'C'],
- 'x': [1.0, 2.0, 3.0, 2.0, 5.0],
- 'y': [10.0, 100.0, 1000.0, -100.0, -1000.0]})
-
- def f(dfgb, arg1):
- return (dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False)
- .groupby(dfgb.grouper))
-
- def g(dfgb, arg2):
- return dfgb.sum() / dfgb.sum().sum() + arg2
-
- def h(df, arg3):
- return df.x + df.y - arg3
-
- result = (df
- .groupby('group')
- .pipe(f, 0)
- .pipe(g, 10)
- .pipe(h, 100))
-
- # Assert the results here
- index = pd.Index(['A', 'B', 'C'], name='group')
- expected = pd.Series([-79.5160891089, -78.4839108911, -80],
- index=index)
-
- assert_series_equal(expected, result)
-
- # test SeriesGroupby.pipe
- ser = pd.Series([1, 1, 2, 2, 3, 3])
- result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count())
-
- expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
-
- assert_series_equal(result, expected)
-
- def test_empty_dataframe_groupby(self):
- # GH8093
- df = DataFrame(columns=['A', 'B', 'C'])
-
- result = df.groupby('A').sum()
- expected = DataFrame(columns=['B', 'C'], dtype=np.float64)
- expected.index.name = 'A'
-
- assert_frame_equal(result, expected)
-
- def test_tuple_warns(self):
- # https://github.com/pandas-dev/pandas/issues/18314
- df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2],
- 'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]})
- with tm.assert_produces_warning(FutureWarning) as w:
- df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
-
- assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+ return func(d)
+
+ return _func
+
+ def afunc(data):
+ d = data.select(lambda x: x.hour < 11).dropna()
+ return np.max(d)
+
+ grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))
+ closure_bad = grouped.agg({'high': agg_before(11, np.max)})
+ closure_good = grouped.agg({'high': agg_before(11, np.max, True)})
+
+ assert_frame_equal(closure_bad, closure_good)
+
+
+def test_groupby_multiindex_missing_pair():
+ # GH9049
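+ # only the observed (group1, group2) pairs should appear in the
+ # aggregated result; the unobserved pair ('b', 'd') must not be
+ # filled in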
+ df = DataFrame({'group1': ['a', 'a', 'a', 'b'],
+ 'group2': ['c', 'c', 'd', 'c'],
+ 'value': [1, 1, 1, 5]})
+ df = df.set_index(['group1', 'group2'])
+ df_grouped = df.groupby(level=['group1', 'group2'], sort=True)
+
+ res = df_grouped.agg('sum')
+ idx = MultiIndex.from_tuples(
+ [('a', 'c'), ('a', 'd'), ('b', 'c')], names=['group1', 'group2'])
+ exp = DataFrame([[2], [1], [5]], index=idx, columns=['value'])
+
+ tm.assert_frame_equal(res, exp)
+
+
+def test_groupby_multiindex_not_lexsorted():
+ # GH 11640
+
+ # define the lexsorted version
+ lexsorted_mi = MultiIndex.from_tuples(
+ [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
+ lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
+ assert lexsorted_df.columns.is_lexsorted()
+
+ # define the non-lexsorted version
+ not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'],
+ data=[[1, 'b1', 'c1', 3],
+ [1, 'b2', 'c2', 4]])
+ not_lexsorted_df = not_lexsorted_df.pivot_table(
+ index='a', columns=['b', 'c'], values='d')
+ not_lexsorted_df = not_lexsorted_df.reset_index()
+ assert not not_lexsorted_df.columns.is_lexsorted()
+
+ # compare the results
+ tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
+
+ expected = lexsorted_df.groupby('a').mean()
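+ # the result is still computed correctly; pandas merely warns that
+ # operating on a non-lexsorted MultiIndex can be slow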
+ with tm.assert_produces_warning(PerformanceWarning):
+ result = not_lexsorted_df.groupby('a').mean()
+ tm.assert_frame_equal(expected, result)
+
+ # a transforming function should work regardless of sort
+ # GH 14776
+ df = DataFrame({'x': ['a', 'a', 'b', 'a'],
+ 'y': [1, 1, 2, 2],
+ 'z': [1, 2, 3, 4]}).set_index(['x', 'y'])
+ assert not df.index.is_lexsorted()
+
+ for level in [0, 1, [0, 1]]:
+ for sort in [False, True]:
+ result = df.groupby(level=level, sort=sort).apply(
+ DataFrame.drop_duplicates)
+ expected = df
+ tm.assert_frame_equal(expected, result)
+
+ result = df.sort_index().groupby(level=level, sort=sort).apply(
+ DataFrame.drop_duplicates)
+ expected = df.sort_index()
+ tm.assert_frame_equal(expected, result)
+
+
+def test_index_label_overlaps_location():
+ # checking we don't have any label/location confusion in
+ # the wake of GH5375
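+ # index labels [2, 0, 2, 1, 1] deliberately collide with positional
+ # values, so any label/position mix-up in filter would select the
+ # wrong rows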
+ df = DataFrame(list('ABCDE'), index=[2, 0, 2, 1, 1])
+ g = df.groupby(list('ababb'))
+ actual = g.filter(lambda x: len(x) > 2)
+ expected = df.iloc[[1, 3, 4]]
+ assert_frame_equal(actual, expected)
+
+ ser = df[0]
+ g = ser.groupby(list('ababb'))
+ actual = g.filter(lambda x: len(x) > 2)
+ expected = ser.take([1, 3, 4])
+ assert_series_equal(actual, expected)
+
+ # ... and again, with a generic Index of floats
+ df.index = df.index.astype(float)
+ g = df.groupby(list('ababb'))
+ actual = g.filter(lambda x: len(x) > 2)
+ expected = df.iloc[[1, 3, 4]]
+ assert_frame_equal(actual, expected)
+
+ ser = df[0]
+ g = ser.groupby(list('ababb'))
+ actual = g.filter(lambda x: len(x) > 2)
+ expected = ser.take([1, 3, 4])
+ assert_series_equal(actual, expected)
+
+
+def test_transform_doesnt_clobber_ints():
+ # GH 7972
+ n = 6
+ x = np.arange(n)
+ df = DataFrame({'a': x // 2, 'b': 2.0 * x, 'c': 3.0 * x})
+ df2 = DataFrame({'a': x // 2 * 1.0, 'b': 2.0 * x, 'c': 3.0 * x})
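+ # df2 is the all-float analogue of df: grouping by the integer key
+ # must produce exactly the same transformed means as grouping by its
+ # float equivalent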
+
+ gb = df.groupby('a')
+ result = gb.transform('mean')
+
+ gb2 = df2.groupby('a')
+ expected = gb2.transform('mean')
+ tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize('sort_column', ['ints', 'floats', 'strings',
+ ['ints', 'floats'],
+ ['ints', 'strings']])
+@pytest.mark.parametrize('group_column', ['int_groups', 'string_groups',
+ ['int_groups', 'string_groups']])
+def test_groupby_preserves_sort(sort_column, group_column):
+ # Test to ensure that groupby always preserves the sort order of the
+ # original object. Issues #8588 and #9651
+
+ df = DataFrame(
+ {'int_groups': [3, 1, 0, 1, 0, 3, 3, 3],
+ 'string_groups': ['z', 'a', 'z', 'a', 'a', 'g', 'g', 'g'],
+ 'ints': [8, 7, 4, 5, 2, 9, 1, 1],
+ 'floats': [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5],
+ 'strings': ['z', 'd', 'a', 'e', 'word', 'word2', '42', '47']})
+
+ # Try sorting on different types and with different group types
+
+ df = df.sort_values(by=sort_column)
+ g = df.groupby(group_column)
+
+ def test_sort(x):
+ assert_frame_equal(x, x.sort_values(by=sort_column))
+ g.apply(test_sort)
+
+
+def test_group_shift_with_null_key():
+ # This test is designed to replicate the segfault in issue #13813.
+ n_rows = 1200
+
+ # Generate a moderately large dataframe with occasional missing
+ # values in column `B`, and then group by [`A`, `B`]. This should
+ # force `-1` in the `labels` array of `g.grouper.group_info` exactly
+ # at those places where the group-by key is partially missing.
+ df = DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i)
+ for i in range(n_rows)], dtype=float,
+ columns=["A", "B", "Z"], index=None)
+ g = df.groupby(["A", "B"])
+
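+ # within each (A, B) group consecutive members sit 12 rows apart, so
+ # shift(-1) yields row i + 12; rows whose key contains NaN drop out
+ # of the grouping and therefore stay NaN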
+ expected = DataFrame([(i + 12 if i % 3 and i < n_rows - 12
+ else np.nan)
+ for i in range(n_rows)], dtype=float,
+ columns=["Z"], index=None)
+ result = g.shift(-1)
+
+ assert_frame_equal(result, expected)
+
- with tm.assert_produces_warning(None):
- df.groupby(('a', 'b')).c.mean()
+
+def test_pivot_table_values_key_error():
+ # This test is designed to replicate the error in issue #14938
+ df = pd.DataFrame({'eventDate':
+ pd.date_range(pd.datetime.today(),
+ periods=20, freq='M').tolist(),
+ 'thename': range(0, 20)})
- def test_tuple_warns_unhashable(self):
- # https://github.com/pandas-dev/pandas/issues/18314
- business_dates = date_range(start='4/1/2014', end='6/30/2014',
- freq='B')
- df = DataFrame(1, index=business_dates, columns=['a', 'b'])
+
+ df['year'] = df.set_index('eventDate').index.year
+ df['month'] = df.set_index('eventDate').index.month
+
+ with pytest.raises(KeyError):
+ df.reset_index().pivot_table(index='year', columns='month',
+ values='badname', aggfunc='count')
- with tm.assert_produces_warning(FutureWarning) as w:
- df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
- assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+
+
+def test_empty_dataframe_groupby():
+ # GH8093
+ df = DataFrame(columns=['A', 'B', 'C'])
- def test_tuple_correct_keyerror(self):
- # https://github.com/pandas-dev/pandas/issues/18798
- df = pd.DataFrame(1, index=range(3),
- columns=pd.MultiIndex.from_product([[1, 2],
- [3, 4]]))
- with tm.assert_raises_regex(KeyError, "(7, 8)"):
- df.groupby((7, 8)).mean()
+
+ result = df.groupby('A').sum()
+ expected = DataFrame(columns=['B', 'C'], dtype=np.float64)
+ expected.index.name = 'A'
+
+ assert_frame_equal(result, expected)
-def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
- tups = lmap(tuple, df[keys].values)
- tups = com._asarray_tuplesafe(tups)
- expected = f(df.groupby(tups)[field])
- for k, v in compat.iteritems(expected):
- assert (result[k] == v)
+
+
+def test_tuple_warns():
+ # https://github.com/pandas-dev/pandas/issues/18314
+ df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2],
+ 'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]})
+ with tm.assert_produces_warning(FutureWarning) as w:
+ df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
+
+ assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+
+ with tm.assert_produces_warning(None):
+ df.groupby(('a', 'b')).c.mean()
+
+
+def test_tuple_warns_unhashable():
+ # https://github.com/pandas-dev/pandas/issues/18314
+ business_dates = date_range(start='4/1/2014', end='6/30/2014',
+ freq='B')
+ df = DataFrame(1, index=business_dates, columns=['a', 'b'])
+
+ with tm.assert_produces_warning(FutureWarning) as w:
+ df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
+
+ assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+
+
+def test_tuple_correct_keyerror():
+ # https://github.com/pandas-dev/pandas/issues/18798
+ df = pd.DataFrame(1, index=range(3),
+ columns=pd.MultiIndex.from_product([[1, 2],
+ [3, 4]]))
+ with tm.assert_raises_regex(KeyError, "(7, 8)"):
+ df.groupby((7, 8)).mean()
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 57becd342d370..743237f5b386c 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -9,6 +9,7 @@
Index, MultiIndex, DataFrame, Series, CategoricalIndex)
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
assert_series_equal, assert_almost_equal)
+from pandas.core.groupby.groupby import Grouping
from pandas.compat import lrange, long
from pandas import compat
@@ -16,13 +17,12 @@
import pandas.util.testing as tm
import pandas as pd
-from .common import MixIn
# selection
# --------------------------------
-class TestSelection(MixIn):
+class TestSelection(object):
def test_select_bad_cols(self):
df = DataFrame([[1, 2]], columns=['A', 'B'])
@@ -48,14 +48,14 @@ def test_groupby_duplicated_column_errormsg(self):
assert c.columns.nlevels == 1
assert c.columns.size == 3
- def test_column_select_via_attr(self):
- result = self.df.groupby('A').C.sum()
- expected = self.df.groupby('A')['C'].sum()
+ def test_column_select_via_attr(self, df):
+ result = df.groupby('A').C.sum()
+ expected = df.groupby('A')['C'].sum()
assert_series_equal(result, expected)
- self.df['mean'] = 1.5
- result = self.df.groupby('A').mean()
- expected = self.df.groupby('A').agg(np.mean)
+ df['mean'] = 1.5
+ result = df.groupby('A').mean()
+ expected = df.groupby('A').agg(np.mean)
assert_frame_equal(result, expected)
def test_getitem_list_of_columns(self):
@@ -96,7 +96,7 @@ def test_getitem_numeric_column_names(self):
# grouping
# --------------------------------
-class TestGrouping(MixIn):
+class TestGrouping(object):
def test_grouper_index_types(self):
# related GH5375
@@ -291,17 +291,17 @@ def test_grouper_getting_correct_binner(self):
names=['one', 'two']))
assert_frame_equal(result, expected)
- def test_grouper_iter(self):
- assert sorted(self.df.groupby('A').grouper) == ['bar', 'foo']
+ def test_grouper_iter(self, df):
+ assert sorted(df.groupby('A').grouper) == ['bar', 'foo']
- def test_empty_groups(self):
+ def test_empty_groups(self, df):
# see gh-1048
- pytest.raises(ValueError, self.df.groupby, [])
+ pytest.raises(ValueError, df.groupby, [])
- def test_groupby_grouper(self):
- grouped = self.df.groupby('A')
+ def test_groupby_grouper(self, df):
+ grouped = df.groupby('A')
- result = self.df.groupby(grouped.grouper).mean()
+ result = df.groupby(grouped.grouper).mean()
expected = grouped.mean()
tm.assert_frame_equal(result, expected)
@@ -339,10 +339,9 @@ def test_groupby_grouper_f_sanity_checked(self):
pytest.raises(AssertionError, ts.groupby, lambda key: key[0:6])
- def test_grouping_error_on_multidim_input(self):
- from pandas.core.groupby.groupby import Grouping
+ def test_grouping_error_on_multidim_input(self, df):
pytest.raises(ValueError,
- Grouping, self.df.index, self.df[['A', 'A']])
+ Grouping, df.index, df[['A', 'A']])
def test_multiindex_passthru(self):
@@ -354,26 +353,25 @@ def test_multiindex_passthru(self):
result = df.groupby(axis=1, level=[0, 1]).first()
assert_frame_equal(result, df)
- def test_multiindex_negative_level(self):
+ def test_multiindex_negative_level(self, mframe):
# GH 13901
- result = self.mframe.groupby(level=-1).sum()
- expected = self.mframe.groupby(level='second').sum()
+ result = mframe.groupby(level=-1).sum()
+ expected = mframe.groupby(level='second').sum()
assert_frame_equal(result, expected)
- result = self.mframe.groupby(level=-2).sum()
- expected = self.mframe.groupby(level='first').sum()
+ result = mframe.groupby(level=-2).sum()
+ expected = mframe.groupby(level='first').sum()
assert_frame_equal(result, expected)
- result = self.mframe.groupby(level=[-2, -1]).sum()
- expected = self.mframe
+ result = mframe.groupby(level=[-2, -1]).sum()
+ expected = mframe
assert_frame_equal(result, expected)
- result = self.mframe.groupby(level=[-1, 'first']).sum()
- expected = self.mframe.groupby(level=['second', 'first']).sum()
+ result = mframe.groupby(level=[-1, 'first']).sum()
+ expected = mframe.groupby(level=['second', 'first']).sum()
assert_frame_equal(result, expected)
- def test_multifunc_select_col_integer_cols(self):
- df = self.df
+ def test_multifunc_select_col_integer_cols(self, df):
df.columns = np.arange(len(df.columns))
# it works!
@@ -428,9 +426,9 @@ def test_groupby_multiindex_tuple(self):
tm.assert_dict_equal(expected, result)
@pytest.mark.parametrize('sort', [True, False])
- def test_groupby_level(self, sort):
+ def test_groupby_level(self, sort, mframe, df):
# GH 17537
- frame = self.mframe
+ frame = mframe
deleveled = frame.reset_index()
result0 = frame.groupby(level=0, sort=sort).sum()
@@ -464,7 +462,7 @@ def test_groupby_level(self, sort):
assert_frame_equal(result1, expected1.T)
# raise exception for non-MultiIndex
- pytest.raises(ValueError, self.df.groupby, level=1)
+ pytest.raises(ValueError, df.groupby, level=1)
def test_groupby_level_index_names(self):
# GH4014 this used to raise ValueError since 'exp'>1 (in py2)
@@ -496,9 +494,9 @@ def test_groupby_level_with_nas(self, sort):
expected = Series([6., 18.], index=[0.0, 1.0])
assert_series_equal(result, expected)
- def test_groupby_args(self):
+ def test_groupby_args(self, mframe):
# PR8618 and issue 8015
- frame = self.mframe
+ frame = mframe
def j():
frame.groupby()
@@ -516,14 +514,14 @@ def k():
[True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]],
[False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]]
])
- def test_level_preserve_order(self, sort, labels):
+ def test_level_preserve_order(self, sort, labels, mframe):
# GH 17537
- grouped = self.mframe.groupby(level=0, sort=sort)
+ grouped = mframe.groupby(level=0, sort=sort)
exp_labels = np.array(labels, np.intp)
assert_almost_equal(grouped.grouper.labels[0], exp_labels)
- def test_grouping_labels(self):
- grouped = self.mframe.groupby(self.mframe.index.get_level_values(0))
+ def test_grouping_labels(self, mframe):
+ grouped = mframe.groupby(mframe.index.get_level_values(0))
exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
assert_almost_equal(grouped.grouper.labels[0], exp_labels)
@@ -531,7 +529,7 @@ def test_grouping_labels(self):
# get_group
# --------------------------------
-class TestGetGroup(MixIn):
+class TestGetGroup(object):
def test_get_group(self):
with catch_warnings(record=True):
@@ -638,29 +636,28 @@ def test_gb_key_len_equal_axis_len(self):
# groups & iteration
# --------------------------------
-class TestIteration(MixIn):
+class TestIteration(object):
- def test_groups(self):
- grouped = self.df.groupby(['A'])
+ def test_groups(self, df):
+ grouped = df.groupby(['A'])
groups = grouped.groups
assert groups is grouped.groups # caching works
for k, v in compat.iteritems(grouped.groups):
- assert (self.df.loc[v]['A'] == k).all()
+ assert (df.loc[v]['A'] == k).all()
- grouped = self.df.groupby(['A', 'B'])
+ grouped = df.groupby(['A', 'B'])
groups = grouped.groups
assert groups is grouped.groups # caching works
for k, v in compat.iteritems(grouped.groups):
- assert (self.df.loc[v]['A'] == k[0]).all()
- assert (self.df.loc[v]['B'] == k[1]).all()
+ assert (df.loc[v]['A'] == k[0]).all()
+ assert (df.loc[v]['B'] == k[1]).all()
- def test_grouping_is_iterable(self):
+ def test_grouping_is_iterable(self, tsframe):
# this code path isn't used anywhere else
# not sure it's useful
- grouped = self.tsframe.groupby([lambda x: x.weekday(), lambda x: x.year
- ])
+ grouped = tsframe.groupby([lambda x: x.weekday(), lambda x: x.year])
# test it works
for g in grouped.grouper.groupings[0]:
@@ -682,7 +679,7 @@ def test_multi_iter(self):
assert e2 == two
assert_series_equal(three, e3)
- def test_multi_iter_frame(self):
+ def test_multi_iter_frame(self, three_group):
k1 = np.array(['b', 'b', 'b', 'a', 'a', 'a'])
k2 = np.array(['1', '2', '1', '2', '1', '2'])
df = DataFrame({'v1': np.random.randn(6),
@@ -715,7 +712,7 @@ def test_multi_iter_frame(self):
assert len(groups) == 2
# axis = 1
- three_levels = self.three_group.groupby(['A', 'B', 'C']).mean()
+ three_levels = three_group.groupby(['A', 'B', 'C']).mean()
grouped = three_levels.T.groupby(axis=1, level=(1, 2))
for key, group in grouped:
pass
@@ -733,13 +730,13 @@ def test_multi_iter_panel(self):
expected = wp.reindex(major=exp_axis)
assert_panel_equal(group, expected)
- def test_dictify(self):
- dict(iter(self.df.groupby('A')))
- dict(iter(self.df.groupby(['A', 'B'])))
- dict(iter(self.df['C'].groupby(self.df['A'])))
- dict(iter(self.df['C'].groupby([self.df['A'], self.df['B']])))
- dict(iter(self.df.groupby('A')['C']))
- dict(iter(self.df.groupby(['A', 'B'])['C']))
+ def test_dictify(self, df):
+ dict(iter(df.groupby('A')))
+ dict(iter(df.groupby(['A', 'B'])))
+ dict(iter(df['C'].groupby(df['A'])))
+ dict(iter(df['C'].groupby([df['A'], df['B']])))
+ dict(iter(df.groupby('A')['C']))
+ dict(iter(df.groupby(['A', 'B'])['C']))
def test_groupby_with_small_elem(self):
# GH 8542
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
index ccde545b5b8e9..a32ba9ad76f14 100644
--- a/pandas/tests/groupby/test_nth.py
+++ b/pandas/tests/groupby/test_nth.py
@@ -7,314 +7,316 @@
assert_produces_warning,
assert_series_equal)
-from .common import MixIn
-
-
-class TestNth(MixIn):
-
- def test_first_last_nth(self):
- # tests for first / last / nth
- grouped = self.df.groupby('A')
- first = grouped.first()
- expected = self.df.loc[[1, 0], ['B', 'C', 'D']]
- expected.index = Index(['bar', 'foo'], name='A')
- expected = expected.sort_index()
- assert_frame_equal(first, expected)
-
- nth = grouped.nth(0)
- assert_frame_equal(nth, expected)
-
- last = grouped.last()
- expected = self.df.loc[[5, 7], ['B', 'C', 'D']]
- expected.index = Index(['bar', 'foo'], name='A')
- assert_frame_equal(last, expected)
-
- nth = grouped.nth(-1)
- assert_frame_equal(nth, expected)
-
- nth = grouped.nth(1)
- expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy()
- expected.index = Index(['foo', 'bar'], name='A')
- expected = expected.sort_index()
- assert_frame_equal(nth, expected)
-
- # it works!
- grouped['B'].first()
- grouped['B'].last()
- grouped['B'].nth(0)
-
- self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan
- assert isna(grouped['B'].first()['foo'])
- assert isna(grouped['B'].last()['foo'])
- assert isna(grouped['B'].nth(0)['foo'])
-
- # v0.14.0 whatsnew
- df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
- g = df.groupby('A')
- result = g.first()
- expected = df.iloc[[1, 2]].set_index('A')
- assert_frame_equal(result, expected)
-
- expected = df.iloc[[1, 2]].set_index('A')
- result = g.nth(0, dropna='any')
- assert_frame_equal(result, expected)
-
- def test_first_last_nth_dtypes(self):
-
- df = self.df_mixed_floats.copy()
- df['E'] = True
- df['F'] = 1
-
- # tests for first / last / nth
- grouped = df.groupby('A')
- first = grouped.first()
- expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']]
- expected.index = Index(['bar', 'foo'], name='A')
- expected = expected.sort_index()
- assert_frame_equal(first, expected)
-
- last = grouped.last()
- expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']]
- expected.index = Index(['bar', 'foo'], name='A')
- expected = expected.sort_index()
- assert_frame_equal(last, expected)
-
- nth = grouped.nth(1)
- expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']]
- expected.index = Index(['bar', 'foo'], name='A')
- expected = expected.sort_index()
- assert_frame_equal(nth, expected)
-
- # GH 2763, first/last shifting dtypes
- idx = lrange(10)
- idx.append(9)
- s = Series(data=lrange(11), index=idx, name='IntCol')
- assert s.dtype == 'int64'
- f = s.groupby(level=0).first()
- assert f.dtype == 'int64'
-
- def test_nth(self):
- df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
- g = df.groupby('A')
-
- assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
- assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
- assert_frame_equal(g.nth(2), df.loc[[]].set_index('A'))
- assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
- assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
- assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A'))
- assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]])
- assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]])
- assert_frame_equal(g[['B']].nth(0),
- df.loc[[0, 2], ['A', 'B']].set_index('A'))
-
- exp = df.set_index('A')
- assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])
- assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]])
-
- exp['B'] = np.nan
- assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]])
- assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]])
-
- # out of bounds, regression from 0.13.1
- # GH 6621
- df = DataFrame({'color': {0: 'green',
- 1: 'green',
- 2: 'red',
- 3: 'red',
- 4: 'red'},
- 'food': {0: 'ham',
- 1: 'eggs',
- 2: 'eggs',
- 3: 'ham',
- 4: 'pork'},
- 'two': {0: 1.5456590000000001,
- 1: -0.070345000000000005,
- 2: -2.4004539999999999,
- 3: 0.46206000000000003,
- 4: 0.52350799999999997},
- 'one': {0: 0.56573799999999996,
- 1: -0.9742360000000001,
- 2: 1.033801,
- 3: -0.78543499999999999,
- 4: 0.70422799999999997}}).set_index(['color',
- 'food'])
-
- result = df.groupby(level=0, as_index=False).nth(2)
- expected = df.iloc[[-1]]
- assert_frame_equal(result, expected)
-
- result = df.groupby(level=0, as_index=False).nth(3)
- expected = df.loc[[]]
- assert_frame_equal(result, expected)
-
- # GH 7559
- # from the vbench
- df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64')
- s = df[1]
- g = df[0]
- expected = s.groupby(g).first()
- expected2 = s.groupby(g).apply(lambda x: x.iloc[0])
- assert_series_equal(expected2, expected, check_names=False)
- assert expected.name == 1
- assert expected2.name == 1
-
- # validate first
- v = s[g == 1].iloc[0]
- assert expected.iloc[0] == v
- assert expected2.iloc[0] == v
-
- # this is NOT the same as .first (as sorted is default!)
- # as it keeps the order in the series (and not the group order)
- # related GH 7287
- expected = s.groupby(g, sort=False).first()
- result = s.groupby(g, sort=False).nth(0, dropna='all')
- assert_series_equal(result, expected)
-
- # doc example
- df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
- g = df.groupby('A')
- # PR 17493, related to issue 11038
- # test Series.nth with True for dropna produces FutureWarning
- with assert_produces_warning(FutureWarning):
- result = g.B.nth(0, dropna=True)
- expected = g.B.first()
- assert_series_equal(result, expected)
-
- # test multiple nth values
- df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]],
- columns=['A', 'B'])
- g = df.groupby('A')
-
- assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A'))
- assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A'))
- assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A'))
- assert_frame_equal(
- g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A'))
- assert_frame_equal(
- g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
- assert_frame_equal(
- g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
- assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A'))
- assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A'))
-
- business_dates = pd.date_range(start='4/1/2014', end='6/30/2014',
- freq='B')
- df = DataFrame(1, index=business_dates, columns=['a', 'b'])
- # get the first, fourth and last two business days for each month
- key = [df.index.year, df.index.month]
- result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
- expected_dates = pd.to_datetime(
- ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
- '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5',
- '2014/6/27', '2014/6/30'])
- expected = DataFrame(1, columns=['a', 'b'], index=expected_dates)
- assert_frame_equal(result, expected)
-
- def test_nth_multi_index(self):
- # PR 9090, related to issue 8979
- # test nth on MultiIndex, should match .first()
- grouped = self.three_group.groupby(['A', 'B'])
- result = grouped.nth(0)
- expected = grouped.first()
- assert_frame_equal(result, expected)
-
- def test_nth_multi_index_as_expected(self):
- # PR 9090, related to issue 8979
- # test nth on MultiIndex
- three_group = DataFrame(
- {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
- 'foo', 'foo', 'foo'],
- 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
- 'two', 'two', 'one'],
- 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
- 'dull', 'shiny', 'shiny', 'shiny']})
- grouped = three_group.groupby(['A', 'B'])
- result = grouped.nth(0)
- expected = DataFrame(
- {'C': ['dull', 'dull', 'dull', 'dull']},
- index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'],
- ['one', 'two', 'one', 'two']],
- names=['A', 'B']))
- assert_frame_equal(result, expected)
-
- def test_groupby_head_tail(self):
- df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
- g_as = df.groupby('A', as_index=True)
- g_not_as = df.groupby('A', as_index=False)
-
- # as_index= False, much easier
- assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1))
- assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))
-
- empty_not_as = DataFrame(columns=df.columns,
- index=pd.Index([], dtype=df.index.dtype))
- empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype)
- empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype)
- assert_frame_equal(empty_not_as, g_not_as.head(0))
- assert_frame_equal(empty_not_as, g_not_as.tail(0))
- assert_frame_equal(empty_not_as, g_not_as.head(-1))
- assert_frame_equal(empty_not_as, g_not_as.tail(-1))
-
- assert_frame_equal(df, g_not_as.head(7)) # contains all
- assert_frame_equal(df, g_not_as.tail(7))
-
- # as_index=True, (used to be different)
- df_as = df
-
- assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1))
- assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))
-
- empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
- empty_as['A'] = empty_not_as['A'].astype(df.A.dtype)
- empty_as['B'] = empty_not_as['B'].astype(df.B.dtype)
- assert_frame_equal(empty_as, g_as.head(0))
- assert_frame_equal(empty_as, g_as.tail(0))
- assert_frame_equal(empty_as, g_as.head(-1))
- assert_frame_equal(empty_as, g_as.tail(-1))
-
- assert_frame_equal(df_as, g_as.head(7)) # contains all
- assert_frame_equal(df_as, g_as.tail(7))
-
- # test with selection
- assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []])
- assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']])
- assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']])
- assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]])
-
- assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []])
- assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']])
- assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']])
- assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]])
-
- def test_group_selection_cache(self):
- # GH 12839 nth, head, and tail should return same result consistently
- df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
- expected = df.iloc[[0, 2]].set_index('A')
-
- g = df.groupby('A')
- result1 = g.head(n=2)
- result2 = g.nth(0)
- assert_frame_equal(result1, df)
- assert_frame_equal(result2, expected)
-
- g = df.groupby('A')
- result1 = g.tail(n=2)
- result2 = g.nth(0)
- assert_frame_equal(result1, df)
- assert_frame_equal(result2, expected)
-
- g = df.groupby('A')
- result1 = g.nth(0)
- result2 = g.head(n=2)
- assert_frame_equal(result1, expected)
- assert_frame_equal(result2, df)
-
- g = df.groupby('A')
- result1 = g.nth(0)
- result2 = g.tail(n=2)
- assert_frame_equal(result1, expected)
- assert_frame_equal(result2, df)
+
+def test_first_last_nth(df):
+ # tests for first / last / nth
+ grouped = df.groupby('A')
+ first = grouped.first()
+ expected = df.loc[[1, 0], ['B', 'C', 'D']]
+ expected.index = Index(['bar', 'foo'], name='A')
+ expected = expected.sort_index()
+ assert_frame_equal(first, expected)
+
+ nth = grouped.nth(0)
+ assert_frame_equal(nth, expected)
+
+ last = grouped.last()
+ expected = df.loc[[5, 7], ['B', 'C', 'D']]
+ expected.index = Index(['bar', 'foo'], name='A')
+ assert_frame_equal(last, expected)
+
+ nth = grouped.nth(-1)
+ assert_frame_equal(nth, expected)
+
+ nth = grouped.nth(1)
+ expected = df.loc[[2, 3], ['B', 'C', 'D']].copy()
+ expected.index = Index(['foo', 'bar'], name='A')
+ expected = expected.sort_index()
+ assert_frame_equal(nth, expected)
+
+ # it works!
+ grouped['B'].first()
+ grouped['B'].last()
+ grouped['B'].nth(0)
+
+ df.loc[df['A'] == 'foo', 'B'] = np.nan
+ assert isna(grouped['B'].first()['foo'])
+ assert isna(grouped['B'].last()['foo'])
+ assert isna(grouped['B'].nth(0)['foo'])
+
+ # v0.14.0 whatsnew
+ df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
+ g = df.groupby('A')
+ result = g.first()
+ expected = df.iloc[[1, 2]].set_index('A')
+ assert_frame_equal(result, expected)
+
+ expected = df.iloc[[1, 2]].set_index('A')
+ result = g.nth(0, dropna='any')
+ assert_frame_equal(result, expected)
+
+
+def test_first_last_nth_dtypes(df_mixed_floats):
+
+ df = df_mixed_floats.copy()
+ df['E'] = True
+ df['F'] = 1
+
+ # tests for first / last / nth
+ grouped = df.groupby('A')
+ first = grouped.first()
+ expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']]
+ expected.index = Index(['bar', 'foo'], name='A')
+ expected = expected.sort_index()
+ assert_frame_equal(first, expected)
+
+ last = grouped.last()
+ expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']]
+ expected.index = Index(['bar', 'foo'], name='A')
+ expected = expected.sort_index()
+ assert_frame_equal(last, expected)
+
+ nth = grouped.nth(1)
+ expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']]
+ expected.index = Index(['bar', 'foo'], name='A')
+ expected = expected.sort_index()
+ assert_frame_equal(nth, expected)
+
+ # GH 2763, first/last shifting dtypes
+ idx = lrange(10)
+ idx.append(9)
+ s = Series(data=lrange(11), index=idx, name='IntCol')
+ assert s.dtype == 'int64'
+ f = s.groupby(level=0).first()
+ assert f.dtype == 'int64'
+
+
+def test_nth():
+ df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
+ g = df.groupby('A')
+
+ assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
+ assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
+ assert_frame_equal(g.nth(2), df.loc[[]].set_index('A'))
+ assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
+ assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
+ assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A'))
+ assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]])
+ assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]])
+ assert_frame_equal(g[['B']].nth(0),
+ df.loc[[0, 2], ['A', 'B']].set_index('A'))
+
+ exp = df.set_index('A')
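+ # with dropna='any' the NaN row is skipped, so the first valid row
+ # of group 1 is the one at position 1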
+ assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])
+ assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]])
+
+ exp['B'] = np.nan
+ assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]])
+ assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]])
+
+ # out of bounds, regression from 0.13.1
+ # GH 6621
+ df = DataFrame({'color': {0: 'green',
+ 1: 'green',
+ 2: 'red',
+ 3: 'red',
+ 4: 'red'},
+ 'food': {0: 'ham',
+ 1: 'eggs',
+ 2: 'eggs',
+ 3: 'ham',
+ 4: 'pork'},
+ 'two': {0: 1.5456590000000001,
+ 1: -0.070345000000000005,
+ 2: -2.4004539999999999,
+ 3: 0.46206000000000003,
+ 4: 0.52350799999999997},
+ 'one': {0: 0.56573799999999996,
+ 1: -0.9742360000000001,
+ 2: 1.033801,
+ 3: -0.78543499999999999,
+ 4: 0.70422799999999997}}).set_index(['color',
+ 'food'])
+
+ result = df.groupby(level=0, as_index=False).nth(2)
+ expected = df.iloc[[-1]]
+ assert_frame_equal(result, expected)
+
+ result = df.groupby(level=0, as_index=False).nth(3)
+ expected = df.loc[[]]
+ assert_frame_equal(result, expected)
+
+ # GH 7559
+ # from the vbench
+ df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64')
+ s = df[1]
+ g = df[0]
+ expected = s.groupby(g).first()
+ expected2 = s.groupby(g).apply(lambda x: x.iloc[0])
+ assert_series_equal(expected2, expected, check_names=False)
+ assert expected.name == 1
+ assert expected2.name == 1
+
+ # validate first
+ v = s[g == 1].iloc[0]
+ assert expected.iloc[0] == v
+ assert expected2.iloc[0] == v
+
+ # this is NOT the same as .first() (as sort=True is the default!)
+ # as it keeps the order within the series (and not the group order)
+ # related GH 7287
+ expected = s.groupby(g, sort=False).first()
+ result = s.groupby(g, sort=False).nth(0, dropna='all')
+ assert_series_equal(result, expected)
+
+ # doc example
+ df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
+ g = df.groupby('A')
+ # PR 17493, related to issue 11038
+ # test Series.nth with True for dropna produces FutureWarning
+ with assert_produces_warning(FutureWarning):
+ result = g.B.nth(0, dropna=True)
+ expected = g.B.first()
+ assert_series_equal(result, expected)
+
+ # test multiple nth values
+ df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]],
+ columns=['A', 'B'])
+ g = df.groupby('A')
+
+ assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A'))
+ assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A'))
+ assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A'))
+ assert_frame_equal(
+ g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A'))
+ assert_frame_equal(
+ g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
+ assert_frame_equal(
+ g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
+ assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A'))
+ assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A'))
+
+ business_dates = pd.date_range(start='4/1/2014', end='6/30/2014',
+ freq='B')
+ df = DataFrame(1, index=business_dates, columns=['a', 'b'])
+ # get the first, fourth and last two business days for each month
+ key = [df.index.year, df.index.month]
+ result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
+ expected_dates = pd.to_datetime(
+ ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
+ '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5',
+ '2014/6/27', '2014/6/30'])
+ expected = DataFrame(1, columns=['a', 'b'], index=expected_dates)
+ assert_frame_equal(result, expected)
+
+
+def test_nth_multi_index(three_group):
+ # PR 9090, related to issue 8979
+ # test nth on MultiIndex, should match .first()
+ grouped = three_group.groupby(['A', 'B'])
+ result = grouped.nth(0)
+ expected = grouped.first()
+ assert_frame_equal(result, expected)
+
+
+def test_nth_multi_index_as_expected():
+ # PR 9090, related to issue 8979
+ # test nth on MultiIndex
+ three_group = DataFrame(
+ {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
+ 'foo', 'foo', 'foo'],
+ 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
+ 'two', 'two', 'one'],
+ 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
+ 'dull', 'shiny', 'shiny', 'shiny']})
+ grouped = three_group.groupby(['A', 'B'])
+ result = grouped.nth(0)
+ expected = DataFrame(
+ {'C': ['dull', 'dull', 'dull', 'dull']},
+ index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'],
+ ['one', 'two', 'one', 'two']],
+ names=['A', 'B']))
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_head_tail():
+ df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+ g_as = df.groupby('A', as_index=True)
+ g_not_as = df.groupby('A', as_index=False)
+
+ # as_index=False, much easier
+ assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1))
+ assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))
+
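+ # head(0)/tail(0) and negative n return an empty frame; cast each
+ # column so the dtypes match the original (an empty DataFrame
+ # defaults to object dtype)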
+ empty_not_as = DataFrame(columns=df.columns,
+ index=pd.Index([], dtype=df.index.dtype))
+ empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype)
+ empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype)
+ assert_frame_equal(empty_not_as, g_not_as.head(0))
+ assert_frame_equal(empty_not_as, g_not_as.tail(0))
+ assert_frame_equal(empty_not_as, g_not_as.head(-1))
+ assert_frame_equal(empty_not_as, g_not_as.tail(-1))
+
+ assert_frame_equal(df, g_not_as.head(7)) # contains all
+ assert_frame_equal(df, g_not_as.tail(7))
+
+ # as_index=True (used to be different)
+ df_as = df
+
+ assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1))
+ assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))
+
+ empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
+ empty_as['A'] = empty_not_as['A'].astype(df.A.dtype)
+ empty_as['B'] = empty_not_as['B'].astype(df.B.dtype)
+ assert_frame_equal(empty_as, g_as.head(0))
+ assert_frame_equal(empty_as, g_as.tail(0))
+ assert_frame_equal(empty_as, g_as.head(-1))
+ assert_frame_equal(empty_as, g_as.tail(-1))
+
+ assert_frame_equal(df_as, g_as.head(7)) # contains all
+ assert_frame_equal(df_as, g_as.tail(7))
+
+ # test with selection
+ assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []])
+ assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']])
+ assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']])
+ assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]])
+
+ assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []])
+ assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']])
+ assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']])
+ assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]])
+
+
+def test_group_selection_cache():
+ # GH 12839 nth, head, and tail should return same result consistently
+ df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+ expected = df.iloc[[0, 2]].set_index('A')
+
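+ # run nth/head/tail in both orders to prove that the group selection
+ # cached by one call does not leak into the next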
+ g = df.groupby('A')
+ result1 = g.head(n=2)
+ result2 = g.nth(0)
+ assert_frame_equal(result1, df)
+ assert_frame_equal(result2, expected)
+
+ g = df.groupby('A')
+ result1 = g.tail(n=2)
+ result2 = g.nth(0)
+ assert_frame_equal(result1, df)
+ assert_frame_equal(result2, expected)
+
+ g = df.groupby('A')
+ result1 = g.nth(0)
+ result2 = g.head(n=2)
+ assert_frame_equal(result1, expected)
+ assert_frame_equal(result2, df)
+
+ g = df.groupby('A')
+ result1 = g.nth(0)
+ result2 = g.tail(n=2)
+ assert_frame_equal(result1, expected)
+ assert_frame_equal(result2, df)
def test_nth_empty():
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
new file mode 100644
index 0000000000000..6ad8b4905abff
--- /dev/null
+++ b/pandas/tests/groupby/test_rank.py
@@ -0,0 +1,254 @@
+import pytest
+import numpy as np
+import pandas as pd
+from pandas import DataFrame, concat
+from pandas.util import testing as tm
+
+
+def test_rank_apply():
+ lev1 = tm.rands_array(10, 100)
+ lev2 = tm.rands_array(10, 130)
+ lab1 = np.random.randint(0, 100, size=500)
+ lab2 = np.random.randint(0, 130, size=500)
+
+ df = DataFrame({'value': np.random.randn(500),
+ 'key1': lev1.take(lab1),
+ 'key2': lev2.take(lab2)})
+
+ result = df.groupby(['key1', 'key2']).value.rank()
+
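+ # build the expected ranks group by group, then restore the original
+ # row order before comparing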
+ expected = []
+ for key, piece in df.groupby(['key1', 'key2']):
+ expected.append(piece.value.rank())
+ expected = concat(expected, axis=0)
+ expected = expected.reindex(result.index)
+ tm.assert_series_equal(result, expected)
+
+ result = df.groupby(['key1', 'key2']).value.rank(pct=True)
+
+ expected = []
+ for key, piece in df.groupby(['key1', 'key2']):
+ expected.append(piece.value.rank(pct=True))
+ expected = concat(expected, axis=0)
+ expected = expected.reindex(result.index)
+ tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("grps", [
+ ['qux'], ['qux', 'quux']])
+@pytest.mark.parametrize("vals", [
+ [2, 2, 8, 2, 6],
+ [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'),
+ pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
+ pd.Timestamp('2018-01-06')]])
+@pytest.mark.parametrize("ties_method,ascending,pct,exp", [
+ ('average', True, False, [2., 2., 5., 2., 4.]),
+ ('average', True, True, [0.4, 0.4, 1.0, 0.4, 0.8]),
+ ('average', False, False, [4., 4., 1., 4., 2.]),
+ ('average', False, True, [.8, .8, .2, .8, .4]),
+ ('min', True, False, [1., 1., 5., 1., 4.]),
+ ('min', True, True, [0.2, 0.2, 1.0, 0.2, 0.8]),
+ ('min', False, False, [3., 3., 1., 3., 2.]),
+ ('min', False, True, [.6, .6, .2, .6, .4]),
+ ('max', True, False, [3., 3., 5., 3., 4.]),
+ ('max', True, True, [0.6, 0.6, 1.0, 0.6, 0.8]),
+ ('max', False, False, [5., 5., 1., 5., 2.]),
+ ('max', False, True, [1., 1., .2, 1., .4]),
+ ('first', True, False, [1., 2., 5., 3., 4.]),
+ ('first', True, True, [0.2, 0.4, 1.0, 0.6, 0.8]),
+ ('first', False, False, [3., 4., 1., 5., 2.]),
+ ('first', False, True, [.6, .8, .2, 1., .4]),
+ ('dense', True, False, [1., 1., 3., 1., 2.]),
+ ('dense', True, True, [0.2, 0.2, 0.6, 0.2, 0.4]),
+ ('dense', False, False, [3., 3., 1., 3., 2.]),
+ ('dense', False, True, [.6, .6, .2, .6, .4]),
+])
+def test_rank_args(grps, vals, ties_method, ascending, pct, exp):
+ key = np.repeat(grps, len(vals))
+ vals = vals * len(grps)
+ df = DataFrame({'key': key, 'val': vals})
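+ # the same five values are tiled across every group, so each group
+ # produces an identical rank vector and `exp` simply repeats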
+ result = df.groupby('key').rank(method=ties_method,
+ ascending=ascending, pct=pct)
+
+ exp_df = DataFrame(exp * len(grps), columns=['val'])
+ tm.assert_frame_equal(result, exp_df)
+
+
+@pytest.mark.parametrize("grps", [
+ ['qux'], ['qux', 'quux']])
+@pytest.mark.parametrize("vals", [
+ [-np.inf, -np.inf, np.nan, 1., np.nan, np.inf, np.inf],
+])
+@pytest.mark.parametrize("ties_method,ascending,na_option,exp", [
+ ('average', True, 'keep', [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]),
+ ('average', True, 'top', [3.5, 3.5, 1.5, 5., 1.5, 6.5, 6.5]),
+ ('average', True, 'bottom', [1.5, 1.5, 6.5, 3., 6.5, 4.5, 4.5]),
+ ('average', False, 'keep', [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]),
+ ('average', False, 'top', [6.5, 6.5, 1.5, 5., 1.5, 3.5, 3.5]),
+ ('average', False, 'bottom', [4.5, 4.5, 6.5, 3., 6.5, 1.5, 1.5]),
+ ('min', True, 'keep', [1., 1., np.nan, 3., np.nan, 4., 4.]),
+ ('min', True, 'top', [3., 3., 1., 5., 1., 6., 6.]),
+ ('min', True, 'bottom', [1., 1., 6., 3., 6., 4., 4.]),
+ ('min', False, 'keep', [4., 4., np.nan, 3., np.nan, 1., 1.]),
+ ('min', False, 'top', [6., 6., 1., 5., 1., 3., 3.]),
+ ('min', False, 'bottom', [4., 4., 6., 3., 6., 1., 1.]),
+ ('max', True, 'keep', [2., 2., np.nan, 3., np.nan, 5., 5.]),
+ ('max', True, 'top', [4., 4., 2., 5., 2., 7., 7.]),
+ ('max', True, 'bottom', [2., 2., 7., 3., 7., 5., 5.]),
+ ('max', False, 'keep', [5., 5., np.nan, 3., np.nan, 2., 2.]),
+ ('max', False, 'top', [7., 7., 2., 5., 2., 4., 4.]),
+ ('max', False, 'bottom', [5., 5., 7., 3., 7., 2., 2.]),
+ ('first', True, 'keep', [1., 2., np.nan, 3., np.nan, 4., 5.]),
+ ('first', True, 'top', [3., 4., 1., 5., 2., 6., 7.]),
+ ('first', True, 'bottom', [1., 2., 6., 3., 7., 4., 5.]),
+ ('first', False, 'keep', [4., 5., np.nan, 3., np.nan, 1., 2.]),
+ ('first', False, 'top', [6., 7., 1., 5., 2., 3., 4.]),
+ ('first', False, 'bottom', [4., 5., 6., 3., 7., 1., 2.]),
+ ('dense', True, 'keep', [1., 1., np.nan, 2., np.nan, 3., 3.]),
+ ('dense', True, 'top', [2., 2., 1., 3., 1., 4., 4.]),
+ ('dense', True, 'bottom', [1., 1., 4., 2., 4., 3., 3.]),
+ ('dense', False, 'keep', [3., 3., np.nan, 2., np.nan, 1., 1.]),
+ ('dense', False, 'top', [4., 4., 1., 3., 1., 2., 2.]),
+ ('dense', False, 'bottom', [3., 3., 4., 2., 4., 1., 1.])
+])
+def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp):
+ # GH 20561
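+ # +/-inf rank as ordinary extreme values; na_option controls NaN:
+ # 'keep' leaves NaN in place, 'top' assigns NaNs the smallest ranks,
+ # 'bottom' the largest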
+ key = np.repeat(grps, len(vals))
+ vals = vals * len(grps)
+ df = DataFrame({'key': key, 'val': vals})
+ result = df.groupby('key').rank(method=ties_method,
+ ascending=ascending,
+ na_option=na_option)
+ exp_df = DataFrame(exp * len(grps), columns=['val'])
+ tm.assert_frame_equal(result, exp_df)
+
+
+@pytest.mark.parametrize("grps", [
+ ['qux'], ['qux', 'quux']])
+@pytest.mark.parametrize("vals", [
+ [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], # floats
+ [pd.Timestamp('2018-01-02'), pd.Timestamp('2018-01-02'), np.nan,
+ pd.Timestamp('2018-01-08'), pd.Timestamp('2018-01-02'),
+ pd.Timestamp('2018-01-06'), np.nan, np.nan]
+])
+@pytest.mark.parametrize("ties_method,ascending,na_option,pct,exp", [
+ ('average', True, 'keep', False,
+ [2., 2., np.nan, 5., 2., 4., np.nan, np.nan]),
+ ('average', True, 'keep', True,
+ [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan]),
+ ('average', False, 'keep', False,
+ [4., 4., np.nan, 1., 4., 2., np.nan, np.nan]),
+ ('average', False, 'keep', True,
+ [.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan]),
+ ('min', True, 'keep', False,
+ [1., 1., np.nan, 5., 1., 4., np.nan, np.nan]),
+ ('min', True, 'keep', True,
+ [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]),
+ ('min', False, 'keep', False,
+ [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
+ ('min', False, 'keep', True,
+ [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
+ ('max', True, 'keep', False,
+ [3., 3., np.nan, 5., 3., 4., np.nan, np.nan]),
+ ('max', True, 'keep', True,
+ [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
+ ('max', False, 'keep', False,
+ [5., 5., np.nan, 1., 5., 2., np.nan, np.nan]),
+ ('max', False, 'keep', True,
+ [1., 1., np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
+ ('first', True, 'keep', False,
+ [1., 2., np.nan, 5., 3., 4., np.nan, np.nan]),
+ ('first', True, 'keep', True,
+ [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]),
+ ('first', False, 'keep', False,
+ [3., 4., np.nan, 1., 5., 2., np.nan, np.nan]),
+ ('first', False, 'keep', True,
+ [.6, 0.8, np.nan, 0.2, 1., 0.4, np.nan, np.nan]),
+ ('dense', True, 'keep', False,
+ [1., 1., np.nan, 3., 1., 2., np.nan, np.nan]),
+ ('dense', True, 'keep', True,
+ [0.2, 0.2, np.nan, 0.6, 0.2, 0.4, np.nan, np.nan]),
+ ('dense', False, 'keep', False,
+ [3., 3., np.nan, 1., 3., 2., np.nan, np.nan]),
+ ('dense', False, 'keep', True,
+ [.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]),
+ ('average', True, 'no_na', False, [2., 2., 7., 5., 2., 4., 7., 7.]),
+ ('average', True, 'no_na', True,
+ [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875]),
+ ('average', False, 'no_na', False, [4., 4., 7., 1., 4., 2., 7., 7.]),
+ ('average', False, 'no_na', True,
+ [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875]),
+ ('min', True, 'no_na', False, [1., 1., 6., 5., 1., 4., 6., 6.]),
+ ('min', True, 'no_na', True,
+ [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75]),
+ ('min', False, 'no_na', False, [3., 3., 6., 1., 3., 2., 6., 6.]),
+ ('min', False, 'no_na', True,
+ [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75]),
+ ('max', True, 'no_na', False, [3., 3., 8., 5., 3., 4., 8., 8.]),
+ ('max', True, 'no_na', True,
+ [0.375, 0.375, 1., 0.625, 0.375, 0.5, 1., 1.]),
+ ('max', False, 'no_na', False, [5., 5., 8., 1., 5., 2., 8., 8.]),
+ ('max', False, 'no_na', True,
+ [0.625, 0.625, 1., 0.125, 0.625, 0.25, 1., 1.]),
+ ('first', True, 'no_na', False, [1., 2., 6., 5., 3., 4., 7., 8.]),
+ ('first', True, 'no_na', True,
+ [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.]),
+ ('first', False, 'no_na', False, [3., 4., 6., 1., 5., 2., 7., 8.]),
+ ('first', False, 'no_na', True,
+ [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.]),
+ ('dense', True, 'no_na', False, [1., 1., 4., 3., 1., 2., 4., 4.]),
+ ('dense', True, 'no_na', True,
+ [0.125, 0.125, 0.5, 0.375, 0.125, 0.25, 0.5, 0.5]),
+ ('dense', False, 'no_na', False, [3., 3., 4., 1., 3., 2., 4., 4.]),
+ ('dense', False, 'no_na', True,
+ [0.375, 0.375, 0.5, 0.125, 0.375, 0.25, 0.5, 0.5])
+])
+def test_rank_args_missing(grps, vals, ties_method, ascending,
+ na_option, pct, exp):
+ key = np.repeat(grps, len(vals))
+ vals = vals * len(grps)
+ df = DataFrame({'key': key, 'val': vals})
+ result = df.groupby('key').rank(method=ties_method,
+ ascending=ascending,
+ na_option=na_option, pct=pct)
+
+ exp_df = DataFrame(exp * len(grps), columns=['val'])
+ tm.assert_frame_equal(result, exp_df)
+
+
+@pytest.mark.parametrize("pct,exp", [
+ (False, [3., 3., 3., 3., 3.]),
+ (True, [.6, .6, .6, .6, .6])])
+def test_rank_resets_each_group(pct, exp):
+ df = DataFrame(
+ {'key': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'],
+ 'val': [1] * 10}
+ )
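+ # all five values within a group tie, so the average rank is
+ # (1 + 2 + 3 + 4 + 5) / 5 = 3 and pct gives 3 / 5 = 0.6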
+ result = df.groupby('key').rank(pct=pct)
+ exp_df = DataFrame(exp * 2, columns=['val'])
+ tm.assert_frame_equal(result, exp_df)
+
+
+def test_rank_avg_even_vals():
+ df = DataFrame({'key': ['a'] * 4, 'val': [1] * 4})
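+ # four tied values average to (1 + 2 + 3 + 4) / 4 = 2.5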
+ result = df.groupby('key').rank()
+ exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=['val'])
+ tm.assert_frame_equal(result, exp_df)
+
+
+@pytest.mark.parametrize("ties_method", [
+ 'average', 'min', 'max', 'first', 'dense'])
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"])
+@pytest.mark.parametrize("pct", [True, False])
+@pytest.mark.parametrize("vals", [
+ ['bar', 'bar', 'foo', 'bar', 'baz'],
+ ['bar', np.nan, 'foo', np.nan, 'baz']
+])
+def test_rank_object_raises(ties_method, ascending, na_option,
+ pct, vals):
+ df = DataFrame({'key': ['foo'] * 5, 'val': vals})
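+ # object dtype has no grouped rank implementation, so every keyword
+ # combination should raise TypeError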
+ with tm.assert_raises_regex(TypeError, "not callable"):
+ df.groupby('key').rank(method=ties_method,
+ ascending=ascending,
+ na_option=na_option, pct=pct)
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 390b99d0fab1c..626057c1ea760 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -10,728 +10,758 @@
_ensure_platform_int, is_timedelta64_dtype)
from pandas.compat import StringIO
from pandas._libs import groupby
-from .common import MixIn, assert_fp_equal
from pandas.util.testing import assert_frame_equal, assert_series_equal
from pandas.core.groupby.groupby import DataError
from pandas.core.config import option_context
-class TestGroupBy(MixIn):
-
- def test_transform(self):
- data = Series(np.arange(9) // 3, index=np.arange(9))
-
- index = np.arange(9)
- np.random.shuffle(index)
- data = data.reindex(index)
-
- grouped = data.groupby(lambda x: x // 3)
-
- transformed = grouped.transform(lambda x: x * x.sum())
- assert transformed[7] == 12
-
- # GH 8046
- # make sure that we preserve the input order
-
- df = DataFrame(
- np.arange(6, dtype='int64').reshape(
- 3, 2), columns=["a", "b"], index=[0, 2, 1])
- key = [0, 0, 1]
- expected = df.sort_index().groupby(key).transform(
- lambda x: x - x.mean()).groupby(key).mean()
- result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(
- key).mean()
- assert_frame_equal(result, expected)
-
- def demean(arr):
- return arr - arr.mean()
-
- people = DataFrame(np.random.randn(5, 5),
- columns=['a', 'b', 'c', 'd', 'e'],
- index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
- key = ['one', 'two', 'one', 'two', 'one']
- result = people.groupby(key).transform(demean).groupby(key).mean()
- expected = people.groupby(key).apply(demean).groupby(key).mean()
- assert_frame_equal(result, expected)
-
- # GH 8430
- df = tm.makeTimeDataFrame()
- g = df.groupby(pd.Grouper(freq='M'))
- g.transform(lambda x: x - 1)
-
- # GH 9700
- df = DataFrame({'a': range(5, 10), 'b': range(5)})
- result = df.groupby('a').transform(max)
- expected = DataFrame({'b': range(5)})
- tm.assert_frame_equal(result, expected)
-
- def test_transform_fast(self):
-
- df = DataFrame({'id': np.arange(100000) / 3,
- 'val': np.random.randn(100000)})
-
- grp = df.groupby('id')['val']
-
- values = np.repeat(grp.mean().values,
- _ensure_platform_int(grp.count().values))
- expected = pd.Series(values, index=df.index, name='val')
-
- result = grp.transform(np.mean)
- assert_series_equal(result, expected)
-
- result = grp.transform('mean')
- assert_series_equal(result, expected)
-
- # GH 12737
- df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5],
- 'd': pd.date_range('2014-1-1', '2014-1-4'),
- 'i': [1, 2, 3, 4]},
- columns=['grouping', 'f', 'i', 'd'])
- result = df.groupby('grouping').transform('first')
-
- dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'),
- pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')]
- expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5],
- 'd': dates,
- 'i': [1, 2, 2, 4]},
- columns=['f', 'i', 'd'])
- assert_frame_equal(result, expected)
-
- # selection
- result = df.groupby('grouping')[['f', 'i']].transform('first')
- expected = expected[['f', 'i']]
- assert_frame_equal(result, expected)
-
- # dup columns
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a'])
- result = df.groupby('g').transform('first')
- expected = df.drop('g', axis=1)
- assert_frame_equal(result, expected)
-
- def test_transform_broadcast(self):
- grouped = self.ts.groupby(lambda x: x.month)
- result = grouped.transform(np.mean)
-
- tm.assert_index_equal(result.index, self.ts.index)
- for _, gp in grouped:
- assert_fp_equal(result.reindex(gp.index), gp.mean())
-
- grouped = self.tsframe.groupby(lambda x: x.month)
- result = grouped.transform(np.mean)
- tm.assert_index_equal(result.index, self.tsframe.index)
- for _, gp in grouped:
- agged = gp.mean()
- res = result.reindex(gp.index)
- for col in self.tsframe:
- assert_fp_equal(res[col], agged[col])
-
- # group columns
- grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
- axis=1)
- result = grouped.transform(np.mean)
- tm.assert_index_equal(result.index, self.tsframe.index)
- tm.assert_index_equal(result.columns, self.tsframe.columns)
- for _, gp in grouped:
- agged = gp.mean(1)
- res = result.reindex(columns=gp.columns)
- for idx in gp.index:
- assert_fp_equal(res.xs(idx), agged[idx])
-
- def test_transform_axis(self):
-
- # make sure that we are setting the axes
- # correctly when on axis=0 or 1
- # in the presence of a non-monotonic indexer
- # GH12713
-
- base = self.tsframe.iloc[0:5]
- r = len(base.index)
- c = len(base.columns)
- tso = DataFrame(np.random.randn(r, c),
- index=base.index,
- columns=base.columns,
- dtype='float64')
- # monotonic
- ts = tso
- grouped = ts.groupby(lambda x: x.weekday())
- result = ts - grouped.transform('mean')
- expected = grouped.apply(lambda x: x - x.mean())
- assert_frame_equal(result, expected)
-
- ts = ts.T
- grouped = ts.groupby(lambda x: x.weekday(), axis=1)
- result = ts - grouped.transform('mean')
- expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
- assert_frame_equal(result, expected)
-
- # non-monotonic
- ts = tso.iloc[[1, 0] + list(range(2, len(base)))]
- grouped = ts.groupby(lambda x: x.weekday())
- result = ts - grouped.transform('mean')
- expected = grouped.apply(lambda x: x - x.mean())
- assert_frame_equal(result, expected)
-
- ts = ts.T
- grouped = ts.groupby(lambda x: x.weekday(), axis=1)
- result = ts - grouped.transform('mean')
- expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
- assert_frame_equal(result, expected)
-
- def test_transform_dtype(self):
- # GH 9807
- # Check transform dtype output is preserved
- df = DataFrame([[1, 3], [2, 3]])
- result = df.groupby(1).transform('mean')
- expected = DataFrame([[1.5], [1.5]])
- assert_frame_equal(result, expected)
-
- def test_transform_bug(self):
- # GH 5712
- # transforming on a datetime column
- df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
- result = df.groupby('A')['B'].transform(
- lambda x: x.rank(ascending=False))
- expected = Series(np.arange(5, 0, step=-1), name='B')
- assert_series_equal(result, expected)
-
- def test_transform_numeric_to_boolean(self):
- # GH 16875
- # inconsistency in transforming boolean values
- expected = pd.Series([True, True], name='A')
-
- df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]})
- result = df.groupby('B').A.transform(lambda x: True)
- assert_series_equal(result, expected)
-
- df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]})
- result = df.groupby('B').A.transform(lambda x: True)
- assert_series_equal(result, expected)
-
- def test_transform_datetime_to_timedelta(self):
- # GH 15429
- # transforming a datetime to timedelta
- df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
- expected = pd.Series([
- Timestamp('20130101') - Timestamp('20130101')] * 5, name='A')
-
- # this does date math without changing result type in transform
- base_time = df['A'][0]
- result = df.groupby('A')['A'].transform(
- lambda x: x.max() - x.min() + base_time) - base_time
- assert_series_equal(result, expected)
-
- # this does date math and causes the transform to return timedelta
- result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
- assert_series_equal(result, expected)
-
- def test_transform_datetime_to_numeric(self):
- # GH 10972
- # convert dt to float
- df = DataFrame({
- 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
- result = df.groupby('a').b.transform(
- lambda x: x.dt.dayofweek - x.dt.dayofweek.mean())
-
- expected = Series([-0.5, 0.5], name='b')
- assert_series_equal(result, expected)
-
- # convert dt to int
- df = DataFrame({
- 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
- result = df.groupby('a').b.transform(
- lambda x: x.dt.dayofweek - x.dt.dayofweek.min())
-
- expected = Series([0, 1], name='b')
- assert_series_equal(result, expected)
-
- def test_transform_casting(self):
- # 13046
- data = """
- idx A ID3 DATETIME
- 0 B-028 b76cd912ff "2014-10-08 13:43:27"
- 1 B-054 4a57ed0b02 "2014-10-08 14:26:19"
- 2 B-076 1a682034f8 "2014-10-08 14:29:01"
- 3 B-023 b76cd912ff "2014-10-08 18:39:34"
- 4 B-023 f88g8d7sds "2014-10-08 18:40:18"
- 5 B-033 b76cd912ff "2014-10-08 18:44:30"
- 6 B-032 b76cd912ff "2014-10-08 18:46:00"
- 7 B-037 b76cd912ff "2014-10-08 18:52:15"
- 8 B-046 db959faf02 "2014-10-08 18:59:59"
- 9 B-053 b76cd912ff "2014-10-08 19:17:48"
- 10 B-065 b76cd912ff "2014-10-08 19:21:38"
- """
- df = pd.read_csv(StringIO(data), sep=r'\s+',
- index_col=[0], parse_dates=['DATETIME'])
-
- result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff())
- assert is_timedelta64_dtype(result.dtype)
-
- result = df[['ID3', 'DATETIME']].groupby('ID3').transform(
- lambda x: x.diff())
- assert is_timedelta64_dtype(result.DATETIME.dtype)
-
- def test_transform_multiple(self):
- grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month])
-
- grouped.transform(lambda x: x * 2)
- grouped.transform(np.mean)
-
- def test_dispatch_transform(self):
- df = self.tsframe[::5].reindex(self.tsframe.index)
-
- grouped = df.groupby(lambda x: x.month)
-
- filled = grouped.fillna(method='pad')
- fillit = lambda x: x.fillna(method='pad')
- expected = df.groupby(lambda x: x.month).transform(fillit)
- assert_frame_equal(filled, expected)
-
- def test_transform_select_columns(self):
- f = lambda x: x.mean()
- result = self.df.groupby('A')['C', 'D'].transform(f)
-
- selection = self.df[['C', 'D']]
- expected = selection.groupby(self.df['A']).transform(f)
-
- assert_frame_equal(result, expected)
-
- def test_transform_exclude_nuisance(self):
-
- # this also tests orderings in transform between
- # series/frame to make sure it's consistent
- expected = {}
- grouped = self.df.groupby('A')
- expected['C'] = grouped['C'].transform(np.mean)
- expected['D'] = grouped['D'].transform(np.mean)
- expected = DataFrame(expected)
- result = self.df.groupby('A').transform(np.mean)
-
- assert_frame_equal(result, expected)
-
- def test_transform_function_aliases(self):
- result = self.df.groupby('A').transform('mean')
- expected = self.df.groupby('A').transform(np.mean)
- assert_frame_equal(result, expected)
-
- result = self.df.groupby('A')['C'].transform('mean')
- expected = self.df.groupby('A')['C'].transform(np.mean)
- assert_series_equal(result, expected)
-
- def test_series_fast_transform_date(self):
- # GH 13191
- df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3],
- 'd': pd.date_range('2014-1-1', '2014-1-4')})
- result = df.groupby('grouping')['d'].transform('first')
- dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'),
- pd.Timestamp('2014-1-4')]
- expected = pd.Series(dates, name='d')
- assert_series_equal(result, expected)
-
- def test_transform_length(self):
- # GH 9697
- df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]})
- expected = pd.Series([3.0] * 4)
-
- def nsum(x):
- return np.nansum(x)
-
- results = [df.groupby('col1').transform(sum)['col2'],
- df.groupby('col1')['col2'].transform(sum),
- df.groupby('col1').transform(nsum)['col2'],
- df.groupby('col1')['col2'].transform(nsum)]
- for result in results:
- assert_series_equal(result, expected, check_names=False)
-
- def test_transform_coercion(self):
-
- # 14457
- # when we are transforming be sure to not coerce
- # via assignment
- df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1]))
- g = df.groupby('A')
-
- expected = g.transform(np.mean)
- result = g.transform(lambda x: np.mean(x))
- assert_frame_equal(result, expected)
-
- def test_groupby_transform_with_int(self):
-
- # GH 3740, make sure that we might upcast on item-by-item transform
-
- # floats
- df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'),
- C=Series(
- [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo'))
- with np.errstate(all='ignore'):
- result = df.groupby('A').transform(
- lambda x: (x - x.mean()) / x.std())
- expected = DataFrame(dict(B=np.nan, C=Series(
- [-1, 0, 1, -1, 0, 1], dtype='float64')))
- assert_frame_equal(result, expected)
-
- # int case
- df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1,
- C=[1, 2, 3, 1, 2, 3], D='foo'))
- with np.errstate(all='ignore'):
- result = df.groupby('A').transform(
- lambda x: (x - x.mean()) / x.std())
- expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1]))
- assert_frame_equal(result, expected)
-
- # int that needs float conversion
- s = Series([2, 3, 4, 10, 5, -1])
- df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo'))
- with np.errstate(all='ignore'):
- result = df.groupby('A').transform(
- lambda x: (x - x.mean()) / x.std())
-
- s1 = s.iloc[0:3]
- s1 = (s1 - s1.mean()) / s1.std()
- s2 = s.iloc[3:6]
- s2 = (s2 - s2.mean()) / s2.std()
- expected = DataFrame(dict(B=np.nan, C=concat([s1, s2])))
- assert_frame_equal(result, expected)
-
- # int downcasting
- result = df.groupby('A').transform(lambda x: x * 2 / 2)
- expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1]))
- assert_frame_equal(result, expected)
-
- def test_groupby_transform_with_nan_group(self):
- # GH 9941
- df = pd.DataFrame({'a': range(10),
- 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]})
- result = df.groupby(df.b)['a'].transform(max)
- expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.],
- name='a')
- assert_series_equal(result, expected)
-
- def test_transform_mixed_type(self):
- index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
- ])
- df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
- 'c': np.tile(['a', 'b', 'c'], 2),
- 'v': np.arange(1., 7.)}, index=index)
-
- def f(group):
- group['g'] = group['d'] * 2
- return group[:1]
-
- grouped = df.groupby('c')
- result = grouped.apply(f)
-
- assert result['d'].dtype == np.float64
-
- # this is by definition a mutating operation!
- with option_context('mode.chained_assignment', None):
- for key, group in grouped:
- res = f(group)
- assert_frame_equal(res, result.loc[key])
-
- def test_cython_group_transform_algos(self):
- # GH 4095
- dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32,
- np.uint64, np.float32, np.float64]
-
- ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]),
- (groupby.group_cumsum, np.cumsum, dtypes)]
-
- is_datetimelike = False
- for pd_op, np_op, dtypes in ops:
- for dtype in dtypes:
- data = np.array([[1], [2], [3], [4]], dtype=dtype)
- ans = np.zeros_like(data)
- labels = np.array([0, 0, 0, 0], dtype=np.int64)
- pd_op(ans, data, labels, is_datetimelike)
- tm.assert_numpy_array_equal(np_op(data), ans[:, 0],
- check_dtype=False)
-
- # with nans
- labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
-
- data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64')
- actual = np.zeros_like(data)
- actual.fill(np.nan)
- groupby.group_cumprod_float64(actual, data, labels, is_datetimelike)
- expected = np.array([1, 2, 6, np.nan, 24], dtype='float64')
- tm.assert_numpy_array_equal(actual[:, 0], expected)
-
- actual = np.zeros_like(data)
- actual.fill(np.nan)
- groupby.group_cumsum(actual, data, labels, is_datetimelike)
- expected = np.array([1, 3, 6, np.nan, 10], dtype='float64')
- tm.assert_numpy_array_equal(actual[:, 0], expected)
-
- # timedelta
- is_datetimelike = True
- data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None]
- actual = np.zeros_like(data, dtype='int64')
- groupby.group_cumsum(actual, data.view('int64'), labels,
- is_datetimelike)
- expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64(
- 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'),
- np.timedelta64(5, 'ns')])
- tm.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected)
-
- @pytest.mark.parametrize(
- "op, args, targop",
- [('cumprod', (), lambda x: x.cumprod()),
- ('cumsum', (), lambda x: x.cumsum()),
- ('shift', (-1, ), lambda x: x.shift(-1)),
- ('shift', (1, ), lambda x: x.shift())])
- def test_cython_transform_series(self, op, args, targop):
- # GH 4095
- s = Series(np.random.randn(1000))
- s_missing = s.copy()
- s_missing.iloc[2:10] = np.nan
- labels = np.random.randint(0, 50, size=1000).astype(float)
-
- # series
- for data in [s, s_missing]:
- # print(data.head())
- expected = data.groupby(labels).transform(targop)
-
- tm.assert_series_equal(
+def assert_fp_equal(a, b):
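+    # element-wise tolerance check; replaces the helper that used to be
+    # imported from the groupby tests' common module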
+ assert (np.abs(a - b) < 1e-12).all()
+
+
+def test_transform():
+ data = Series(np.arange(9) // 3, index=np.arange(9))
+
+ index = np.arange(9)
+ np.random.shuffle(index)
+ data = data.reindex(index)
+
+ grouped = data.groupby(lambda x: x // 3)
+
+ transformed = grouped.transform(lambda x: x * x.sum())
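+    # index 7 sits in the last group (values 2, 2, 2), so 2 * 6 == 12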
+ assert transformed[7] == 12
+
+ # GH 8046
+ # make sure that we preserve the input order
+
+ df = DataFrame(
+ np.arange(6, dtype='int64').reshape(
+ 3, 2), columns=["a", "b"], index=[0, 2, 1])
+ key = [0, 0, 1]
+ expected = df.sort_index().groupby(key).transform(
+ lambda x: x - x.mean()).groupby(key).mean()
+ result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(
+ key).mean()
+ assert_frame_equal(result, expected)
+
+ def demean(arr):
+ return arr - arr.mean()
+
+ people = DataFrame(np.random.randn(5, 5),
+ columns=['a', 'b', 'c', 'd', 'e'],
+ index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
+ key = ['one', 'two', 'one', 'two', 'one']
+ result = people.groupby(key).transform(demean).groupby(key).mean()
+ expected = people.groupby(key).apply(demean).groupby(key).mean()
+ assert_frame_equal(result, expected)
+
+ # GH 8430
+ df = tm.makeTimeDataFrame()
+ g = df.groupby(pd.Grouper(freq='M'))
+ g.transform(lambda x: x - 1)
+
+ # GH 9700
+ df = DataFrame({'a': range(5, 10), 'b': range(5)})
+ result = df.groupby('a').transform(max)
+ expected = DataFrame({'b': range(5)})
+ tm.assert_frame_equal(result, expected)
+
+
+def test_transform_fast():
+
+ df = DataFrame({'id': np.arange(100000) / 3,
+ 'val': np.random.randn(100000)})
+
+ grp = df.groupby('id')['val']
+
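+    # broadcast each group's mean across its rows to build the expected
+    # series by hand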
+ values = np.repeat(grp.mean().values,
+ _ensure_platform_int(grp.count().values))
+ expected = pd.Series(values, index=df.index, name='val')
+
+ result = grp.transform(np.mean)
+ assert_series_equal(result, expected)
+
+ result = grp.transform('mean')
+ assert_series_equal(result, expected)
+
+ # GH 12737
+ df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5],
+ 'd': pd.date_range('2014-1-1', '2014-1-4'),
+ 'i': [1, 2, 3, 4]},
+ columns=['grouping', 'f', 'i', 'd'])
+ result = df.groupby('grouping').transform('first')
+
+ dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'),
+ pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')]
+ expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5],
+ 'd': dates,
+ 'i': [1, 2, 2, 4]},
+ columns=['f', 'i', 'd'])
+ assert_frame_equal(result, expected)
+
+ # selection
+ result = df.groupby('grouping')[['f', 'i']].transform('first')
+ expected = expected[['f', 'i']]
+ assert_frame_equal(result, expected)
+
+ # dup columns
+ df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a'])
+ result = df.groupby('g').transform('first')
+ expected = df.drop('g', axis=1)
+ assert_frame_equal(result, expected)
+
+
+def test_transform_broadcast(tsframe, ts):
+ grouped = ts.groupby(lambda x: x.month)
+ result = grouped.transform(np.mean)
+
+ tm.assert_index_equal(result.index, ts.index)
+ for _, gp in grouped:
+ assert_fp_equal(result.reindex(gp.index), gp.mean())
+
+ grouped = tsframe.groupby(lambda x: x.month)
+ result = grouped.transform(np.mean)
+ tm.assert_index_equal(result.index, tsframe.index)
+ for _, gp in grouped:
+ agged = gp.mean()
+ res = result.reindex(gp.index)
+ for col in tsframe:
+ assert_fp_equal(res[col], agged[col])
+
+ # group columns
+ grouped = tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
+ axis=1)
+ result = grouped.transform(np.mean)
+ tm.assert_index_equal(result.index, tsframe.index)
+ tm.assert_index_equal(result.columns, tsframe.columns)
+ for _, gp in grouped:
+ agged = gp.mean(1)
+ res = result.reindex(columns=gp.columns)
+ for idx in gp.index:
+ assert_fp_equal(res.xs(idx), agged[idx])
+
+
+def test_transform_axis(tsframe):
+
+ # make sure that we are setting the axes
+ # correctly when on axis=0 or 1
+ # in the presence of a non-monotonic indexer
+ # GH12713
+
+ base = tsframe.iloc[0:5]
+ r = len(base.index)
+ c = len(base.columns)
+ tso = DataFrame(np.random.randn(r, c),
+ index=base.index,
+ columns=base.columns,
+ dtype='float64')
+ # monotonic
+ ts = tso
+ grouped = ts.groupby(lambda x: x.weekday())
+ result = ts - grouped.transform('mean')
+ expected = grouped.apply(lambda x: x - x.mean())
+ assert_frame_equal(result, expected)
+
+ ts = ts.T
+ grouped = ts.groupby(lambda x: x.weekday(), axis=1)
+ result = ts - grouped.transform('mean')
+ expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
+ assert_frame_equal(result, expected)
+
+ # non-monotonic
+ ts = tso.iloc[[1, 0] + list(range(2, len(base)))]
+ grouped = ts.groupby(lambda x: x.weekday())
+ result = ts - grouped.transform('mean')
+ expected = grouped.apply(lambda x: x - x.mean())
+ assert_frame_equal(result, expected)
+
+ ts = ts.T
+ grouped = ts.groupby(lambda x: x.weekday(), axis=1)
+ result = ts - grouped.transform('mean')
+ expected = grouped.apply(lambda x: (x.T - x.mean(1)).T)
+ assert_frame_equal(result, expected)
+
+
+def test_transform_dtype():
+ # GH 9807
+ # Check transform dtype output is preserved
+ df = DataFrame([[1, 3], [2, 3]])
+ result = df.groupby(1).transform('mean')
+ expected = DataFrame([[1.5], [1.5]])
+ assert_frame_equal(result, expected)
+
+
+def test_transform_bug():
+ # GH 5712
+ # transforming on a datetime column
+ df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
+ result = df.groupby('A')['B'].transform(
+ lambda x: x.rank(ascending=False))
+ expected = Series(np.arange(5, 0, step=-1), name='B')
+ assert_series_equal(result, expected)
+
+
+def test_transform_numeric_to_boolean():
+ # GH 16875
+ # inconsistency in transforming boolean values
+ expected = pd.Series([True, True], name='A')
+
+ df = pd.DataFrame({'A': [1.1, 2.2], 'B': [1, 2]})
+ result = df.groupby('B').A.transform(lambda x: True)
+ assert_series_equal(result, expected)
+
+ df = pd.DataFrame({'A': [1, 2], 'B': [1, 2]})
+ result = df.groupby('B').A.transform(lambda x: True)
+ assert_series_equal(result, expected)
+
+
+def test_transform_datetime_to_timedelta():
+ # GH 15429
+ # transforming a datetime to timedelta
+ df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
+ expected = pd.Series([
+ Timestamp('20130101') - Timestamp('20130101')] * 5, name='A')
+
+ # this does date math without changing result type in transform
+ base_time = df['A'][0]
+ result = df.groupby('A')['A'].transform(
+ lambda x: x.max() - x.min() + base_time) - base_time
+ assert_series_equal(result, expected)
+
+ # this does date math and causes the transform to return timedelta
+ result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
+ assert_series_equal(result, expected)
+
+
+def test_transform_datetime_to_numeric():
+ # GH 10972
+ # convert dt to float
+ df = DataFrame({
+ 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
+ result = df.groupby('a').b.transform(
+ lambda x: x.dt.dayofweek - x.dt.dayofweek.mean())
+
+ expected = Series([-0.5, 0.5], name='b')
+ assert_series_equal(result, expected)
+
+ # convert dt to int
+ df = DataFrame({
+ 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')})
+ result = df.groupby('a').b.transform(
+ lambda x: x.dt.dayofweek - x.dt.dayofweek.min())
+
+ expected = Series([0, 1], name='b')
+ assert_series_equal(result, expected)
+
+
+def test_transform_casting():
+    # GH 13046
+ data = """
+ idx A ID3 DATETIME
+ 0 B-028 b76cd912ff "2014-10-08 13:43:27"
+ 1 B-054 4a57ed0b02 "2014-10-08 14:26:19"
+ 2 B-076 1a682034f8 "2014-10-08 14:29:01"
+ 3 B-023 b76cd912ff "2014-10-08 18:39:34"
+ 4 B-023 f88g8d7sds "2014-10-08 18:40:18"
+ 5 B-033 b76cd912ff "2014-10-08 18:44:30"
+ 6 B-032 b76cd912ff "2014-10-08 18:46:00"
+ 7 B-037 b76cd912ff "2014-10-08 18:52:15"
+ 8 B-046 db959faf02 "2014-10-08 18:59:59"
+ 9 B-053 b76cd912ff "2014-10-08 19:17:48"
+ 10 B-065 b76cd912ff "2014-10-08 19:21:38"
+ """
+ df = pd.read_csv(StringIO(data), sep=r'\s+',
+ index_col=[0], parse_dates=['DATETIME'])
+
+ result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff())
+ assert is_timedelta64_dtype(result.dtype)
+
+ result = df[['ID3', 'DATETIME']].groupby('ID3').transform(
+ lambda x: x.diff())
+ assert is_timedelta64_dtype(result.DATETIME.dtype)
+
+
+def test_transform_multiple(ts):
+ grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
+
+ grouped.transform(lambda x: x * 2)
+ grouped.transform(np.mean)
+
+
+def test_dispatch_transform(tsframe):
+ df = tsframe[::5].reindex(tsframe.index)
+
+ grouped = df.groupby(lambda x: x.month)
+
+ filled = grouped.fillna(method='pad')
+ fillit = lambda x: x.fillna(method='pad')
+ expected = df.groupby(lambda x: x.month).transform(fillit)
+ assert_frame_equal(filled, expected)
+
+
+def test_transform_select_columns(df):
+ f = lambda x: x.mean()
+ result = df.groupby('A')['C', 'D'].transform(f)
+
+ selection = df[['C', 'D']]
+ expected = selection.groupby(df['A']).transform(f)
+
+ assert_frame_equal(result, expected)
+
+
+def test_transform_exclude_nuisance(df):
+
+ # this also tests orderings in transform between
+ # series/frame to make sure it's consistent
+ expected = {}
+ grouped = df.groupby('A')
+ expected['C'] = grouped['C'].transform(np.mean)
+ expected['D'] = grouped['D'].transform(np.mean)
+ expected = DataFrame(expected)
+ result = df.groupby('A').transform(np.mean)
+
+ assert_frame_equal(result, expected)
+
+
+def test_transform_function_aliases(df):
+ result = df.groupby('A').transform('mean')
+ expected = df.groupby('A').transform(np.mean)
+ assert_frame_equal(result, expected)
+
+ result = df.groupby('A')['C'].transform('mean')
+ expected = df.groupby('A')['C'].transform(np.mean)
+ assert_series_equal(result, expected)
+
+
+def test_series_fast_transform_date():
+ # GH 13191
+ df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3],
+ 'd': pd.date_range('2014-1-1', '2014-1-4')})
+ result = df.groupby('grouping')['d'].transform('first')
+ dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'),
+ pd.Timestamp('2014-1-4')]
+ expected = pd.Series(dates, name='d')
+ assert_series_equal(result, expected)
+
+
+def test_transform_length():
+ # GH 9697
+ df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]})
+ expected = pd.Series([3.0] * 4)
+
+ def nsum(x):
+ return np.nansum(x)
+
+ results = [df.groupby('col1').transform(sum)['col2'],
+ df.groupby('col1')['col2'].transform(sum),
+ df.groupby('col1').transform(nsum)['col2'],
+ df.groupby('col1')['col2'].transform(nsum)]
+ for result in results:
+ assert_series_equal(result, expected, check_names=False)
+
+
+def test_transform_coercion():
+
+    # GH 14457
+    # when transforming, be sure not to coerce via assignment
+ df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1]))
+ g = df.groupby('A')
+
+ expected = g.transform(np.mean)
+ result = g.transform(lambda x: np.mean(x))
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_transform_with_int():
+
+    # GH 3740, make sure that we upcast on item-by-item transform when needed
+
+ # floats
+ df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'),
+ C=Series(
+ [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo'))
+ with np.errstate(all='ignore'):
+ result = df.groupby('A').transform(
+ lambda x: (x - x.mean()) / x.std())
+ expected = DataFrame(dict(B=np.nan, C=Series(
+ [-1, 0, 1, -1, 0, 1], dtype='float64')))
+ assert_frame_equal(result, expected)
+
+ # int case
+ df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1,
+ C=[1, 2, 3, 1, 2, 3], D='foo'))
+ with np.errstate(all='ignore'):
+ result = df.groupby('A').transform(
+ lambda x: (x - x.mean()) / x.std())
+ expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1]))
+ assert_frame_equal(result, expected)
+
+ # int that needs float conversion
+ s = Series([2, 3, 4, 10, 5, -1])
+ df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo'))
+ with np.errstate(all='ignore'):
+ result = df.groupby('A').transform(
+ lambda x: (x - x.mean()) / x.std())
+
+ s1 = s.iloc[0:3]
+ s1 = (s1 - s1.mean()) / s1.std()
+ s2 = s.iloc[3:6]
+ s2 = (s2 - s2.mean()) / s2.std()
+ expected = DataFrame(dict(B=np.nan, C=concat([s1, s2])))
+ assert_frame_equal(result, expected)
+
+ # int downcasting
+ result = df.groupby('A').transform(lambda x: x * 2 / 2)
+ expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1]))
+ assert_frame_equal(result, expected)
+
+
+def test_groupby_transform_with_nan_group():
+ # GH 9941
+ df = pd.DataFrame({'a': range(10),
+ 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]})
+ result = df.groupby(df.b)['a'].transform(max)
+ expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.],
+ name='a')
+ assert_series_equal(result, expected)
+
+
+def test_transform_mixed_type():
+ index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]
+ ])
+ df = DataFrame({'d': [1., 1., 1., 2., 2., 2.],
+ 'c': np.tile(['a', 'b', 'c'], 2),
+ 'v': np.arange(1., 7.)}, index=index)
+
+ def f(group):
+ group['g'] = group['d'] * 2
+ return group[:1]
+
+ grouped = df.groupby('c')
+ result = grouped.apply(f)
+
+ assert result['d'].dtype == np.float64
+
+ # this is by definition a mutating operation!
+ with option_context('mode.chained_assignment', None):
+ for key, group in grouped:
+ res = f(group)
+ assert_frame_equal(res, result.loc[key])
+
+
+def test_cython_group_transform_algos():
+ # GH 4095
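+    # a single group (all labels zero), so the cython cumulative ops
+    # must match numpy's over the whole column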
+ dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32,
+ np.uint64, np.float32, np.float64]
+
+ ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]),
+ (groupby.group_cumsum, np.cumsum, dtypes)]
+
+ is_datetimelike = False
+ for pd_op, np_op, dtypes in ops:
+ for dtype in dtypes:
+ data = np.array([[1], [2], [3], [4]], dtype=dtype)
+ ans = np.zeros_like(data)
+ labels = np.array([0, 0, 0, 0], dtype=np.int64)
+ pd_op(ans, data, labels, is_datetimelike)
+ tm.assert_numpy_array_equal(np_op(data), ans[:, 0],
+ check_dtype=False)
+
+ # with nans
+ labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
+
+ data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64')
+ actual = np.zeros_like(data)
+ actual.fill(np.nan)
+ groupby.group_cumprod_float64(actual, data, labels, is_datetimelike)
+ expected = np.array([1, 2, 6, np.nan, 24], dtype='float64')
+ tm.assert_numpy_array_equal(actual[:, 0], expected)
+
+ actual = np.zeros_like(data)
+ actual.fill(np.nan)
+ groupby.group_cumsum(actual, data, labels, is_datetimelike)
+ expected = np.array([1, 3, 6, np.nan, 10], dtype='float64')
+ tm.assert_numpy_array_equal(actual[:, 0], expected)
+
+ # timedelta
+ is_datetimelike = True
+ data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None]
+ actual = np.zeros_like(data, dtype='int64')
+ groupby.group_cumsum(actual, data.view('int64'), labels,
+ is_datetimelike)
+ expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64(
+ 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'),
+ np.timedelta64(5, 'ns')])
+ tm.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected)
+
+
+@pytest.mark.parametrize(
+ "op, args, targop",
+ [('cumprod', (), lambda x: x.cumprod()),
+ ('cumsum', (), lambda x: x.cumsum()),
+ ('shift', (-1, ), lambda x: x.shift(-1)),
+ ('shift', (1, ), lambda x: x.shift())])
+def test_cython_transform_series(op, args, targop):
+ # GH 4095
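+    # the named transform, the bound groupby method and the plain
+    # python targop must all give the same answer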
+ s = Series(np.random.randn(1000))
+ s_missing = s.copy()
+ s_missing.iloc[2:10] = np.nan
+ labels = np.random.randint(0, 50, size=1000).astype(float)
+
+ # series
+ for data in [s, s_missing]:
+ # print(data.head())
+ expected = data.groupby(labels).transform(targop)
+
+ tm.assert_series_equal(
+ expected,
+ data.groupby(labels).transform(op, *args))
+ tm.assert_series_equal(expected, getattr(
+ data.groupby(labels), op)(*args))
+
+
+@pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
+@pytest.mark.parametrize("skipna", [False, True])
+@pytest.mark.parametrize('input, exp', [
+ # When everything is NaN
+ ({'key': ['b'] * 10, 'value': np.nan},
+ pd.Series([np.nan] * 10, name='value')),
+ # When there is a single NaN
+ ({'key': ['b'] * 10 + ['a'] * 2,
+ 'value': [3] * 3 + [np.nan] + [3] * 8},
+ {('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
+ ('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
+ 2187., 6561., 19683., 3.0, 9.0],
+ ('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
+ ('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
+ 21., 24., 27., 3.0, 6.0]})])
+def test_groupby_cum_skipna(op, skipna, input, exp):
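+    # exp is a ready Series when every (op, skipna) combination agrees,
+    # otherwise a dict keyed by (op, skipna)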
+ df = pd.DataFrame(input)
+ result = df.groupby('key')['value'].transform(op, skipna=skipna)
+ if isinstance(exp, dict):
+ expected = exp[(op, skipna)]
+ else:
+ expected = exp
+ expected = pd.Series(expected, name='value')
+ tm.assert_series_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+ "op, args, targop",
+ [('cumprod', (), lambda x: x.cumprod()),
+ ('cumsum', (), lambda x: x.cumsum()),
+ ('shift', (-1, ), lambda x: x.shift(-1)),
+ ('shift', (1, ), lambda x: x.shift())])
+def test_cython_transform_frame(op, args, targop):
+ s = Series(np.random.randn(1000))
+ s_missing = s.copy()
+ s_missing.iloc[2:10] = np.nan
+ labels = np.random.randint(0, 50, size=1000).astype(float)
+ strings = list('qwertyuiopasdfghjklz')
+ strings_missing = strings[:]
+ strings_missing[5] = np.nan
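+    # mix numeric, datetime-like, string and categorical columns so the
+    # cython transforms are exercised across dtypes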
+ df = DataFrame({'float': s,
+ 'float_missing': s_missing,
+ 'int': [1, 1, 1, 1, 2] * 200,
+ 'datetime': pd.date_range('1990-1-1', periods=1000),
+ 'timedelta': pd.timedelta_range(1, freq='s',
+ periods=1000),
+ 'string': strings * 50,
+ 'string_missing': strings_missing * 50},
+ columns=['float', 'float_missing', 'int', 'datetime',
+ 'timedelta', 'string', 'string_missing'])
+ df['cat'] = df['string'].astype('category')
+
+ df2 = df.copy()
+ df2.index = pd.MultiIndex.from_product([range(100), range(10)])
+
+ # DataFrame - Single and MultiIndex,
+ # group by values, index level, columns
+ for df in [df, df2]:
+ for gb_target in [dict(by=labels), dict(level=0), dict(by='string')
+ ]: # dict(by='string_missing')]:
+ # dict(by=['int','string'])]:
+
+ gb = df.groupby(**gb_target)
+ # whitelisted methods set the selection before applying
+            # a bit of a hack to make sure the cythonized shift
+ # is equivalent to pre 0.17.1 behavior
+ if op == 'shift':
+ gb._set_group_selection()
+
+ if op != 'shift' and 'int' not in gb_target:
+ # numeric apply fastpath promotes dtype so have
+ # to apply separately and concat
+ i = gb[['int']].apply(targop)
+ f = gb[['float', 'float_missing']].apply(targop)
+ expected = pd.concat([f, i], axis=1)
+ else:
+ expected = gb.apply(targop)
+
+ expected = expected.sort_index(axis=1)
+ tm.assert_frame_equal(expected,
+ gb.transform(op, *args).sort_index(
+ axis=1))
+ tm.assert_frame_equal(
expected,
- data.groupby(labels).transform(op, *args))
- tm.assert_series_equal(expected, getattr(
- data.groupby(labels), op)(*args))
-
- @pytest.mark.parametrize("op", ['cumprod', 'cumsum'])
- @pytest.mark.parametrize("skipna", [False, True])
- @pytest.mark.parametrize('input, exp', [
- # When everything is NaN
- ({'key': ['b'] * 10, 'value': np.nan},
- pd.Series([np.nan] * 10, name='value')),
- # When there is a single NaN
- ({'key': ['b'] * 10 + ['a'] * 2,
- 'value': [3] * 3 + [np.nan] + [3] * 8},
- {('cumprod', False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0],
- ('cumprod', True): [3.0, 9.0, 27.0, np.nan, 81., 243., 729.,
- 2187., 6561., 19683., 3.0, 9.0],
- ('cumsum', False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0],
- ('cumsum', True): [3.0, 6.0, 9.0, np.nan, 12., 15., 18.,
- 21., 24., 27., 3.0, 6.0]})])
- def test_groupby_cum_skipna(self, op, skipna, input, exp):
- df = pd.DataFrame(input)
- result = df.groupby('key')['value'].transform(op, skipna=skipna)
- if isinstance(exp, dict):
- expected = exp[(op, skipna)]
- else:
- expected = exp
- expected = pd.Series(expected, name='value')
- tm.assert_series_equal(expected, result)
-
- @pytest.mark.parametrize(
- "op, args, targop",
- [('cumprod', (), lambda x: x.cumprod()),
- ('cumsum', (), lambda x: x.cumsum()),
- ('shift', (-1, ), lambda x: x.shift(-1)),
- ('shift', (1, ), lambda x: x.shift())])
- def test_cython_transform_frame(self, op, args, targop):
- s = Series(np.random.randn(1000))
- s_missing = s.copy()
- s_missing.iloc[2:10] = np.nan
- labels = np.random.randint(0, 50, size=1000).astype(float)
- strings = list('qwertyuiopasdfghjklz')
- strings_missing = strings[:]
- strings_missing[5] = np.nan
- df = DataFrame({'float': s,
- 'float_missing': s_missing,
- 'int': [1, 1, 1, 1, 2] * 200,
- 'datetime': pd.date_range('1990-1-1', periods=1000),
- 'timedelta': pd.timedelta_range(1, freq='s',
- periods=1000),
- 'string': strings * 50,
- 'string_missing': strings_missing * 50},
- columns=['float', 'float_missing', 'int', 'datetime',
- 'timedelta', 'string', 'string_missing'])
- df['cat'] = df['string'].astype('category')
-
- df2 = df.copy()
- df2.index = pd.MultiIndex.from_product([range(100), range(10)])
-
- # DataFrame - Single and MultiIndex,
- # group by values, index level, columns
- for df in [df, df2]:
- for gb_target in [dict(by=labels), dict(level=0), dict(by='string')
- ]: # dict(by='string_missing')]:
- # dict(by=['int','string'])]:
-
- gb = df.groupby(**gb_target)
- # whitelisted methods set the selection before applying
- # bit a of hack to make sure the cythonized shift
- # is equivalent to pre 0.17.1 behavior
- if op == 'shift':
- gb._set_group_selection()
-
- if op != 'shift' and 'int' not in gb_target:
- # numeric apply fastpath promotes dtype so have
- # to apply separately and concat
- i = gb[['int']].apply(targop)
- f = gb[['float', 'float_missing']].apply(targop)
- expected = pd.concat([f, i], axis=1)
+ getattr(gb, op)(*args).sort_index(axis=1))
+ # individual columns
+ for c in df:
+ if c not in ['float', 'int', 'float_missing'
+ ] and op != 'shift':
+ pytest.raises(DataError, gb[c].transform, op)
+ pytest.raises(DataError, getattr(gb[c], op))
else:
- expected = gb.apply(targop)
-
- expected = expected.sort_index(axis=1)
- tm.assert_frame_equal(expected,
- gb.transform(op, *args).sort_index(
- axis=1))
- tm.assert_frame_equal(
- expected,
- getattr(gb, op)(*args).sort_index(axis=1))
- # individual columns
- for c in df:
- if c not in ['float', 'int', 'float_missing'
- ] and op != 'shift':
- pytest.raises(DataError, gb[c].transform, op)
- pytest.raises(DataError, getattr(gb[c], op))
- else:
- expected = gb[c].apply(targop)
- expected.name = c
- tm.assert_series_equal(expected,
- gb[c].transform(op, *args))
- tm.assert_series_equal(expected,
- getattr(gb[c], op)(*args))
-
- def test_transform_with_non_scalar_group(self):
- # GH 10165
- cols = pd.MultiIndex.from_tuples([
- ('syn', 'A'), ('mis', 'A'), ('non', 'A'),
- ('syn', 'C'), ('mis', 'C'), ('non', 'C'),
- ('syn', 'T'), ('mis', 'T'), ('non', 'T'),
- ('syn', 'G'), ('mis', 'G'), ('non', 'G')])
- df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
- columns=cols,
- index=['A', 'C', 'G', 'T'])
- tm.assert_raises_regex(ValueError, 'transform must return '
- 'a scalar value for each '
- 'group.*',
- df.groupby(axis=1, level=1).transform,
- lambda z: z.div(z.sum(axis=1), axis=0))
-
- @pytest.mark.parametrize('cols,exp,comp_func', [
- ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal),
- (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}),
- tm.assert_frame_equal)
- ])
- @pytest.mark.parametrize('agg_func', [
- 'count', 'rank', 'size'])
- def test_transform_numeric_ret(self, cols, exp, comp_func, agg_func):
- if agg_func == 'size' and isinstance(cols, list):
- pytest.xfail("'size' transformation not supported with "
- "NDFrameGroupy")
-
- # GH 19200
- df = pd.DataFrame(
- {'a': pd.date_range('2018-01-01', periods=3),
- 'b': range(3),
- 'c': range(7, 10)})
-
- result = df.groupby('b')[cols].transform(agg_func)
-
- if agg_func == 'rank':
- exp = exp.astype('float')
-
- comp_func(result, exp)
-
- @pytest.mark.parametrize("mix_groupings", [True, False])
- @pytest.mark.parametrize("as_series", [True, False])
- @pytest.mark.parametrize("val1,val2", [
- ('foo', 'bar'), (1, 2), (1., 2.)])
- @pytest.mark.parametrize("fill_method,limit,exp_vals", [
- ("ffill", None,
- [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
- ("ffill", 1,
- [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
- ("bfill", None,
- ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
- ("bfill", 1,
- [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
- ])
- def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
- fill_method, limit, exp_vals):
- vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
- _exp_vals = list(exp_vals)
- # Overwrite placeholder values
- for index, exp_val in enumerate(_exp_vals):
- if exp_val == 'val1':
- _exp_vals[index] = val1
- elif exp_val == 'val2':
- _exp_vals[index] = val2
-
- # Need to modify values and expectations depending on the
- # Series / DataFrame that we ultimately want to generate
- if mix_groupings: # ['a', 'b', 'a, 'b', ...]
- keys = ['a', 'b'] * len(vals)
-
- def interweave(list_obj):
- temp = list()
- for x in list_obj:
- temp.extend([x, x])
-
- return temp
-
- _exp_vals = interweave(_exp_vals)
- vals = interweave(vals)
- else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
- keys = ['a'] * len(vals) + ['b'] * len(vals)
- _exp_vals = _exp_vals * 2
- vals = vals * 2
-
- df = DataFrame({'key': keys, 'val': vals})
- if as_series:
- result = getattr(
- df.groupby('key')['val'], fill_method)(limit=limit)
- exp = Series(_exp_vals, name='val')
- assert_series_equal(result, exp)
- else:
- result = getattr(df.groupby('key'), fill_method)(limit=limit)
- exp = DataFrame({'key': keys, 'val': _exp_vals})
- assert_frame_equal(result, exp)
-
- @pytest.mark.parametrize("test_series", [True, False])
- @pytest.mark.parametrize("periods,fill_method,limit", [
- (1, 'ffill', None), (1, 'ffill', 1),
- (1, 'bfill', None), (1, 'bfill', 1),
- (-1, 'ffill', None), (-1, 'ffill', 1),
- (-1, 'bfill', None), (-1, 'bfill', 1)])
- def test_pct_change(self, test_series, periods, fill_method, limit):
- vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
- exp_vals = Series(vals).pct_change(periods=periods,
- fill_method=fill_method,
- limit=limit).tolist()
-
- df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
- 'vals': vals * 2})
- grp = df.groupby('key')
-
- def get_result(grp_obj):
- return grp_obj.pct_change(periods=periods,
- fill_method=fill_method,
- limit=limit)
-
- if test_series:
- exp = pd.Series(exp_vals * 2)
- exp.name = 'vals'
- grp = grp['vals']
- result = get_result(grp)
- tm.assert_series_equal(result, exp)
- else:
- exp = DataFrame({'vals': exp_vals * 2})
- result = get_result(grp)
- tm.assert_frame_equal(result, exp)
-
- @pytest.mark.parametrize("func", [np.any, np.all])
- def test_any_all_np_func(self, func):
- # GH 20653
- df = pd.DataFrame([['foo', True],
- [np.nan, True],
- ['foo', True]], columns=['key', 'val'])
-
- exp = pd.Series([True, np.nan, True], name='val')
-
- res = df.groupby('key')['val'].transform(func)
- tm.assert_series_equal(res, exp)
+ expected = gb[c].apply(targop)
+ expected.name = c
+ tm.assert_series_equal(expected,
+ gb[c].transform(op, *args))
+ tm.assert_series_equal(expected,
+ getattr(gb[c], op)(*args))
+
+
+def test_transform_with_non_scalar_group():
+ # GH 10165
+ cols = pd.MultiIndex.from_tuples([
+ ('syn', 'A'), ('mis', 'A'), ('non', 'A'),
+ ('syn', 'C'), ('mis', 'C'), ('non', 'C'),
+ ('syn', 'T'), ('mis', 'T'), ('non', 'T'),
+ ('syn', 'G'), ('mis', 'G'), ('non', 'G')])
+ df = pd.DataFrame(np.random.randint(1, 10, (4, 12)),
+ columns=cols,
+ index=['A', 'C', 'G', 'T'])
+ tm.assert_raises_regex(ValueError, 'transform must return '
+ 'a scalar value for each '
+ 'group.*',
+ df.groupby(axis=1, level=1).transform,
+ lambda z: z.div(z.sum(axis=1), axis=0))
+
+
+@pytest.mark.parametrize('cols,exp,comp_func', [
+ ('a', pd.Series([1, 1, 1], name='a'), tm.assert_series_equal),
+ (['a', 'c'], pd.DataFrame({'a': [1, 1, 1], 'c': [1, 1, 1]}),
+ tm.assert_frame_equal)
+])
+@pytest.mark.parametrize('agg_func', [
+ 'count', 'rank', 'size'])
+def test_transform_numeric_ret(cols, exp, comp_func, agg_func):
+ if agg_func == 'size' and isinstance(cols, list):
+ pytest.xfail("'size' transformation not supported with "
+ "NDFrameGroupy")
+
+ # GH 19200
+ df = pd.DataFrame(
+ {'a': pd.date_range('2018-01-01', periods=3),
+ 'b': range(3),
+ 'c': range(7, 10)})
+
+ result = df.groupby('b')[cols].transform(agg_func)
+
+ if agg_func == 'rank':
+ exp = exp.astype('float')
+
+ comp_func(result, exp)
+
+
+@pytest.mark.parametrize("mix_groupings", [True, False])
+@pytest.mark.parametrize("as_series", [True, False])
+@pytest.mark.parametrize("val1,val2", [
+ ('foo', 'bar'), (1, 2), (1., 2.)])
+@pytest.mark.parametrize("fill_method,limit,exp_vals", [
+ ("ffill", None,
+ [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
+ ("ffill", 1,
+ [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
+ ("bfill", None,
+ ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
+ ("bfill", 1,
+ [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
+])
+def test_group_fill_methods(mix_groupings, as_series, val1, val2,
+ fill_method, limit, exp_vals):
+ vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
+ _exp_vals = list(exp_vals)
+ # Overwrite placeholder values
+ for index, exp_val in enumerate(_exp_vals):
+ if exp_val == 'val1':
+ _exp_vals[index] = val1
+ elif exp_val == 'val2':
+ _exp_vals[index] = val2
+
+ # Need to modify values and expectations depending on the
+ # Series / DataFrame that we ultimately want to generate
+    if mix_groupings:  # ['a', 'b', 'a', 'b', ...]
+ keys = ['a', 'b'] * len(vals)
+
+ def interweave(list_obj):
+ temp = list()
+ for x in list_obj:
+ temp.extend([x, x])
+
+ return temp
+
+ _exp_vals = interweave(_exp_vals)
+ vals = interweave(vals)
+ else: # ['a', 'a', 'a', ... 'b', 'b', 'b']
+ keys = ['a'] * len(vals) + ['b'] * len(vals)
+ _exp_vals = _exp_vals * 2
+ vals = vals * 2
+
+ df = DataFrame({'key': keys, 'val': vals})
+ if as_series:
+ result = getattr(
+ df.groupby('key')['val'], fill_method)(limit=limit)
+ exp = Series(_exp_vals, name='val')
+ assert_series_equal(result, exp)
+ else:
+ result = getattr(df.groupby('key'), fill_method)(limit=limit)
+ exp = DataFrame({'key': keys, 'val': _exp_vals})
+ assert_frame_equal(result, exp)
+
+
+@pytest.mark.parametrize("test_series", [True, False])
+@pytest.mark.parametrize("periods,fill_method,limit", [
+ (1, 'ffill', None), (1, 'ffill', 1),
+ (1, 'bfill', None), (1, 'bfill', 1),
+ (-1, 'ffill', None), (-1, 'ffill', 1),
+ (-1, 'bfill', None), (-1, 'bfill', 1)])
+def test_pct_change(test_series, periods, fill_method, limit):
+ vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
+ exp_vals = Series(vals).pct_change(periods=periods,
+ fill_method=fill_method,
+ limit=limit).tolist()
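+    # groupby pct_change should match plain Series.pct_change applied
+    # to each group's values independently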
+
+ df = DataFrame({'key': ['a'] * len(vals) + ['b'] * len(vals),
+ 'vals': vals * 2})
+ grp = df.groupby('key')
+
+ def get_result(grp_obj):
+ return grp_obj.pct_change(periods=periods,
+ fill_method=fill_method,
+ limit=limit)
+
+ if test_series:
+ exp = pd.Series(exp_vals * 2)
+ exp.name = 'vals'
+ grp = grp['vals']
+ result = get_result(grp)
+ tm.assert_series_equal(result, exp)
+ else:
+ exp = DataFrame({'vals': exp_vals * 2})
+ result = get_result(grp)
+ tm.assert_frame_equal(result, exp)
+
+
+@pytest.mark.parametrize("func", [np.any, np.all])
+def test_any_all_np_func(func):
+ # GH 20653
+ df = pd.DataFrame([['foo', True],
+ [np.nan, True],
+ ['foo', True]], columns=['key', 'val'])
+
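+    # the NaN key is excluded from grouping, so its row stays NaN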
+ exp = pd.Series([True, np.nan, True], name='val')
+
+ res = df.groupby('key')['val'].transform(func)
+ tm.assert_series_equal(res, exp)
diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py
index 1cf854ad4a926..dae69a86910af 100644
--- a/pandas/tests/indexes/datetimes/test_construction.py
+++ b/pandas/tests/indexes/datetimes/test_construction.py
@@ -598,16 +598,16 @@ def test_datetimeindex_constructor_misc(self):
idx2 = DatetimeIndex(start=sdate, end=edate,
freq=offsets.Week(weekday=6))
assert len(idx1) == len(idx2)
- assert idx1.offset == idx2.offset
+ assert idx1.freq == idx2.freq
idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS')
idx2 = DatetimeIndex(start=sdate, end=edate,
freq=offsets.QuarterBegin(startingMonth=1))
assert len(idx1) == len(idx2)
- assert idx1.offset == idx2.offset
+ assert idx1.freq == idx2.freq
idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ')
idx2 = DatetimeIndex(start=sdate, end=edate,
freq=offsets.BQuarterEnd(startingMonth=12))
assert len(idx1) == len(idx2)
- assert idx1.offset == idx2.offset
+ assert idx1.freq == idx2.freq
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
index d2ec465468dfb..e5291ed52a86c 100644
--- a/pandas/tests/indexes/datetimes/test_date_range.py
+++ b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -236,6 +236,12 @@ def test_catch_infinite_loop(self):
pytest.raises(Exception, date_range, datetime(2011, 11, 11),
datetime(2011, 11, 12), freq=offset)
+ @pytest.mark.parametrize('periods', (1, 2))
+ def test_wom_len(self, periods):
+ # https://github.com/pandas-dev/pandas/issues/20517
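+        # 'WOM-1MON' means the first Monday of each month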
+ res = date_range(start='20110101', periods=periods, freq='WOM-1MON')
+ assert len(res) == periods
+
class TestGenRangeGeneration(object):
@@ -331,21 +337,21 @@ def test_naive_aware_conflicts(self):
aware.join(naive)
def test_cached_range(self):
- DatetimeIndex._cached_range(START, END, offset=BDay())
- DatetimeIndex._cached_range(START, periods=20, offset=BDay())
- DatetimeIndex._cached_range(end=START, periods=20, offset=BDay())
+ DatetimeIndex._cached_range(START, END, freq=BDay())
+ DatetimeIndex._cached_range(START, periods=20, freq=BDay())
+ DatetimeIndex._cached_range(end=START, periods=20, freq=BDay())
- with tm.assert_raises_regex(TypeError, "offset"):
+ with tm.assert_raises_regex(TypeError, "freq"):
DatetimeIndex._cached_range(START, END)
with tm.assert_raises_regex(TypeError, "specify period"):
- DatetimeIndex._cached_range(START, offset=BDay())
+ DatetimeIndex._cached_range(START, freq=BDay())
with tm.assert_raises_regex(TypeError, "specify period"):
- DatetimeIndex._cached_range(end=END, offset=BDay())
+ DatetimeIndex._cached_range(end=END, freq=BDay())
with tm.assert_raises_regex(TypeError, "start or end"):
- DatetimeIndex._cached_range(periods=20, offset=BDay())
+ DatetimeIndex._cached_range(periods=20, freq=BDay())
def test_cached_range_bug(self):
rng = date_range('2010-09-01 05:00:00', periods=50,
@@ -393,7 +399,7 @@ def test_daterange_bug_456(self):
# GH #456
rng1 = bdate_range('12/5/2011', '12/5/2011')
rng2 = bdate_range('12/2/2011', '12/5/2011')
- rng2.offset = BDay()
+ rng2.freq = BDay()
result = rng1.union(rng2)
assert isinstance(result, DatetimeIndex)
@@ -605,27 +611,27 @@ def test_constructor(self):
bdate_range('2011-1-1', '2012-1-1', 'C')
def test_cached_range(self):
- DatetimeIndex._cached_range(START, END, offset=CDay())
+ DatetimeIndex._cached_range(START, END, freq=CDay())
DatetimeIndex._cached_range(START, periods=20,
- offset=CDay())
+ freq=CDay())
DatetimeIndex._cached_range(end=START, periods=20,
- offset=CDay())
+ freq=CDay())
# with pytest.raises(TypeError):
- with tm.assert_raises_regex(TypeError, "offset"):
+ with tm.assert_raises_regex(TypeError, "freq"):
DatetimeIndex._cached_range(START, END)
# with pytest.raises(TypeError):
with tm.assert_raises_regex(TypeError, "specify period"):
- DatetimeIndex._cached_range(START, offset=CDay())
+ DatetimeIndex._cached_range(START, freq=CDay())
# with pytest.raises(TypeError):
with tm.assert_raises_regex(TypeError, "specify period"):
- DatetimeIndex._cached_range(end=END, offset=CDay())
+ DatetimeIndex._cached_range(end=END, freq=CDay())
# with pytest.raises(TypeError):
with tm.assert_raises_regex(TypeError, "start or end"):
- DatetimeIndex._cached_range(periods=20, offset=CDay())
+ DatetimeIndex._cached_range(periods=20, freq=CDay())
def test_misc(self):
end = datetime(2009, 5, 13)
@@ -640,7 +646,7 @@ def test_daterange_bug_456(self):
# GH #456
rng1 = bdate_range('12/5/2011', '12/5/2011', freq='C')
rng2 = bdate_range('12/2/2011', '12/5/2011', freq='C')
- rng2.offset = CDay()
+ rng2.freq = CDay()
result = rng1.union(rng2)
assert isinstance(result, DatetimeIndex)
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index af65a8618d30f..dd192db4b0eb3 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -53,10 +53,10 @@ def test_dti_business_getitem(self):
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
tm.assert_index_equal(smaller, exp)
- assert smaller.offset == rng.offset
+ assert smaller.freq == rng.freq
sliced = rng[::5]
- assert sliced.offset == BDay() * 5
+ assert sliced.freq == BDay() * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
@@ -77,10 +77,10 @@ def test_dti_custom_getitem(self):
smaller = rng[:5]
exp = DatetimeIndex(rng.view(np.ndarray)[:5])
tm.assert_index_equal(smaller, exp)
- assert smaller.offset == rng.offset
+ assert smaller.freq == rng.freq
sliced = rng[::5]
- assert sliced.offset == CDay() * 5
+ assert sliced.freq == CDay() * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 8986828399a98..3c7d5d37e98f3 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -405,6 +405,18 @@ def test_equals(self):
assert not idx.equals(list(idx3))
assert not idx.equals(pd.Series(idx3))
+ def test_offset_deprecated(self):
+ # GH 20716
+ idx = pd.DatetimeIndex(['20180101', '20180102'])
+
+ # getter deprecated
+ with tm.assert_produces_warning(FutureWarning):
+ idx.offset
+
+ # setter deprecated
+ with tm.assert_produces_warning(FutureWarning):
+ idx.offset = BDay()
+
class TestBusinessDatetimeIndex(object):
@@ -420,7 +432,7 @@ def test_comparison(self):
def test_pickle_unpickle(self):
unpickled = tm.round_trip_pickle(self.rng)
- assert unpickled.offset is not None
+ assert unpickled.freq is not None
def test_copy(self):
cp = self.rng.copy()
@@ -430,15 +442,15 @@ def test_copy(self):
def test_shift(self):
shifted = self.rng.shift(5)
assert shifted[0] == self.rng[5]
- assert shifted.offset == self.rng.offset
+ assert shifted.freq == self.rng.freq
shifted = self.rng.shift(-5)
assert shifted[5] == self.rng[0]
- assert shifted.offset == self.rng.offset
+ assert shifted.freq == self.rng.freq
shifted = self.rng.shift(0)
assert shifted[0] == self.rng[0]
- assert shifted.offset == self.rng.offset
+ assert shifted.freq == self.rng.freq
rng = date_range(START, END, freq=BMonthEnd())
shifted = rng.shift(1, freq=BDay())
@@ -485,15 +497,15 @@ def test_shift(self):
shifted = self.rng.shift(5)
assert shifted[0] == self.rng[5]
- assert shifted.offset == self.rng.offset
+ assert shifted.freq == self.rng.freq
shifted = self.rng.shift(-5)
assert shifted[5] == self.rng[0]
- assert shifted.offset == self.rng.offset
+ assert shifted.freq == self.rng.freq
shifted = self.rng.shift(0)
assert shifted[0] == self.rng[0]
- assert shifted.offset == self.rng.offset
+ assert shifted.freq == self.rng.freq
# PerformanceWarning
with warnings.catch_warnings(record=True):
@@ -503,7 +515,7 @@ def test_shift(self):
def test_pickle_unpickle(self):
unpickled = tm.round_trip_pickle(self.rng)
- assert unpickled.offset is not None
+ assert unpickled.freq is not None
def test_equals(self):
assert not self.rng.equals(list(self.rng))
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
index f263ac78cd343..4580d9fff31d5 100644
--- a/pandas/tests/indexes/datetimes/test_partial_slicing.py
+++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -91,6 +91,27 @@ def test_slice_duplicate_monotonic(self):
expected = Timestamp('2017-01-01')
assert result == expected
+ def test_monotone_DTI_indexing_bug(self):
+ # GH 19362
+        # Test accessing the first element of a monotonic decreasing
+        # index via partial string indexing.
+
+ df = pd.DataFrame(list(range(5)))
+ date_list = ['2018-01-02', '2017-02-10', '2016-03-10',
+ '2015-03-15', '2014-03-16']
+ date_index = pd.to_datetime(date_list)
+ df['date'] = date_index
+ expected = pd.DataFrame({0: list(range(5)), 'date': date_index})
+ tm.assert_frame_equal(df, expected)
+
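+        # partial string .loc on a monotonic decreasing index should
+        # still find the single matching row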
+ df = pd.DataFrame({'A': [1, 2, 3]},
+ index=pd.date_range('20170101',
+ periods=3)[::-1])
+ expected = pd.DataFrame({'A': 1},
+ index=pd.date_range('20170103',
+ periods=1))
+ tm.assert_frame_equal(df.loc['2017-01-03'], expected)
+
def test_slice_year(self):
dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500)
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 84632e59e2bfb..cb9364edc0cc3 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -357,7 +357,7 @@ def test_intersection(self):
expected = rng[10:25]
tm.assert_index_equal(the_int, expected)
assert isinstance(the_int, DatetimeIndex)
- assert the_int.offset == rng.offset
+ assert the_int.freq == rng.freq
the_int = rng1.intersection(rng2.view(DatetimeIndex))
tm.assert_index_equal(the_int, expected)
diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py
index c3926cc5f1633..daf44a559cf5c 100644
--- a/pandas/tests/indexes/period/test_formats.py
+++ b/pandas/tests/indexes/period/test_formats.py
@@ -13,7 +13,7 @@ def test_to_native_types():
# First, with no arguments.
expected = np.array(['2017-01-01', '2017-01-02',
-                         '2017-01-03'], dtype='<U10')
+                         '2017-01-03'], dtype='=U10')
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ ... @@
+    # not valid: alpha <= 0 or alpha > 1
+    for alpha in (-0.5, 1.5):
with pytest.raises(ValueError):
- c(halflife=0)
+ c(alpha=alpha)
- # not valid: alpha <= 0 or alpha > 1
- for alpha in (-0.5, 1.5):
- with pytest.raises(ValueError):
- c(alpha=alpha)
-
- def test_numpy_compat(self):
+ @pytest.mark.parametrize(
+ 'method', ['std', 'mean', 'var'])
+ def test_numpy_compat(self, method):
# see gh-12811
e = rwindow.EWM(Series([2, 4, 6]), alpha=0.5)
msg = "numpy operations are not valid with window objects"
- for func in ('std', 'mean', 'var'):
- tm.assert_raises_regex(UnsupportedFunctionCall, msg,
- getattr(e, func), 1, 2, 3)
- tm.assert_raises_regex(UnsupportedFunctionCall, msg,
- getattr(e, func), dtype=np.float64)
+ tm.assert_raises_regex(UnsupportedFunctionCall, msg,
+ getattr(e, method), 1, 2, 3)
+ tm.assert_raises_regex(UnsupportedFunctionCall, msg,
+ getattr(e, method), dtype=np.float64)
# gh-12373 : rolling functions error on float32 data
@@ -943,11 +987,8 @@ def test_cmov_window_na_min_periods(self):
tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
- def test_cmov_window_regular(self):
+ def test_cmov_window_regular(self, win_types):
# GH 8238
- win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
- 'blackmanharris', 'nuttall', 'barthann']
-
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48,
10.63, 14.48])
xps = {
@@ -969,33 +1010,25 @@ def test_cmov_window_regular(self):
14.0825, 11.5675, np.nan, np.nan]
}
- for wt in win_types:
- xp = Series(xps[wt])
- rs = Series(vals).rolling(5, win_type=wt, center=True).mean()
- tm.assert_series_equal(xp, rs)
+ xp = Series(xps[win_types])
+ rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
+ tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
- def test_cmov_window_regular_linear_range(self):
+ def test_cmov_window_regular_linear_range(self, win_types):
# GH 8238
- win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
- 'blackmanharris', 'nuttall', 'barthann']
-
vals = np.array(range(10), dtype=np.float)
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
xp = Series(xp)
- for wt in win_types:
- rs = Series(vals).rolling(5, win_type=wt, center=True).mean()
- tm.assert_series_equal(xp, rs)
+ rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
+ tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
- def test_cmov_window_regular_missing_data(self):
+ def test_cmov_window_regular_missing_data(self, win_types):
# GH 8238
- win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
- 'blackmanharris', 'nuttall', 'barthann']
-
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan,
10.63, 14.48])
xps = {
@@ -1017,17 +1050,18 @@ def test_cmov_window_regular_missing_data(self):
9.16438, 13.05052, 14.02175, 16.1098, 13.65509]
}
- for wt in win_types:
- xp = Series(xps[wt])
- rs = Series(vals).rolling(5, win_type=wt, min_periods=3).mean()
- tm.assert_series_equal(xp, rs)
+ xp = Series(xps[win_types])
+ rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean()
+ tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
- def test_cmov_window_special(self):
+ def test_cmov_window_special(self, win_types_special):
# GH 8238
- win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
- kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2.,
- 'width': 2.}, {'width': 0.5}]
+ kwds = {
+ 'kaiser': {'beta': 1.},
+ 'gaussian': {'std': 1.},
+ 'general_gaussian': {'power': 2., 'width': 2.},
+ 'slepian': {'width': 0.5}}
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48,
10.63, 14.48])
@@ -1043,17 +1077,20 @@ def test_cmov_window_special(self):
12.90702, 12.83757, np.nan, np.nan]
}
- for wt, k in zip(win_types, kwds):
- xp = Series(xps[wt])
- rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k)
- tm.assert_series_equal(xp, rs)
+ xp = Series(xps[win_types_special])
+ rs = Series(vals).rolling(
+ 5, win_type=win_types_special, center=True).mean(
+ **kwds[win_types_special])
+ tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
- def test_cmov_window_special_linear_range(self):
+ def test_cmov_window_special_linear_range(self, win_types_special):
# GH 8238
- win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
- kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2.,
- 'width': 2.}, {'width': 0.5}]
+ kwds = {
+ 'kaiser': {'beta': 1.},
+ 'gaussian': {'std': 1.},
+ 'general_gaussian': {'power': 2., 'width': 2.},
+ 'slepian': {'width': 0.5}}
vals = np.array(range(10), dtype=np.float)
xp = vals.copy()
@@ -1061,9 +1098,10 @@ def test_cmov_window_special_linear_range(self):
xp[-2:] = np.nan
xp = Series(xp)
- for wt, k in zip(win_types, kwds):
- rs = Series(vals).rolling(5, win_type=wt, center=True).mean(**k)
- tm.assert_series_equal(xp, rs)
+ rs = Series(vals).rolling(
+ 5, win_type=win_types_special, center=True).mean(
+ **kwds[win_types_special])
+ tm.assert_series_equal(xp, rs)
def test_rolling_median(self):
self._check_moment_func(np.median, name='median')
@@ -1150,43 +1188,76 @@ def test_rolling_quantile_param(self):
with pytest.raises(TypeError):
ser.rolling(3).quantile('foo')
- def test_rolling_apply(self):
+ def test_rolling_apply(self, raw):
# suppress warnings about empty slices, as we are deliberately testing
# with a 0-length Series
+
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
message=".*(empty slice|0 for slice).*",
category=RuntimeWarning)
- ser = Series([])
- tm.assert_series_equal(ser,
- ser.rolling(10).apply(lambda x: x.mean()))
-
def f(x):
return x[np.isfinite(x)].mean()
- self._check_moment_func(np.mean, name='apply', func=f)
+ self._check_moment_func(np.mean, name='apply', func=f, raw=raw)
- # GH 8080
+ expected = Series([])
+ result = expected.rolling(10).apply(lambda x: x.mean(), raw=raw)
+ tm.assert_series_equal(result, expected)
+
+ # gh-8080
s = Series([None, None, None])
- result = s.rolling(2, min_periods=0).apply(lambda x: len(x))
+ result = s.rolling(2, min_periods=0).apply(lambda x: len(x), raw=raw)
expected = Series([1., 2., 2.])
tm.assert_series_equal(result, expected)
- result = s.rolling(2, min_periods=0).apply(len)
+ result = s.rolling(2, min_periods=0).apply(len, raw=raw)
tm.assert_series_equal(result, expected)
- def test_rolling_apply_out_of_bounds(self):
- # #1850
+ @pytest.mark.parametrize('klass', [Series, DataFrame])
+ @pytest.mark.parametrize(
+ 'method', [lambda x: x.rolling(window=2), lambda x: x.expanding()])
+ def test_apply_future_warning(self, klass, method):
+
+ # gh-5071
+ s = klass(np.arange(3))
+
+ with tm.assert_produces_warning(FutureWarning):
+ method(s).apply(lambda x: len(x))
+
+ def test_rolling_apply_out_of_bounds(self, raw):
+ # gh-1850
vals = pd.Series([1, 2, 3, 4])
- result = vals.rolling(10).apply(np.sum)
+ result = vals.rolling(10).apply(np.sum, raw=raw)
assert result.isna().all()
- result = vals.rolling(10, min_periods=1).apply(np.sum)
+ result = vals.rolling(10, min_periods=1).apply(np.sum, raw=raw)
expected = pd.Series([1, 3, 6, 10], dtype=float)
tm.assert_almost_equal(result, expected)
+ @pytest.mark.parametrize('window', [2, '2s'])
+ def test_rolling_apply_with_pandas_objects(self, window):
+ # gh-5071
+ df = pd.DataFrame({'A': np.random.randn(5),
+ 'B': np.random.randint(0, 10, size=5)},
+ index=pd.date_range('20130101', periods=5, freq='s'))
+
+ # we have an equally spaced timeseries index
+ # so simulate removing the first period
+ def f(x):
+ if x.index[0] == df.index[0]:
+ return np.nan
+ return x.iloc[-1]
+
+ result = df.rolling(window).apply(f, raw=False)
+ expected = df.iloc[2:].reindex_like(df)
+ tm.assert_frame_equal(result, expected)
+
+ with pytest.raises(AttributeError):
+ df.rolling(window).apply(f, raw=True)
+
def test_rolling_std(self):
self._check_moment_func(lambda x: np.std(x, ddof=1),
name='std')
@@ -1256,10 +1327,10 @@ def get_result(obj, window, min_periods=None, center=False):
frame_result = get_result(self.frame, window=50)
assert isinstance(frame_result, DataFrame)
- tm.assert_series_equal(frame_result.iloc[-1, :],
- self.frame.iloc[-50:, :].apply(static_comp,
- axis=0),
- check_names=False)
+ tm.assert_series_equal(
+ frame_result.iloc[-1, :],
+ self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw),
+ check_names=False)
# check time_rule works
if has_time_rule:
@@ -1287,7 +1358,7 @@ def get_result(obj, window, min_periods=None, center=False):
static_comp(trunc_series))
tm.assert_series_equal(frame_result.xs(last_date),
- trunc_frame.apply(static_comp),
+ trunc_frame.apply(static_comp, raw=raw),
check_names=False)
# excluding NaNs correctly
@@ -1402,26 +1473,20 @@ def test_ewma(self):
result = vals.ewm(span=100, adjust=False).mean().sum()
assert np.abs(result - 1) < 1e-2
+ @pytest.mark.parametrize('adjust', [True, False])
+ @pytest.mark.parametrize('ignore_na', [True, False])
+ def test_ewma_cases(self, adjust, ignore_na):
+ # try the adjust/ignore_na args matrix
+
s = Series([1.0, 2.0, 4.0, 8.0])
- expected = Series([1.0, 1.6, 2.736842, 4.923077])
- for f in [lambda s: s.ewm(com=2.0, adjust=True).mean(),
- lambda s: s.ewm(com=2.0, adjust=True,
- ignore_na=False).mean(),
- lambda s: s.ewm(com=2.0, adjust=True, ignore_na=True).mean(),
- ]:
- result = f(s)
- tm.assert_series_equal(result, expected)
+ if adjust:
+ expected = Series([1.0, 1.6, 2.736842, 4.923077])
+ else:
+ expected = Series([1.0, 1.333333, 2.222222, 4.148148])
- expected = Series([1.0, 1.333333, 2.222222, 4.148148])
- for f in [lambda s: s.ewm(com=2.0, adjust=False).mean(),
- lambda s: s.ewm(com=2.0, adjust=False,
- ignore_na=False).mean(),
- lambda s: s.ewm(com=2.0, adjust=False,
- ignore_na=True).mean(),
- ]:
- result = f(s)
- tm.assert_series_equal(result, expected)
+ result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
+ tm.assert_series_equal(result, expected)
def test_ewma_nan_handling(self):
s = Series([1.] + [np.nan] * 5 + [1.])
@@ -1555,14 +1620,13 @@ def test_ewm_domain_checks(self):
s.ewm(alpha=1.0)
pytest.raises(ValueError, s.ewm, alpha=1.1)
- def test_ew_empty_series(self):
+ @pytest.mark.parametrize('method', ['mean', 'vol', 'var'])
+ def test_ew_empty_series(self, method):
vals = pd.Series([], dtype=np.float64)
ewm = vals.ewm(3)
- funcs = ['mean', 'vol', 'var']
- for f in funcs:
- result = getattr(ewm, f)()
- tm.assert_almost_equal(result, vals)
+ result = getattr(ewm, method)()
+ tm.assert_almost_equal(result, vals)
def _check_ew(self, name=None, preserve_nan=False):
series_result = getattr(self.series.ewm(com=10), name)()
@@ -2160,7 +2224,7 @@ def test_expanding_consistency(self, min_periods):
if name == 'count':
expanding_f_result = expanding_f()
expanding_apply_f_result = x.expanding(
- min_periods=0).apply(func=f)
+ min_periods=0).apply(func=f, raw=True)
else:
if name in ['cov', 'corr']:
expanding_f_result = expanding_f(
@@ -2168,7 +2232,7 @@ def test_expanding_consistency(self, min_periods):
else:
expanding_f_result = expanding_f()
expanding_apply_f_result = x.expanding(
- min_periods=min_periods).apply(func=f)
+ min_periods=min_periods).apply(func=f, raw=True)
# GH 9422
if name in ['sum', 'prod']:
@@ -2259,7 +2323,7 @@ def test_rolling_consistency(self, window, min_periods, center):
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=0,
- center=center).apply(func=f)
+ center=center).apply(func=f, raw=True)
else:
if name in ['cov', 'corr']:
rolling_f_result = rolling_f(
@@ -2268,7 +2332,7 @@ def test_rolling_consistency(self, window, min_periods, center):
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=min_periods,
- center=center).apply(func=f)
+ center=center).apply(func=f, raw=True)
# GH 9422
if name in ['sum', 'prod']:
@@ -2348,29 +2412,25 @@ def test_corr_sanity(self):
except AssertionError:
print(res)
- def test_flex_binary_frame(self):
- def _check(method):
- series = self.frame[1]
+ @pytest.mark.parametrize('method', ['corr', 'cov'])
+ def test_flex_binary_frame(self, method):
+ series = self.frame[1]
- res = getattr(series.rolling(window=10), method)(self.frame)
- res2 = getattr(self.frame.rolling(window=10), method)(series)
- exp = self.frame.apply(lambda x: getattr(
- series.rolling(window=10), method)(x))
+ res = getattr(series.rolling(window=10), method)(self.frame)
+ res2 = getattr(self.frame.rolling(window=10), method)(series)
+ exp = self.frame.apply(lambda x: getattr(
+ series.rolling(window=10), method)(x))
- tm.assert_frame_equal(res, exp)
- tm.assert_frame_equal(res2, exp)
+ tm.assert_frame_equal(res, exp)
+ tm.assert_frame_equal(res2, exp)
- frame2 = self.frame.copy()
- frame2.values[:] = np.random.randn(*frame2.shape)
+ frame2 = self.frame.copy()
+ frame2.values[:] = np.random.randn(*frame2.shape)
- res3 = getattr(self.frame.rolling(window=10), method)(frame2)
- exp = DataFrame(dict((k, getattr(self.frame[k].rolling(
- window=10), method)(frame2[k])) for k in self.frame))
- tm.assert_frame_equal(res3, exp)
-
- methods = ['corr', 'cov']
- for meth in methods:
- _check(meth)
+ res3 = getattr(self.frame.rolling(window=10), method)(frame2)
+ exp = DataFrame(dict((k, getattr(self.frame[k].rolling(
+ window=10), method)(frame2[k])) for k in self.frame))
+ tm.assert_frame_equal(res3, exp)
def test_ewmcov(self):
self._check_binary_ew('cov')
@@ -2417,19 +2477,24 @@ def func(A, B, com, **kwargs):
pytest.raises(Exception, func, A, randn(50), 20, min_periods=5)
- def test_expanding_apply_args_kwargs(self):
+ def test_expanding_apply_args_kwargs(self, raw):
+
def mean_w_arg(x, const):
return np.mean(x) + const
df = DataFrame(np.random.rand(20, 3))
- expected = df.expanding().apply(np.mean) + 20.
+ expected = df.expanding().apply(np.mean, raw=raw) + 20.
- tm.assert_frame_equal(df.expanding().apply(mean_w_arg, args=(20, )),
- expected)
- tm.assert_frame_equal(df.expanding().apply(mean_w_arg,
- kwargs={'const': 20}),
- expected)
+ result = df.expanding().apply(mean_w_arg,
+ raw=raw,
+ args=(20, ))
+ tm.assert_frame_equal(result, expected)
+
+ result = df.expanding().apply(mean_w_arg,
+ raw=raw,
+ kwargs={'const': 20})
+ tm.assert_frame_equal(result, expected)
def test_expanding_corr(self):
A = self.series.dropna()
@@ -2539,42 +2604,47 @@ def test_rolling_corr_diff_length(self):
result = s1.rolling(window=3, min_periods=2).corr(s2a)
tm.assert_series_equal(result, expected)
- def test_rolling_functions_window_non_shrinkage(self):
+ @pytest.mark.parametrize(
+ 'f',
+ [
+ lambda x: (x.rolling(window=10, min_periods=5)
+ .cov(x, pairwise=False)),
+ lambda x: (x.rolling(window=10, min_periods=5)
+ .corr(x, pairwise=False)),
+ lambda x: x.rolling(window=10, min_periods=5).max(),
+ lambda x: x.rolling(window=10, min_periods=5).min(),
+ lambda x: x.rolling(window=10, min_periods=5).sum(),
+ lambda x: x.rolling(window=10, min_periods=5).mean(),
+ lambda x: x.rolling(window=10, min_periods=5).std(),
+ lambda x: x.rolling(window=10, min_periods=5).var(),
+ lambda x: x.rolling(window=10, min_periods=5).skew(),
+ lambda x: x.rolling(window=10, min_periods=5).kurt(),
+ lambda x: x.rolling(
+ window=10, min_periods=5).quantile(quantile=0.5),
+ lambda x: x.rolling(window=10, min_periods=5).median(),
+ lambda x: x.rolling(window=10, min_periods=5).apply(
+ sum, raw=False),
+ lambda x: x.rolling(window=10, min_periods=5).apply(
+ sum, raw=True),
+ lambda x: x.rolling(win_type='boxcar',
+ window=10, min_periods=5).mean()])
+ def test_rolling_functions_window_non_shrinkage(self, f):
# GH 7764
s = Series(range(4))
s_expected = Series(np.nan, index=s.index)
df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
- functions = [lambda x: (x.rolling(window=10, min_periods=5)
- .cov(x, pairwise=False)),
- lambda x: (x.rolling(window=10, min_periods=5)
- .corr(x, pairwise=False)),
- lambda x: x.rolling(window=10, min_periods=5).max(),
- lambda x: x.rolling(window=10, min_periods=5).min(),
- lambda x: x.rolling(window=10, min_periods=5).sum(),
- lambda x: x.rolling(window=10, min_periods=5).mean(),
- lambda x: x.rolling(window=10, min_periods=5).std(),
- lambda x: x.rolling(window=10, min_periods=5).var(),
- lambda x: x.rolling(window=10, min_periods=5).skew(),
- lambda x: x.rolling(window=10, min_periods=5).kurt(),
- lambda x: x.rolling(
- window=10, min_periods=5).quantile(quantile=0.5),
- lambda x: x.rolling(window=10, min_periods=5).median(),
- lambda x: x.rolling(window=10, min_periods=5).apply(sum),
- lambda x: x.rolling(win_type='boxcar',
- window=10, min_periods=5).mean()]
- for f in functions:
- try:
- s_result = f(s)
- tm.assert_series_equal(s_result, s_expected)
+ try:
+ s_result = f(s)
+ tm.assert_series_equal(s_result, s_expected)
- df_result = f(df)
- tm.assert_frame_equal(df_result, df_expected)
- except (ImportError):
+ df_result = f(df)
+ tm.assert_frame_equal(df_result, df_expected)
+ except (ImportError):
- # scipy needed for rolling_window
- continue
+ # scipy needed for rolling_window
+ pytest.skip("scipy not available")
def test_rolling_functions_window_non_shrinkage_binary(self):
@@ -2620,7 +2690,10 @@ def test_moment_functions_zero_length(self):
lambda x: x.expanding(min_periods=5).kurt(),
lambda x: x.expanding(min_periods=5).quantile(0.5),
lambda x: x.expanding(min_periods=5).median(),
- lambda x: x.expanding(min_periods=5).apply(sum),
+ lambda x: x.expanding(min_periods=5).apply(
+ sum, raw=False),
+ lambda x: x.expanding(min_periods=5).apply(
+ sum, raw=True),
lambda x: x.rolling(window=10).count(),
lambda x: x.rolling(window=10, min_periods=5).cov(
x, pairwise=False),
@@ -2637,7 +2710,10 @@ def test_moment_functions_zero_length(self):
lambda x: x.rolling(
window=10, min_periods=5).quantile(0.5),
lambda x: x.rolling(window=10, min_periods=5).median(),
- lambda x: x.rolling(window=10, min_periods=5).apply(sum),
+ lambda x: x.rolling(window=10, min_periods=5).apply(
+ sum, raw=False),
+ lambda x: x.rolling(window=10, min_periods=5).apply(
+ sum, raw=True),
lambda x: x.rolling(win_type='boxcar',
window=10, min_periods=5).mean(),
]
@@ -2805,20 +2881,25 @@ def expanding_func(x, min_periods=1, center=False, axis=0):
return getattr(exp, func)()
self._check_expanding(expanding_func, static_comp, preserve_nan=False)
- def test_expanding_apply(self):
+ def test_expanding_apply(self, raw):
def expanding_mean(x, min_periods=1):
+
exp = x.expanding(min_periods=min_periods)
- return exp.apply(lambda x: x.mean())
+ result = exp.apply(lambda x: x.mean(), raw=raw)
+ return result
- self._check_expanding(expanding_mean, np.mean)
+ # TODO(jreback): needed to add preserve_nan=False
+ # here to make this pass
+ self._check_expanding(expanding_mean, np.mean, preserve_nan=False)
ser = Series([])
- tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean()))
+ tm.assert_series_equal(ser, ser.expanding().apply(
+ lambda x: x.mean(), raw=raw))
# GH 8080
s = Series([None, None, None])
- result = s.expanding(min_periods=0).apply(lambda x: len(x))
+ result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw)
expected = Series([1., 2., 3.])
tm.assert_series_equal(result, expected)
@@ -3057,13 +3138,14 @@ def func(x):
expected = g.apply(func)
tm.assert_series_equal(result, expected)
- def test_rolling_apply(self):
+ def test_rolling_apply(self, raw):
g = self.frame.groupby('A')
r = g.rolling(window=4)
# reduction
- result = r.apply(lambda x: x.sum())
- expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum()))
+ result = r.apply(lambda x: x.sum(), raw=raw)
+ expected = g.apply(
+ lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
tm.assert_frame_equal(result, expected)
def test_expanding(self):
@@ -3104,13 +3186,14 @@ def func(x):
expected = g.apply(func)
tm.assert_series_equal(result, expected)
- def test_expanding_apply(self):
+ def test_expanding_apply(self, raw):
g = self.frame.groupby('A')
r = g.expanding()
# reduction
- result = r.apply(lambda x: x.sum())
- expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum()))
+ result = r.apply(lambda x: x.sum(), raw=raw)
+ expected = g.apply(
+ lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
tm.assert_frame_equal(result, expected)
@@ -3624,22 +3707,22 @@ def test_ragged_max(self):
expected['B'] = [0.0, 1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
- def test_ragged_apply(self):
+ def test_ragged_apply(self, raw):
df = self.ragged
f = lambda x: 1
- result = df.rolling(window='1s', min_periods=1).apply(f)
+ result = df.rolling(window='1s', min_periods=1).apply(f, raw=raw)
expected = df.copy()
expected['B'] = 1.
tm.assert_frame_equal(result, expected)
- result = df.rolling(window='2s', min_periods=1).apply(f)
+ result = df.rolling(window='2s', min_periods=1).apply(f, raw=raw)
expected = df.copy()
expected['B'] = 1.
tm.assert_frame_equal(result, expected)
- result = df.rolling(window='5s', min_periods=1).apply(f)
+ result = df.rolling(window='5s', min_periods=1).apply(f, raw=raw)
expected = df.copy()
expected['B'] = 1.
tm.assert_frame_equal(result, expected)
@@ -3662,8 +3745,14 @@ def test_all(self):
expected = er.quantile(0.5)
tm.assert_frame_equal(result, expected)
- result = r.apply(lambda x: 1)
- expected = er.apply(lambda x: 1)
+ def test_all_apply(self, raw):
+
+ df = self.regular * 2
+ er = df.rolling(window=1)
+ r = df.rolling(window='1s')
+
+ result = r.apply(lambda x: 1, raw=raw)
+ expected = er.apply(lambda x: 1, raw=raw)
tm.assert_frame_equal(result, expected)
def test_all2(self):
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index d96ebab615d12..5369b1a94a956 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -2228,8 +2228,6 @@ class TestWeekOfMonth(Base):
_offset = WeekOfMonth
def test_constructor(self):
- tm.assert_raises_regex(ValueError, "^N cannot be 0",
- WeekOfMonth, n=0, week=1, weekday=1)
tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth,
n=1, week=4, weekday=0)
tm.assert_raises_regex(ValueError, "^Week", WeekOfMonth,
@@ -2261,6 +2259,19 @@ def test_offset(self):
(-1, 2, 1, date3, datetime(2010, 12, 21)),
(-1, 2, 1, date4, datetime(2011, 1, 18)),
+ (0, 0, 1, date1, datetime(2011, 1, 4)),
+ (0, 0, 1, date2, datetime(2011, 2, 1)),
+ (0, 0, 1, date3, datetime(2011, 2, 1)),
+ (0, 0, 1, date4, datetime(2011, 2, 1)),
+ (0, 1, 1, date1, datetime(2011, 1, 11)),
+ (0, 1, 1, date2, datetime(2011, 1, 11)),
+ (0, 1, 1, date3, datetime(2011, 2, 8)),
+ (0, 1, 1, date4, datetime(2011, 2, 8)),
+ (0, 0, 1, date1, datetime(2011, 1, 4)),
+ (0, 1, 1, date2, datetime(2011, 1, 11)),
+ (0, 2, 1, date3, datetime(2011, 1, 18)),
+ (0, 3, 1, date4, datetime(2011, 1, 25)),
+
(1, 0, 0, date1, datetime(2011, 2, 7)),
(1, 0, 0, date2, datetime(2011, 2, 7)),
(1, 0, 0, date3, datetime(2011, 2, 7)),
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 2e4be7fbdeebf..749165f894819 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -1461,9 +1461,6 @@ def __init__(self, n=1, normalize=False, week=0, weekday=0):
self.weekday = weekday
self.week = week
- if self.n == 0:
- raise ValueError('N cannot be 0')
-
if self.weekday < 0 or self.weekday > 6:
raise ValueError('Day must be 0<=day<=6, got {day}'
.format(day=self.weekday))
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index f0d57d1d808a2..0000000000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,9 +0,0 @@
-[build-system]
-requires = [
- "wheel",
- "setuptools",
- "Cython", # required for VCS build, optional for released source
- "numpy==1.9.3; python_version=='3.5'",
- "numpy==1.12.1; python_version=='3.6'",
- "numpy==1.13.1; python_version>='3.7'",
-]
diff --git a/setup.py b/setup.py
index 7fb5358d0950b..973b4c0abcde2 100755
--- a/setup.py
+++ b/setup.py
@@ -748,4 +748,5 @@ def pxd(name):
long_description=LONG_DESCRIPTION,
classifiers=CLASSIFIERS,
platforms='any',
+ python_requires='>=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*',
**setuptools_kwargs)