From b28456a7926fa53ca88a39a3a714eeb0c9260d7b Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Wed, 25 Sep 2019 12:51:44 +0900 Subject: [PATCH 01/19] BUG: value_counts can handle the case even with empty groups (#28479) * If applying rep to recons_labels fails, use ids which have no consecutive duplicates instead. --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/groupby/generic.py | 9 +++++++- pandas/tests/groupby/test_value_counts.py | 27 ++++++++++++++++++++++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7ca93d7d75854..fbda7011e066d 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -299,6 +299,7 @@ Other - Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) +- :meth:`SeriesGroupBy.value_counts` can now handle the case when the :class:`Grouper` makes empty groups (:issue:`28479`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8f1455561c03..fbbcfc06bbdc3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1259,7 +1259,14 @@ def value_counts( rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) # multi-index components - labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] + try: + labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] + except ValueError: + # If applying rep to recons_labels fails, use ids which have no + # consecutive duplicates instead.
+ _ids_idx = np.ones(len(ids), dtype=bool) + _ids_idx[1:] = ids[1:] != ids[:-1] + labels = list(map(rep, [ids[_ids_idx]])) + [llab(lab, inc)] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] names = self.grouper.names + [self._selection_name] diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index f8bd8843ab7e3..b7236ab491011 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series, date_range +from pandas import DataFrame, MultiIndex, Series, date_range, Grouper from pandas.util import testing as tm @@ -79,3 +79,28 @@ def rebuild_index(df): # have to sort on index because of unstable sort on values left, right = map(rebuild_index, (left, right)) # xref GH9212 tm.assert_series_equal(left.sort_index(), right.sort_index()) + + +@pytest.mark.parametrize( + "freq, size, frac", product(["1D", "2D", "1W", "1Y"], [100, 1000], [0.1, 0.5, 1]) +) +def test_series_groupby_value_counts_with_grouper(freq, size, frac): + np.random.seed(42) + + df = DataFrame.from_dict( + { + "date": date_range("2019-09-25", periods=size), + "name": np.random.choice(list("abcd"), size), + } + ).sample(frac=frac) + + gr = df.groupby(Grouper(key="date", freq=freq))["name"] + + # have to sort on index because of unstable sort on values xref GH9212 + result = gr.value_counts().sort_index() + expected = gr.apply(Series.value_counts).sort_index() + expected.index.names = ( + result.index.names + ) # .apply(Series.value_counts) can't create all names + + tm.assert_series_equal(result, expected) From 3ef5e8a445bdd55481f38404e2fe194a94535416 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Thu, 26 Sep 2019 22:57:22 +0900 Subject: [PATCH 02/19] . --- pandas/tests/groupby/test_value_counts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index b7236ab491011..d1470c0eb1e70 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series, date_range, Grouper +from pandas import DataFrame, Grouper, MultiIndex, Series, date_range from pandas.util import testing as tm From 40475e8c5ca3381ef1ff35d53db13fab25d611c9 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Fri, 27 Sep 2019 17:58:09 +0900 Subject: [PATCH 03/19] removing consecutive duplicates was the same as just unique --- pandas/core/groupby/generic.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fbbcfc06bbdc3..ae851110af800 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1262,11 +1262,8 @@ def value_counts( try: labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] except ValueError: - # If applying rep to recons_labels fails, use ids which have no - # consecutive duplicates instead.
- _ids_idx = np.ones(len(ids), dtype=bool) - _ids_idx[1:] = ids[1:] != ids[:-1] - labels = list(map(rep, [ids[_ids_idx]])) + [llab(lab, inc)] + # If applying rep to recons_labels fails, use unique ids + labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] names = self.grouper.names + [self._selection_name] From f1f104a606b9e2bc3acaa2310ecf316d0ee056ad Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sat, 28 Sep 2019 18:44:17 +0900 Subject: [PATCH 04/19] get the performance while handling the exception explicitly --- pandas/core/groupby/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ae851110af800..32db190f30880 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -54,6 +54,7 @@ _transform_template, groupby, ) +from pandas.core.groupby.ops import BinGrouper from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block @@ -1262,8 +1263,11 @@ def value_counts( try: labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] except ValueError: - # If applying rep to recons_labels fails, use unique ids - labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] + # If applying rep to recons_labels fails and that's because of empty periods, + is_len_different = len(self.grouper.binlabels) != len(self.grouper.indices) + if isinstance(self.grouper, BinGrouper) and is_len_different: + # then use unique ids instead of self.grouper.recons_labels + labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] From 6fbaaa6b13f257d26f6522ddbc4b2af7f4c7af9b Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Thu, 3 Oct 2019 09:56:41 +0900 Subject: [PATCH 05/19] get rid of try-except --- pandas/core/groupby/generic.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 32db190f30880..baadfe198ffa4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1260,14 +1260,12 @@ def value_counts( rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) # multi-index components - try: + if isinstance(self.grouper, BinGrouper) and ( + len(self.grouper.binlabels) != len(self.grouper.indices) + ): + labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] + else: + labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] - except ValueError: - # If applying rep to recons_labels fails and that's because of empty periods, - is_len_different = len(self.grouper.binlabels) != len(self.grouper.indices) - if isinstance(self.grouper, BinGrouper) and is_len_different: - # then use unique ids instead of self.grouper.recons_labels - labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] names = self.grouper.names + [self._selection_name] From 3a7f71e806ea26c85b36322f5765f96d9c16a877 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Thu, 3 Oct 2019 10:16:25 +0900 Subject: [PATCH 06/19] make test more idiomatic --- pandas/tests/groupby/test_value_counts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git
a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index d1470c0eb1e70..70d6b3db1923e 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -81,9 +81,9 @@ def rebuild_index(df): tm.assert_series_equal(left.sort_index(), right.sort_index()) -@pytest.mark.parametrize( - "freq, size, frac", product(["1D", "2D", "1W", "1Y"], [100, 1000], [0.1, 0.5, 1]) -) +@pytest.mark.parametrize("freq", ["1D", "2D", "1W", "1Y"]) +@pytest.mark.parametrize("size", [100, 1000]) +@pytest.mark.parametrize("frac", [0.1, 0.5, 1]) def test_series_groupby_value_counts_with_grouper(freq, size, frac): np.random.seed(42) From d101a730da855eecf14a9b20aae7cc257f2ff265 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Mon, 14 Oct 2019 17:24:29 +0900 Subject: [PATCH 07/19] Merge origin/master into fix-GH28479-1 --- pandas/core/groupby/generic.py | 8 +++++++- pandas/tests/groupby/test_value_counts.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 068d5e5275f0d..598f13de309b4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -53,6 +53,7 @@ _transform_template, groupby, ) +from pandas.core.groupby.ops import BinGrouper from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block @@ -639,7 +640,12 @@ def value_counts( rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) # multi-index components - labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] + if isinstance(self.grouper, BinGrouper) and ( + len(self.grouper.binlabels) != len(self.grouper.indices) + ): + labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] + else: + labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] names = self.grouper.names + [self._selection_name] diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index d1470c0eb1e70..70d6b3db1923e 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -81,9 +81,9 @@ def rebuild_index(df): tm.assert_series_equal(left.sort_index(), right.sort_index()) -@pytest.mark.parametrize( - "freq, size, frac", product(["1D", "2D", "1W", "1Y"], [100, 1000], [0.1, 0.5, 1]) -) +@pytest.mark.parametrize("freq", ["1D", "2D", "1W", "1Y"]) +@pytest.mark.parametrize("size", [100, 1000]) +@pytest.mark.parametrize("frac", [0.1, 0.5, 1]) def test_series_groupby_value_counts_with_grouper(freq, size, frac): np.random.seed(42) From 800560351ca4ab9203f8e8858a83b87f9014e285 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sat, 19 Oct 2019 19:08:08 +0900 Subject: [PATCH 08/19] move the logic into the BinGrouper.recons_labels --- .travis.yml | 13 +- ci/build38.sh | 19 + ci/setup_env.sh | 5 + doc/source/getting_started/install.rst | 2 +- doc/source/user_guide/advanced.rst | 36 +- doc/source/user_guide/io.rst | 34 +- doc/source/user_guide/reshaping.rst | 16 +- doc/source/whatsnew/v0.25.2.rst | 79 +- doc/source/whatsnew/v1.0.0.rst | 40 +- pandas/_libs/algos_rank_helper.pxi.in | 424 +++++----- pandas/_libs/algos_take_helper.pxi.in | 43 +- pandas/_libs/groupby.pyx | 733 +++++++++++++++++- pandas/_libs/groupby_helper.pxi.in | 670 ---------------- pandas/_libs/index.pyx | 50 +- pandas/_libs/intervaltree.pxi.in | 4 +- 
pandas/_libs/lib.pyx | 25 +- pandas/_libs/reduction.pyx | 2 +- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/_libs/tslibs/timezones.pyx | 7 +- pandas/compat/numpy/__init__.py | 1 + pandas/core/accessor.py | 6 +- pandas/core/algorithms.py | 79 +- pandas/core/apply.py | 46 +- pandas/core/arrays/base.py | 13 +- pandas/core/arrays/categorical.py | 6 +- pandas/core/base.py | 22 +- pandas/core/frame.py | 69 +- pandas/core/generic.py | 2 +- pandas/core/groupby/generic.py | 110 ++- pandas/core/groupby/groupby.py | 47 +- pandas/core/groupby/ops.py | 18 +- pandas/core/indexes/base.py | 37 +- pandas/core/indexes/category.py | 8 - pandas/core/indexes/multi.py | 22 +- pandas/core/indexes/period.py | 10 +- pandas/core/indexes/timedeltas.py | 3 +- pandas/core/internals/blocks.py | 19 +- pandas/core/resample.py | 21 +- pandas/core/reshape/reshape.py | 17 +- pandas/core/series.py | 4 +- pandas/core/sorting.py | 5 +- pandas/io/formats/format.py | 5 +- pandas/io/json/_table_schema.py | 6 +- pandas/tests/computation/test_eval.py | 9 +- pandas/tests/extension/list/__init__.py | 3 + pandas/tests/extension/list/array.py | 133 ++++ pandas/tests/extension/list/test_list.py | 30 + pandas/tests/frame/test_apply.py | 11 - pandas/tests/frame/test_convert_to.py | 4 +- pandas/tests/groupby/aggregate/test_other.py | 2 +- pandas/tests/groupby/test_categorical.py | 12 +- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/groupby/test_groupby.py | 10 + pandas/tests/indexes/multi/test_astype.py | 2 +- .../tests/indexes/multi/test_constructor.py | 2 + pandas/tests/indexes/multi/test_names.py | 35 +- pandas/tests/indexes/multi/test_reindex.py | 10 +- pandas/tests/indexes/multi/test_reshape.py | 1 + pandas/tests/indexes/test_category.py | 20 +- pandas/tests/indexing/test_categorical.py | 71 +- pandas/tests/indexing/test_coercion.py | 3 +- pandas/tests/io/formats/test_to_html.py | 8 + pandas/tests/io/json/test_ujson.py | 5 +- pandas/tests/io/parser/conftest.py | 9 +- pandas/tests/io/test_sql.py | 4 +- pandas/tests/plotting/test_backend.py | 2 +- pandas/tests/reshape/test_concat.py | 6 +- pandas/tests/reshape/test_reshape.py | 5 +- pandas/tests/series/test_analytics.py | 13 + pandas/tests/series/test_operators.py | 35 +- pandas/tests/test_base.py | 6 + pandas/tests/test_multilevel.py | 22 +- .../offsets/test_offsets_properties.py | 4 +- pandas/tests/util/test_assert_frame_equal.py | 2 +- pandas/tests/util/test_assert_series_equal.py | 2 +- pandas/util/testing.py | 11 +- scripts/tests/test_validate_docstrings.py | 19 + scripts/validate_docstrings.py | 24 +- setup.cfg | 24 - setup.py | 4 +- 80 files changed, 1877 insertions(+), 1468 deletions(-) create mode 100644 ci/build38.sh delete mode 100644 pandas/_libs/groupby_helper.pxi.in create mode 100644 pandas/tests/extension/list/__init__.py create mode 100644 pandas/tests/extension/list/array.py create mode 100644 pandas/tests/extension/list/test_list.py diff --git a/.travis.yml b/.travis.yml index 79fecc41bec0d..b9fa06304d387 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,12 @@ matrix: - python: 3.5 include: + - dist: bionic + # 18.04 + python: 3.8-dev + env: + - JOB="3.8-dev" PATTERN="(not slow and not network)" + - dist: trusty env: - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)" @@ -71,6 +77,7 @@ before_install: # This overrides travis and tells it to look nowhere. 
- export BOTO_CONFIG=/dev/null + install: - echo "install start" - ci/prep_cython_cache.sh @@ -78,17 +85,19 @@ install: - ci/submit_cython_cache.sh - echo "install done" + before_script: # display server (for clipboard functionality) needs to be started here, # does not work if done in install:setup_env.sh (GH-26103) - export DISPLAY=":99.0" - echo "sh -e /etc/init.d/xvfb start" - - sh -e /etc/init.d/xvfb start + - if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi - sleep 3 script: - echo "script start" - - source activate pandas-dev + - echo "$JOB" + - if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi - ci/run_tests.sh after_script: diff --git a/ci/build38.sh b/ci/build38.sh new file mode 100644 index 0000000000000..903016536d240 --- /dev/null +++ b/ci/build38.sh @@ -0,0 +1,19 @@ +#!/bin/bash -e +# Special build for python3.8 until numpy puts its own wheels up + +sudo apt-get install build-essential gcc xvfb +pip install --no-deps -U pip wheel setuptools +pip install python-dateutil pytz cython pytest pytest-xdist hypothesis + +# Possible alternative for getting numpy: +pip install --pre -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com/ numpy + +python setup.py build_ext -inplace +python -m pip install --no-build-isolation -e . + +python -c "import sys; print(sys.version_info)" +python -c "import pandas as pd" +python -c "import hypothesis" + +# TODO: Is there anything else in setup_env that we really want to do? +# ci/setup_env.sh diff --git a/ci/setup_env.sh b/ci/setup_env.sh index 382491a947488..794130355fd74 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -1,5 +1,9 @@ #!/bin/bash -e +if [ "$JOB" == "3.8-dev" ]; then + /bin/bash ci/build38.sh + exit 0 +fi # edit the locale file if needed if [ -n "$LOCALE_OVERRIDE" ]; then @@ -51,6 +55,7 @@ echo echo "update conda" conda config --set ssl_verify false conda config --set quiet true --set always_yes true --set changeps1 false +conda install pip # create conda to create a historical artifact for pip & setuptools conda update -n base conda echo "conda info -a" diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index fc99b458fa0af..7d1150c2f65fa 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -18,7 +18,7 @@ Instructions for installing from source, Python version support ---------------------- -Officially Python 3.5.3 and above, 3.6, and 3.7. +Officially Python 3.5.3 and above, 3.6, 3.7, and 3.8. Installing pandas ----------------- diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 62a9b6396404a..4949dd580414f 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -783,27 +783,41 @@ values **not** in the categories, similarly to how you can reindex **any** panda .. ipython:: python - df2.reindex(['a', 'e']) - df2.reindex(['a', 'e']).index - df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))) - df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index + df3 = pd.DataFrame({'A': np.arange(3), + 'B': pd.Series(list('abc')).astype('category')}) + df3 = df3.set_index('B') + df3 + +.. ipython:: python + + df3.reindex(['a', 'e']) + df3.reindex(['a', 'e']).index + df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe'))) + df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe'))).index .. 
warning:: Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories or a ``TypeError`` will be raised. - .. code-block:: ipython + .. ipython:: python - In [9]: df3 = pd.DataFrame({'A': np.arange(6), 'B': pd.Series(list('aabbca')).astype('category')}) + df4 = pd.DataFrame({'A': np.arange(2), + 'B': list('ba')}) + df4['B'] = df4['B'].astype(CategoricalDtype(list('ab'))) + df4 = df4.set_index('B') + df4.index - In [11]: df3 = df3.set_index('B') + df5 = pd.DataFrame({'A': np.arange(2), + 'B': list('bc')}) + df5['B'] = df5['B'].astype(CategoricalDtype(list('bc'))) + df5 = df5.set_index('B') + df5.index - In [11]: df3.index - Out[11]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, name='B', dtype='category') + .. code-block:: ipython - In [12]: pd.concat([df2, df3]) - TypeError: categories must match existing categories when appending + In [1]: pd.concat([df4, df5]) + TypeError: categories must match existing categories when appending .. _indexing.rangeindex: diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index ee097c1f4d5e8..6b23c814843e1 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3811,6 +3811,8 @@ storing/selecting from homogeneous index ``DataFrames``. # the levels are automatically included as data columns store.select('df_mi', 'foo=bar') +.. note:: + The ``index`` keyword is reserved and cannot be use as a level name. .. _io.hdf5-query: @@ -3829,6 +3831,7 @@ A query is specified using the ``Term`` class under the hood, as a boolean expre * ``index`` and ``columns`` are supported indexers of ``DataFrames``. * if ``data_columns`` are specified, these can be used as additional indexers. +* level name in a MultiIndex, with default name ``level_0``, ``level_1``, … if not provided. Valid comparison operators are: @@ -3947,7 +3950,7 @@ space. These are in terms of the total number of rows in a table. .. _io.hdf5-timedelta: -Using timedelta64[ns] +Query timedelta64[ns] +++++++++++++++++++++ You can store and query using the ``timedelta64[ns]`` type. Terms can be @@ -3966,6 +3969,35 @@ specified in the format: ``()``, where float may be signed (and fra store.append('dftd', dftd, data_columns=True) store.select('dftd', "C<'-3.5D'") +Query MultiIndex +++++++++++++++++ + +Selecting from a ``MultiIndex`` can be achieved by using the name of the level. + +.. ipython:: python + + df_mi.index.names + store.select('df_mi', "foo=baz and bar=two") + +If the ``MultiIndex`` levels names are ``None``, the levels are automatically made available via +the ``level_n`` keyword with ``n`` the level of the ``MultiIndex`` you want to select from. + +.. 
ipython:: python + + index = pd.MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + ) + df_mi_2 = pd.DataFrame(np.random.randn(10, 3), + index=index, columns=["A", "B", "C"]) + df_mi_2 + + store.append("df_mi_2", df_mi_2) + + # the levels are automatically included as data columns with keyword level_n + store.select("df_mi_2", "level_0=foo and level_1=two") + + Indexing ++++++++ diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index dd6d3062a8f0a..b2ee252495f23 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -728,14 +728,14 @@ Suppose we wanted to pivot ``df`` such that the ``col`` values are columns, ``row`` values are the index, and the mean of ``val0`` are the values? In particular, the resulting DataFrame should look like: -.. note:: - - col col0 col1 col2 col3 col4 - row - row0 0.77 0.605 NaN 0.860 0.65 - row2 0.13 NaN 0.395 0.500 0.25 - row3 NaN 0.310 NaN 0.545 NaN - row4 NaN 0.100 0.395 0.760 0.24 +.. code-block:: text + + col col0 col1 col2 col3 col4 + row + row0 0.77 0.605 NaN 0.860 0.65 + row2 0.13 NaN 0.395 0.500 0.25 + row3 NaN 0.310 NaN 0.545 NaN + row4 NaN 0.100 0.395 0.760 0.24 This solution uses :func:`~pandas.pivot_table`. Also note that ``aggfunc='mean'`` is the default. It is included here to be explicit. diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 9789c9fce3541..a99751f9bab9f 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -1,101 +1,38 @@ .. _whatsnew_0252: -What's new in 0.25.2 (October XX, 2019) +What's new in 0.25.2 (October 15, 2019) --------------------------------------- These are the changes in pandas 0.25.2. See :ref:`release` for a full changelog including other versions of pandas. +.. note:: + + Pandas 0.25.2 adds compatibility for Python 3.8 (:issue:`28147`). + .. _whatsnew_0252.bug_fixes: Bug fixes ~~~~~~~~~ -Categorical -^^^^^^^^^^^ - -- - -Datetimelike -^^^^^^^^^^^^ - -- -- -- - -Timezones -^^^^^^^^^ - -- - -Numeric -^^^^^^^ - -- -- -- -- - -Conversion -^^^^^^^^^^ - -- - -Interval -^^^^^^^^ - -- - Indexing ^^^^^^^^ -- Fix regression in :meth:`DataFrame.reindex` not following ``limit`` argument (:issue:`28631`). +- Fix regression in :meth:`DataFrame.reindex` not following the ``limit`` argument (:issue:`28631`). - Fix regression in :meth:`RangeIndex.get_indexer` for decreasing :class:`RangeIndex` where target values may be improperly identified as missing/present (:issue:`28678`) -- -- - -Missing -^^^^^^^ - -- I/O ^^^ -- Fix regression in notebook display where tags not used for :attr:`DataFrame.index` (:issue:`28204`). +- Fix regression in notebook display where ```` tags were missing for :attr:`DataFrame.index` values (:issue:`28204`). - Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) -- -- - -Plotting -^^^^^^^^ - -- -- -- +- Fix :meth:`~DataFrame.to_csv` with ``ExtensionArray`` with list-like values (:issue:`28840`). Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`). 
- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`) -- -- -- - -Reshaping -^^^^^^^^^ - -- -- -- -- -- - -Sparse -^^^^^^ - -- Other ^^^^^ diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0001dbf188620..d13c815f0b829 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -109,6 +109,7 @@ Other enhancements (:issue:`28368`) - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) +- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) Build Changes ^^^^^^^^^^^^^ @@ -123,7 +124,37 @@ source, you should no longer need to install Cython into your build environment Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`). +.. _whatsnew_1000.api_breaking.MultiIndex._names: + +``MultiIndex.levels`` do not hold level names any longer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- A :class:`MultiIndex` previously stored the level names as attributes of each of its + :attr:`MultiIndex.levels`. From Pandas 1.0, the names are only accessed through + :attr:`MultiIndex.names` (which was also possible previously). This is done in order to + make :attr:`MultiIndex.levels` more similar to :attr:`CategoricalIndex.categories` (:issue:`27242`:). + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) + Out[2]: mi + MultiIndex([(1, 'a'), + (1, 'b'), + (2, 'a'), + (2, 'b')], + names=['x', 'y']) + Out[3]: mi.levels[0].name + 'x' + +*pandas 1.0.0* + +.. ipython:: python + + mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) + mi.levels[0].name + - :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) *pandas 0.25.x* @@ -149,6 +180,7 @@ Backwards incompatible API changes Other API changes ^^^^^^^^^^^^^^^^^ +- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`) - :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) - :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`) - In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``). @@ -162,6 +194,7 @@ Documentation Improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^ - Added new section on :ref:`scale` (:issue:`28315`). +- Added sub-section Query MultiIndex in IO tools user guide (:issue:`28791`) .. _whatsnew_1000.deprecations: @@ -194,6 +227,7 @@ Removal of prior version deprecations/changes - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) - Removed the previously deprecated ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`) +- Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`) - .. _whatsnew_1000.performance: @@ -221,6 +255,7 @@ Categorical - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) +- :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`) - Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) - Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`) - :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) @@ -290,6 +325,9 @@ Indexing - Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`) - Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) +- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) +- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`) +- Missing ^^^^^^^ diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 5dac94394c7ed..d5a31b6a13010 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -8,24 +8,17 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # rank_1d, rank_2d # ---------------------------------------------------------------------- -{{py: - -# dtype ctype pos_nan_value neg_nan_value -dtypes = [('object', 'object', 'Infinity()', 'NegInfinity()'), - ('float64', 'float64_t', 'np.inf', '-np.inf'), - ('uint64', 'uint64_t', '', ''), - ('int64', 'int64_t', 'np.iinfo(np.int64).max', - 'np.iinfo(np.int64).min')] - -}} - -{{for dtype, ctype, pos_nan_value, neg_nan_value in dtypes}} +ctypedef fused rank_t: + object + float64_t + uint64_t + int64_t @cython.wraparound(False) @cython.boundscheck(False) -def rank_1d_{{dtype}}(object in_arr, ties_method='average', - ascending=True, na_option='keep', pct=False): +def rank_1d(rank_t[:] in_arr, ties_method='average', + ascending=True, na_option='keep', pct=False): """ Fast NaN-friendly version of scipy.stats.rankdata """ @@ -33,85 +26,86 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', cdef: Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0 - {{if dtype == 'object'}} - ndarray sorted_data, values - {{else}} - ndarray[{{ctype}}] sorted_data, values - {{endif}} + ndarray[rank_t] sorted_data, values ndarray[float64_t] ranks ndarray[int64_t] argsorted ndarray[uint8_t, cast=True] sorted_mask - {{if dtype == 'uint64'}} - {{ctype}} val - {{else}} - {{ctype}} val, nan_value - {{endif}} + rank_t val, nan_value 
float64_t sum_ranks = 0 int tiebreak = 0 bint keep_na = 0 - bint isnan + bint isnan, condition float64_t count = 0.0 + tiebreak = tiebreakers[ties_method] - {{if dtype == 'float64'}} - values = np.asarray(in_arr).copy() - {{elif dtype == 'object'}} - values = np.array(in_arr, copy=True) + if rank_t is float64_t: + values = np.asarray(in_arr).copy() + elif rank_t is object: + values = np.array(in_arr, copy=True) - if values.dtype != np.object_: - values = values.astype('O') - {{else}} - values = np.asarray(in_arr) - {{endif}} + if values.dtype != np.object_: + values = values.astype('O') + else: + values = np.asarray(in_arr) keep_na = na_option == 'keep' - {{if dtype == 'object'}} - mask = missing.isnaobj(values) - {{elif dtype == 'float64'}} - mask = np.isnan(values) - {{elif dtype == 'int64'}} - mask = values == NPY_NAT + if rank_t is object: + mask = missing.isnaobj(values) + elif rank_t is float64_t: + mask = np.isnan(values) + elif rank_t is int64_t: + mask = values == NPY_NAT - # create copy in case of NPY_NAT - # values are mutated inplace - if mask.any(): - values = values.copy() - {{endif}} + # create copy in case of NPY_NAT + # values are mutated inplace + if mask.any(): + values = values.copy() # double sort first by mask and then by values to ensure nan values are # either at the beginning or the end. mask/(~mask) controls padding at # tail or the head - {{if dtype != 'uint64'}} - if ascending ^ (na_option == 'top'): - nan_value = {{pos_nan_value}} - order = (values, mask) + if rank_t is not uint64_t: + if ascending ^ (na_option == 'top'): + if rank_t is object: + nan_value = Infinity() + elif rank_t is float64_t: + nan_value = np.inf + elif rank_t is int64_t: + nan_value = np.iinfo(np.int64).max + + order = (values, mask) + else: + if rank_t is object: + nan_value = NegInfinity() + elif rank_t is float64_t: + nan_value = -np.inf + elif rank_t is int64_t: + nan_value = np.iinfo(np.int64).min + + order = (values, ~mask) + np.putmask(values, mask, nan_value) else: - nan_value = {{neg_nan_value}} - order = (values, ~mask) - np.putmask(values, mask, nan_value) - {{else}} - mask = np.zeros(shape=len(values), dtype=bool) - order = (values, mask) - {{endif}} + mask = np.zeros(shape=len(values), dtype=bool) + order = (values, mask) n = len(values) ranks = np.empty(n, dtype='f8') - {{if dtype == 'object'}} - _as = np.lexsort(keys=order) - {{else}} - if tiebreak == TIEBREAK_FIRST: - # need to use a stable sort here + if rank_t is object: _as = np.lexsort(keys=order) - if not ascending: - tiebreak = TIEBREAK_FIRST_DESCENDING else: - _as = np.lexsort(keys=order) - {{endif}} + if tiebreak == TIEBREAK_FIRST: + # need to use a stable sort here + _as = np.lexsort(keys=order) + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + else: + _as = np.lexsort(keys=order) if not ascending: _as = _as[::-1] @@ -122,38 +116,32 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', non_na_idx = _indices[0] if len(_indices) > 0 else -1 argsorted = _as.astype('i8') - {{if dtype == 'object'}} - if True: - {{else}} - with nogil: - {{endif}} - # TODO: why does the 2d version not have a nogil block? 
+ if rank_t is object: + # TODO: de-duplicate once cython supports conditional nogil for i in range(n): sum_ranks += i + 1 dups += 1 - {{if dtype == 'object'}} - val = util.get_value_at(sorted_data, i) - {{else}} val = sorted_data[i] - {{endif}} - {{if dtype != 'uint64'}} - isnan = sorted_mask[i] - if isnan and keep_na: - ranks[argsorted[i]] = NaN - continue - {{endif}} + if rank_t is not uint64_t: + isnan = sorted_mask[i] + if isnan and keep_na: + ranks[argsorted[i]] = NaN + continue count += 1.0 - {{if dtype == 'object'}} - if (i == n - 1 or - are_diff(util.get_value_at(sorted_data, i + 1), val) or - i == non_na_idx): - {{else}} - if (i == n - 1 or sorted_data[i + 1] != val or i == non_na_idx): - {{endif}} + if rank_t is object: + condition = (i == n - 1 or + are_diff(sorted_data[i + 1], val) or + i == non_na_idx) + else: + condition = (i == n - 1 or + sorted_data[i + 1] != val or + i == non_na_idx) + + if condition: if tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): @@ -165,13 +153,12 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: - {{if dtype == 'object'}} - raise ValueError('first not supported for ' - 'non-numeric data') - {{else}} - for j in range(i - dups + 1, i + 1): - ranks[argsorted[j]] = j + 1 - {{endif}} + if rank_t is object: + raise ValueError('first not supported for ' + 'non-numeric data') + else: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = j + 1 elif tiebreak == TIEBREAK_FIRST_DESCENDING: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = 2 * i - j - dups + 2 @@ -180,6 +167,60 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = total_tie_count sum_ranks = dups = 0 + + else: + with nogil: + # TODO: why does the 2d version not have a nogil block? 
+ for i in range(n): + sum_ranks += i + 1 + dups += 1 + + val = sorted_data[i] + + if rank_t is not uint64_t: + isnan = sorted_mask[i] + if isnan and keep_na: + ranks[argsorted[i]] = NaN + continue + + count += 1.0 + + if rank_t is object: + condition = (i == n - 1 or + are_diff(sorted_data[i + 1], val) or + i == non_na_idx) + else: + condition = (i == n - 1 or + sorted_data[i + 1] != val or + i == non_na_idx) + + if condition: + + if tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = i - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = i + 1 + elif tiebreak == TIEBREAK_FIRST: + if rank_t is object: + raise ValueError('first not supported for ' + 'non-numeric data') + else: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = j + 1 + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = 2 * i - j - dups + 2 + elif tiebreak == TIEBREAK_DENSE: + total_tie_count += 1 + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = total_tie_count + sum_ranks = dups = 0 + if pct: if tiebreak == TIEBREAK_DENSE: return ranks / total_tie_count @@ -189,8 +230,14 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', return ranks -def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', - ascending=True, na_option='keep', pct=False): +rank_1d_object = rank_1d["object"] +rank_1d_float64 = rank_1d["float64_t"] +rank_1d_uint64 = rank_1d["uint64_t"] +rank_1d_int64 = rank_1d["int64_t"] + + +def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average', + ascending=True, na_option='keep', pct=False): """ Fast NaN-friendly version of scipy.stats.rankdata """ @@ -198,138 +245,130 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', cdef: Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0 - {{if dtype == 'object'}} Py_ssize_t infs - {{endif}} ndarray[float64_t, ndim=2] ranks - {{if dtype == 'int64' or dtype == 'uint64'}} - ndarray[{{ctype}}, ndim=2, cast=True] values - {{else}} - ndarray[{{ctype}}, ndim=2] values - {{endif}} + ndarray[rank_t, ndim=2] values ndarray[int64_t, ndim=2] argsorted - {{if dtype == 'uint64'}} - {{ctype}} val - {{else}} - {{ctype}} val, nan_value - {{endif}} + rank_t val, nan_value float64_t sum_ranks = 0 int tiebreak = 0 bint keep_na = 0 float64_t count = 0.0 + bint condition, skip_condition tiebreak = tiebreakers[ties_method] keep_na = na_option == 'keep' - in_arr = np.asarray(in_arr) - if axis == 0: - values = in_arr.T.copy() + values = np.asarray(in_arr).T.copy() else: - values = in_arr.copy() - - {{if dtype == 'object'}} - if values.dtype != np.object_: - values = values.astype('O') - {{endif}} + values = np.asarray(in_arr).copy() - {{if dtype != 'uint64'}} - if ascending ^ (na_option == 'top'): - nan_value = {{pos_nan_value}} - else: - nan_value = {{neg_nan_value}} + if rank_t is object: + if values.dtype != np.object_: + values = values.astype('O') - {{if dtype == 'object'}} - mask = missing.isnaobj2d(values) - {{elif dtype == 'float64'}} - mask = np.isnan(values) - {{elif dtype == 'int64'}} - mask = values == NPY_NAT - {{endif}} + if rank_t is not uint64_t: + if ascending ^ (na_option == 'top'): + if rank_t is object: + nan_value = Infinity() + elif rank_t is float64_t: + nan_value = np.inf + elif rank_t is int64_t: + nan_value = np.iinfo(np.int64).max - 
np.putmask(values, mask, nan_value) - {{endif}} + else: + if rank_t is object: + nan_value = NegInfinity() + elif rank_t is float64_t: + nan_value = -np.inf + elif rank_t is int64_t: + nan_value = NPY_NAT + + if rank_t is object: + mask = missing.isnaobj2d(values) + elif rank_t is float64_t: + mask = np.isnan(values) + elif rank_t is int64_t: + mask = values == NPY_NAT + + np.putmask(values, mask, nan_value) n, k = (values).shape ranks = np.empty((n, k), dtype='f8') - {{if dtype == 'object'}} - try: - _as = values.argsort(1) - except TypeError: - values = in_arr - for i in range(len(values)): - ranks[i] = rank_1d_object(in_arr[i], ties_method=ties_method, - ascending=ascending, pct=pct) - if axis == 0: - return ranks.T - else: - return ranks - {{else}} - if tiebreak == TIEBREAK_FIRST: - # need to use a stable sort here - _as = values.argsort(axis=1, kind='mergesort') - if not ascending: - tiebreak = TIEBREAK_FIRST_DESCENDING + if rank_t is object: + try: + _as = values.argsort(1) + except TypeError: + values = in_arr + for i in range(len(values)): + ranks[i] = rank_1d_object(in_arr[i], ties_method=ties_method, + ascending=ascending, pct=pct) + if axis == 0: + return ranks.T + else: + return ranks else: - _as = values.argsort(1) - {{endif}} + if tiebreak == TIEBREAK_FIRST: + # need to use a stable sort here + _as = values.argsort(axis=1, kind='mergesort') + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + else: + _as = values.argsort(1) if not ascending: _as = _as[:, ::-1] - values = _take_2d_{{dtype}}(values, _as) + values = _take_2d(values, _as) argsorted = _as.astype('i8') for i in range(n): - {{if dtype == 'object'}} - dups = sum_ranks = infs = 0 - {{else}} - dups = sum_ranks = 0 - {{endif}} + if rank_t is object: + dups = sum_ranks = infs = 0 + else: + dups = sum_ranks = 0 total_tie_count = 0 count = 0.0 for j in range(k): - {{if dtype != 'object'}} - sum_ranks += j + 1 - dups += 1 - {{endif}} + if rank_t is not object: + sum_ranks += j + 1 + dups += 1 val = values[i, j] - {{if dtype != 'uint64'}} - {{if dtype == 'object'}} - if (val is nan_value) and keep_na: - {{else}} - if (val == nan_value) and keep_na: - {{endif}} - ranks[i, argsorted[i, j]] = NaN + if rank_t is not uint64_t: + if rank_t is object: + skip_condition = (val is nan_value) and keep_na + else: + skip_condition = (val == nan_value) and keep_na + if skip_condition: + ranks[i, argsorted[i, j]] = NaN - {{if dtype == 'object'}} - infs += 1 - {{endif}} + if rank_t is object: + infs += 1 - continue - {{endif}} + continue count += 1.0 - {{if dtype == 'object'}} - sum_ranks += (j - infs) + 1 - dups += 1 - {{endif}} + if rank_t is object: + sum_ranks += (j - infs) + 1 + dups += 1 - {{if dtype == 'object'}} - if j == k - 1 or are_diff(values[i, j + 1], val): - {{else}} - if j == k - 1 or values[i, j + 1] != val: - {{endif}} + if rank_t is object: + condition = j == k - 1 or are_diff(values[i, j + 1], val) + else: + condition = j == k - 1 or values[i, j + 1] != val + + if condition: if tiebreak == TIEBREAK_AVERAGE: for z in range(j - dups + 1, j + 1): ranks[i, argsorted[i, z]] = sum_ranks / dups @@ -340,13 +379,12 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', for z in range(j - dups + 1, j + 1): ranks[i, argsorted[i, z]] = j + 1 elif tiebreak == TIEBREAK_FIRST: - {{if dtype == 'object'}} - raise ValueError('first not supported ' - 'for non-numeric data') - {{else}} - for z in range(j - dups + 1, j + 1): - ranks[i, argsorted[i, z]] = z + 1 - {{endif}} + if rank_t is object: + raise 
ValueError('first not supported ' + 'for non-numeric data') + else: + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = z + 1 elif tiebreak == TIEBREAK_FIRST_DESCENDING: for z in range(j - dups + 1, j + 1): ranks[i, argsorted[i, z]] = 2 * j - z - dups + 2 @@ -365,4 +403,8 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', else: return ranks -{{endfor}} + +rank_2d_object = rank_2d["object"] +rank_2d_float64 = rank_2d["float64_t"] +rank_2d_uint64 = rank_2d["uint64_t"] +rank_2d_int64 = rank_2d["int64_t"] diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index 3a3adc71875ed..e7ee212065c5b 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -12,26 +12,26 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # name, dest, c_type_in, c_type_out, preval, postval, can_copy, nogil dtypes = [ - ('bool', 'bool', 'uint8_t', 'uint8_t', '', '', True, True), + ('bool', 'bool', 'uint8_t', 'uint8_t', '', '', True), ('bool', 'object', 'uint8_t', 'object', - 'True if ', ' > 0 else False', False, False), - ('int8', 'int8', 'int8_t', 'int8_t', '', '', True, False), - ('int8', 'int32', 'int8_t', 'int32_t', '', '', False, True), - ('int8', 'int64', 'int8_t', 'int64_t', '', '', False, True), - ('int8', 'float64', 'int8_t', 'float64_t', '', '', False, True), - ('int16', 'int16', 'int16_t', 'int16_t', '', '', True, True), - ('int16', 'int32', 'int16_t', 'int32_t', '', '', False, True), - ('int16', 'int64', 'int16_t', 'int64_t', '', '', False, True), - ('int16', 'float64', 'int16_t', 'float64_t', '', '', False, True), - ('int32', 'int32', 'int32_t', 'int32_t', '', '', True, True), - ('int32', 'int64', 'int32_t', 'int64_t', '', '', False, True), - ('int32', 'float64', 'int32_t', 'float64_t', '', '', False, True), - ('int64', 'int64', 'int64_t', 'int64_t', '', '', True, True), - ('int64', 'float64', 'int64_t', 'float64_t', '', '', False, True), - ('float32', 'float32', 'float32_t', 'float32_t', '', '', True, True), - ('float32', 'float64', 'float32_t', 'float64_t', '', '', False, True), - ('float64', 'float64', 'float64_t', 'float64_t', '', '', True, True), - ('object', 'object', 'object', 'object', '', '', False, False)] + 'True if ', ' > 0 else False', False), + ('int8', 'int8', 'int8_t', 'int8_t', '', '', True), + ('int8', 'int32', 'int8_t', 'int32_t', '', '', False), + ('int8', 'int64', 'int8_t', 'int64_t', '', '', False), + ('int8', 'float64', 'int8_t', 'float64_t', '', '', False), + ('int16', 'int16', 'int16_t', 'int16_t', '', '', True), + ('int16', 'int32', 'int16_t', 'int32_t', '', '', False), + ('int16', 'int64', 'int16_t', 'int64_t', '', '', False), + ('int16', 'float64', 'int16_t', 'float64_t', '', '', False), + ('int32', 'int32', 'int32_t', 'int32_t', '', '', True), + ('int32', 'int64', 'int32_t', 'int64_t', '', '', False), + ('int32', 'float64', 'int32_t', 'float64_t', '', '', False), + ('int64', 'int64', 'int64_t', 'int64_t', '', '', True), + ('int64', 'float64', 'int64_t', 'float64_t', '', '', False), + ('float32', 'float32', 'float32_t', 'float32_t', '', '', True), + ('float32', 'float64', 'float32_t', 'float64_t', '', '', False), + ('float64', 'float64', 'float64_t', 'float64_t', '', '', True), + ('object', 'object', 'object', 'object', '', '', False)] def get_dispatch(dtypes): @@ -118,7 +118,9 @@ def get_dispatch(dtypes): """ for (name, dest, c_type_in, c_type_out, preval, postval, - can_copy, nogil) in dtypes: + can_copy) in dtypes: + + nogil = c_type_out != 
"object" if nogil: nogil_str = "with nogil:" tab = ' ' @@ -276,7 +278,6 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx): Py_ssize_t i, j, N, K ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx ndarray[take_t, ndim=2] result - object val N, K = (values).shape diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 3069bbbf34bb7..8a417d8fe3a92 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -8,9 +8,11 @@ import numpy as np cimport numpy as cnp from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t) + uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t) cnp.import_array() +cdef extern from "numpy/npy_math.h": + float64_t NAN "NPY_NAN" from pandas._libs.util cimport numeric, get_nat @@ -21,6 +23,7 @@ from pandas._libs.algos import (take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers) cdef int64_t NPY_NAT = get_nat() +_int64_max = np.iinfo(np.int64).max cdef float64_t NaN = np.NaN @@ -372,7 +375,8 @@ def group_any_all(uint8_t[:] out, const uint8_t[:] mask, object val_test, bint skipna): - """Aggregated boolean values to show truthfulness of group elements + """ + Aggregated boolean values to show truthfulness of group elements. Parameters ---------- @@ -420,16 +424,23 @@ def group_any_all(uint8_t[:] out, if values[i] == flag_val: out[lab] = flag_val + # ---------------------------------------------------------------------- # group_add, group_prod, group_var, group_mean, group_ohlc # ---------------------------------------------------------------------- +ctypedef fused complexfloating_t: + float64_t + float32_t + complex64_t + complex128_t + @cython.wraparound(False) @cython.boundscheck(False) -def _group_add(floating[:, :] out, +def _group_add(complexfloating_t[:, :] out, int64_t[:] counts, - floating[:, :] values, + complexfloating_t[:, :] values, const int64_t[:] labels, Py_ssize_t min_count=0): """ @@ -437,13 +448,14 @@ def _group_add(floating[:, :] out, """ cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - floating val, count - floating[:, :] sumx, nobs + complexfloating_t val, count + complexfloating_t[:, :] sumx + int64_t[:, :] nobs if len(values) != len(labels): - raise AssertionError("len(index) != len(labels)") + raise ValueError("len(index) != len(labels)") - nobs = np.zeros_like(out) + nobs = np.zeros((len(out), out.shape[1]), dtype=np.int64) sumx = np.zeros_like(out) N, K = (values).shape @@ -461,7 +473,12 @@ def _group_add(floating[:, :] out, # not nan if val == val: nobs[lab, j] += 1 - sumx[lab, j] += val + if (complexfloating_t is complex64_t or + complexfloating_t is complex128_t): + # clang errors if we use += with these dtypes + sumx[lab, j] = sumx[lab, j] + val + else: + sumx[lab, j] += val for i in range(ncounts): for j in range(K): @@ -471,8 +488,10 @@ def _group_add(floating[:, :] out, out[i, j] = sumx[i, j] -group_add_float32 = _group_add['float'] -group_add_float64 = _group_add['double'] +group_add_float32 = _group_add['float32_t'] +group_add_float64 = _group_add['float64_t'] +group_add_complex64 = _group_add['float complex'] +group_add_complex128 = _group_add['double complex'] @cython.wraparound(False) @@ -491,7 +510,7 @@ def _group_prod(floating[:, :] out, floating[:, :] prodx, nobs if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") + raise ValueError("len(index) != len(labels)") nobs = np.zeros_like(out) prodx = np.ones_like(out) @@ -541,7 +560,7 @@ def _group_var(floating[:, 
:] out, assert min_count == -1, "'min_count' only used in add and prod" if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") + raise ValueError("len(index) != len(labels)") nobs = np.zeros_like(out) mean = np.zeros_like(out) @@ -596,7 +615,7 @@ def _group_mean(floating[:, :] out, assert min_count == -1, "'min_count' only used in add and prod" if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") + raise ValueError("len(index) != len(labels)") nobs = np.zeros_like(out) sumx = np.zeros_like(out) @@ -788,5 +807,687 @@ def group_quantile(ndarray[float64_t] out, grp_start += grp_sz -# generated from template -include "groupby_helper.pxi" +# ---------------------------------------------------------------------- +# group_nth, group_last, group_rank +# ---------------------------------------------------------------------- + +ctypedef fused rank_t: + float64_t + float32_t + int64_t + uint64_t + object + + +cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil: + if rank_t is object: + # Should never be used, but we need to avoid the `val != val` below + # or else cython will raise about gil acquisition. + raise NotImplementedError + + elif rank_t is int64_t: + return is_datetimelike and val == NPY_NAT + else: + return val != val + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_last(rank_t[:, :] out, + int64_t[:] counts, + rank_t[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + rank_t val + ndarray[rank_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint runtime_error = False + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + if rank_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (values).shape + + if rank_t is object: + # TODO: De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if val == val: + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if rank_t is int64_t: + out[i, j] = NPY_NAT + elif rank_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = NAN + + else: + out[i, j] = resx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. 
+ raise RuntimeError("empty group with uint64_t") + + +group_last_float64 = group_last["float64_t"] +group_last_float32 = group_last["float32_t"] +group_last_int64 = group_last["int64_t"] +group_last_object = group_last["object"] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_nth(rank_t[:, :] out, + int64_t[:] counts, + rank_t[:, :] values, + const int64_t[:] labels, int64_t rank, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + rank_t val + ndarray[rank_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint runtime_error = False + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + if rank_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (values).shape + + if rank_t is object: + # TODO: De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if val == val: + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if rank_t is int64_t: + out[i, j] = NPY_NAT + elif rank_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +group_nth_float64 = group_nth["float64_t"] +group_nth_float32 = group_nth["float32_t"] +group_nth_int64 = group_nth["int64_t"] +group_nth_object = group_nth["object"] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_rank(float64_t[:, :] out, + rank_t[:, :] values, + const int64_t[:] labels, + bint is_datetimelike, object ties_method, + bint ascending, bint pct, object na_option): + """ + Provides the rank of values within each group. 
+ + Parameters + ---------- + out : array of float64_t values which this method will write its results to + values : array of rank_t values to be ranked + labels : array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + is_datetimelike : bool, default False + unused in this method but provided for call compatibility with other + Cython transformations + ties_method : {'average', 'min', 'max', 'first', 'dense'}, default + 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + ascending : boolean, default True + False for ranks by high (1) to low (N) + na_option : {'keep', 'top', 'bottom'}, default 'keep' + pct : boolean, default False + Compute percentage rank of data within each group + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: smallest rank if ascending + * bottom: smallest rank if descending + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + TiebreakEnumType tiebreak + Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0 + Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0 + ndarray[int64_t] _as + ndarray[float64_t, ndim=2] grp_sizes + ndarray[rank_t] masked_vals + ndarray[uint8_t] mask + bint keep_na + rank_t nan_fill_val + + if rank_t is object: + raise NotImplementedError("Cant do nogil") + + tiebreak = tiebreakers[ties_method] + keep_na = na_option == 'keep' + N, K = (values).shape + grp_sizes = np.ones_like(out) + + # Copy values into new array in order to fill missing data + # with mask, without obfuscating location of missing data + # in values array + masked_vals = np.array(values[:, 0], copy=True) + if rank_t is int64_t: + mask = (masked_vals == NPY_NAT).astype(np.uint8) + else: + mask = np.isnan(masked_vals).astype(np.uint8) + + if ascending ^ (na_option == 'top'): + if rank_t is int64_t: + nan_fill_val = np.iinfo(np.int64).max + elif rank_t is uint64_t: + nan_fill_val = np.iinfo(np.uint64).max + else: + nan_fill_val = np.inf + order = (masked_vals, mask, labels) + else: + if rank_t is int64_t: + nan_fill_val = np.iinfo(np.int64).min + elif rank_t is uint64_t: + nan_fill_val = 0 + else: + nan_fill_val = -np.inf + + order = (masked_vals, ~mask, labels) + np.putmask(masked_vals, mask, nan_fill_val) + + # lexsort using labels, then mask, then actual values + # each label corresponds to a different group value, + # the mask helps you differentiate missing values before + # performing sort on the actual values + _as = np.lexsort(order).astype(np.int64, copy=False) + + if not ascending: + _as = _as[::-1] + + with nogil: + # Loop over the length of the value array + # each incremental i value can be looked up in the _as array + # that we sorted previously, which gives us the location of + # that sorted value for retrieval back from the original + # values / masked_vals arrays + for i in range(N): + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change + # Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + # Update out only when there is a transition of values or labels. 
+ # When a new value or group is encountered, go back #dups steps( + # the number of occurrence of current value) and assign the ranks + # based on the the starting index of the current group (grp_start) + # and the current index + if (i == N - 1 or + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or + (mask[_as[i]] ^ mask[_as[i+1]]) or + (labels[_as[i]] != labels[_as[i+1]])): + # if keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and mask[_as[i]]: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = NaN + grp_na_count = dups + elif tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = i - grp_start + 1 + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + if ascending: + out[_as[j], 0] = j + 1 - grp_start + else: + out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = grp_vals_seen + + # look forward to the next value (using the sorting in _as) + # if the value does not equal the current value then we need to + # reset the dups and sum_ranks, knowing that a new value is + # coming up. the conditional also needs to handle nan equality + # and the end of iteration + if (i == N - 1 or + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or + (mask[_as[i]] ^ mask[_as[i+1]])): + dups = sum_ranks = 0 + grp_vals_seen += 1 + grp_tie_count += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. fill in the size of each + # group encountered (used by pct calculations later). also be + # sure to reset any of the items helping to calculate dups + if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]: + if tiebreak != TIEBREAK_DENSE: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = (i - grp_start + 1 - + grp_na_count) + else: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = (grp_tie_count - + (grp_na_count > 0)) + dups = sum_ranks = 0 + grp_na_count = 0 + grp_tie_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 + + if pct: + for i in range(N): + # We don't include NaN values in percentage + # rankings, so we assign them percentages of NaN. 
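# A minimal usage sketch of the tie-breaking and pct behaviour this rank
# kernel sits behind, written against the public groupby API. The Series
# contents and grouping key below are illustrative, not taken from the patch.
import pandas as pd

s = pd.Series([3.0, 3.0, 1.0, 2.0, 2.0])
key = pd.Series(["a", "a", "a", "b", "b"])

s.groupby(key).rank(method="average")  # ties share their mean rank: 2.5, 2.5, 1.0, 1.5, 1.5
s.groupby(key).rank(method="dense")    # rank grows by 1 per distinct value within the group
s.groupby(key).rank(pct=True)          # each rank divided by the group's non-NaN size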
+ if out[i, 0] != out[i, 0] or out[i, 0] == NAN: + out[i, 0] = NAN + elif grp_sizes[i, 0] != 0: + out[i, 0] = out[i, 0] / grp_sizes[i, 0] + + +group_rank_float64 = group_rank["float64_t"] +group_rank_float32 = group_rank["float32_t"] +group_rank_int64 = group_rank["int64_t"] +group_rank_uint64 = group_rank["uint64_t"] +# Note: we do not have a group_rank_object because that would require a +# not-nogil implementation, see GH#19560 + + +# ---------------------------------------------------------------------- +# group_min, group_max +# ---------------------------------------------------------------------- + +# TODO: consider implementing for more dtypes +ctypedef fused groupby_t: + float64_t + float32_t + int64_t + uint64_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_max(groupby_t[:, :] out, + int64_t[:] counts, + groupby_t[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + groupby_t val, count, nan_val + ndarray[groupby_t, ndim=2] maxx, nobs + bint runtime_error = False + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros_like(out) + + maxx = np.empty_like(out) + if groupby_t is int64_t: + # Note: evaluated at compile-time + maxx[:] = -_int64_max + nan_val = NPY_NAT + elif groupby_t is uint64_t: + # NB: We do not define nan_val because there is no such thing + # for uint64_t. We carefully avoid having to reference it in this + # case. + maxx[:] = 0 + else: + maxx[:] = -np.inf + nan_val = NAN + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if groupby_t is uint64_t: + runtime_error = True + break + out[i, j] = nan_val + else: + out[i, j] = maxx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_min(groupby_t[:, :] out, + int64_t[:] counts, + groupby_t[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + groupby_t val, count, nan_val + ndarray[groupby_t, ndim=2] minx, nobs + bint runtime_error = False + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros_like(out) + + minx = np.empty_like(out) + if groupby_t is int64_t: + minx[:] = _int64_max + nan_val = NPY_NAT + elif groupby_t is uint64_t: + # NB: We do not define nan_val because there is no such thing + # for uint64_t. We carefully avoid having to reference it in this + # case. + minx[:] = np.iinfo(np.uint64).max + else: + minx[:] = np.inf + nan_val = NAN + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? 
+ nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if groupby_t is uint64_t: + runtime_error = True + break + out[i, j] = nan_val + else: + out[i, j] = minx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummin(groupby_t[:, :] out, + groupby_t[:, :] values, + const int64_t[:] labels, + int ngroups, + bint is_datetimelike): + """ + Cumulative minimum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : array + Array to store cummin in. + values : array + Values to take cummin of. + labels : int64 array + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + + cdef: + Py_ssize_t i, j, N, K, size + groupby_t val, mval + ndarray[groupby_t, ndim=2] accum + int64_t lab + + N, K = (values).shape + accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) + if groupby_t is int64_t: + accum[:] = _int64_max + elif groupby_t is uint64_t: + accum[:] = np.iinfo(np.uint64).max + else: + accum[:] = np.inf + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if _treat_as_na(val, is_datetimelike): + out[i, j] = val + else: + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummax(groupby_t[:, :] out, + groupby_t[:, :] values, + const int64_t[:] labels, + int ngroups, + bint is_datetimelike): + """ + Cumulative maximum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : array + Array to store cummax in. + values : array + Values to take cummax of. + labels : int64 array + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. 
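# A short sketch of the user-facing behaviour the cummin/cummax kernels sit
# behind, including the NaN pass-through handled by _treat_as_na. The frame
# below is illustrative, not taken from the patch.
import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "a", "b"],
                   "val": [3.0, np.nan, 5.0, 1.0, 2.0]})

df.groupby("key")["val"].cummin()
# expected: 3.0, NaN, 5.0, 1.0, 2.0 -- the NaN is written through unchanged
# and does not update the running minimum for group "a"
df.groupby("key")["val"].cummax()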
+ """ + + cdef: + Py_ssize_t i, j, N, K, size + groupby_t val, mval + ndarray[groupby_t, ndim=2] accum + int64_t lab + + N, K = (values).shape + accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) + if groupby_t is int64_t: + accum[:] = -_int64_max + elif groupby_t is uint64_t: + accum[:] = 0 + else: + accum[:] = -np.inf + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if _treat_as_na(val, is_datetimelike): + out[i, j] = val + else: + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in deleted file mode 100644 index 6b434b6470581..0000000000000 --- a/pandas/_libs/groupby_helper.pxi.in +++ /dev/null @@ -1,670 +0,0 @@ -""" -Template for each `dtype` helper function using groupby - -WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in -""" - -cdef extern from "numpy/npy_math.h": - float64_t NAN "NPY_NAN" -_int64_max = np.iinfo(np.int64).max - -# ---------------------------------------------------------------------- -# group_nth, group_last, group_rank -# ---------------------------------------------------------------------- - -ctypedef fused rank_t: - float64_t - float32_t - int64_t - object - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_last(rank_t[:, :] out, - int64_t[:] counts, - rank_t[:, :] values, - const int64_t[:] labels, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - rank_t val - ndarray[rank_t, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - assert min_count == -1, "'min_count' only used in add and prod" - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros((out).shape, dtype=np.int64) - if rank_t is object: - resx = np.empty((out).shape, dtype=object) - else: - resx = np.empty_like(out) - - N, K = (values).shape - - if rank_t is object: - # TODO: De-duplicate once conditional-nogil is available - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if rank_t is int64_t: - # need a special notna check - if val != NPY_NAT: - nobs[lab, j] += 1 - resx[lab, j] = val - else: - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - if rank_t is int64_t: - out[i, j] = NPY_NAT - else: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - else: - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if rank_t is int64_t: - # need a special notna check - if val != NPY_NAT: - nobs[lab, j] += 1 - resx[lab, j] = val - else: - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - if rank_t is int64_t: - out[i, j] = NPY_NAT - else: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - -group_last_float64 = group_last["float64_t"] -group_last_float32 = group_last["float32_t"] -group_last_int64 = group_last["int64_t"] -group_last_object = group_last["object"] - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_nth(rank_t[:, :] out, - int64_t[:] counts, - rank_t[:, :] values, - const int64_t[:] labels, int64_t rank, - Py_ssize_t min_count=-1): - """ - Only aggregates 
on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - rank_t val - ndarray[rank_t, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - assert min_count == -1, "'min_count' only used in add and prod" - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros((out).shape, dtype=np.int64) - if rank_t is object: - resx = np.empty((out).shape, dtype=object) - else: - resx = np.empty_like(out) - - N, K = (values).shape - - if rank_t is object: - # TODO: De-duplicate once conditional-nogil is available - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - - else: - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if rank_t is int64_t: - # need a special notna check - if val != NPY_NAT: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - else: - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - if rank_t is int64_t: - out[i, j] = NPY_NAT - else: - out[i, j] = NAN - else: - out[i, j] = resx[i, j] - - -group_nth_float64 = group_nth["float64_t"] -group_nth_float32 = group_nth["float32_t"] -group_nth_int64 = group_nth["int64_t"] -group_nth_object = group_nth["object"] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_rank(float64_t[:, :] out, - rank_t[:, :] values, - const int64_t[:] labels, - bint is_datetimelike, object ties_method, - bint ascending, bint pct, object na_option): - """ - Provides the rank of values within each group. 
- - Parameters - ---------- - out : array of float64_t values which this method will write its results to - values : array of rank_t values to be ranked - labels : array containing unique label for each group, with its ordering - matching up to the corresponding record in `values` - is_datetimelike : bool, default False - unused in this method but provided for call compatibility with other - Cython transformations - ties_method : {'average', 'min', 'max', 'first', 'dense'}, default - 'average' - * average: average rank of group - * min: lowest rank in group - * max: highest rank in group - * first: ranks assigned in order they appear in the array - * dense: like 'min', but rank always increases by 1 between groups - ascending : boolean, default True - False for ranks by high (1) to low (N) - na_option : {'keep', 'top', 'bottom'}, default 'keep' - pct : boolean, default False - Compute percentage rank of data within each group - na_option : {'keep', 'top', 'bottom'}, default 'keep' - * keep: leave NA values where they are - * top: smallest rank if ascending - * bottom: smallest rank if descending - - Notes - ----- - This method modifies the `out` parameter rather than returning an object - """ - cdef: - TiebreakEnumType tiebreak - Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0 - Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0 - ndarray[int64_t] _as - ndarray[float64_t, ndim=2] grp_sizes - ndarray[rank_t] masked_vals - ndarray[uint8_t] mask - bint keep_na - rank_t nan_fill_val - - if rank_t is object: - raise NotImplementedError("Cant do nogil") - - tiebreak = tiebreakers[ties_method] - keep_na = na_option == 'keep' - N, K = (values).shape - grp_sizes = np.ones_like(out) - - # Copy values into new array in order to fill missing data - # with mask, without obfuscating location of missing data - # in values array - masked_vals = np.array(values[:, 0], copy=True) - if rank_t is int64_t: - mask = (masked_vals == NPY_NAT).astype(np.uint8) - else: - mask = np.isnan(masked_vals).astype(np.uint8) - - if ascending ^ (na_option == 'top'): - if rank_t is int64_t: - nan_fill_val = np.iinfo(np.int64).max - else: - nan_fill_val = np.inf - order = (masked_vals, mask, labels) - else: - if rank_t is int64_t: - nan_fill_val = np.iinfo(np.int64).min - else: - nan_fill_val = -np.inf - - order = (masked_vals, ~mask, labels) - np.putmask(masked_vals, mask, nan_fill_val) - - # lexsort using labels, then mask, then actual values - # each label corresponds to a different group value, - # the mask helps you differentiate missing values before - # performing sort on the actual values - _as = np.lexsort(order).astype(np.int64, copy=False) - - if not ascending: - _as = _as[::-1] - - with nogil: - # Loop over the length of the value array - # each incremental i value can be looked up in the _as array - # that we sorted previously, which gives us the location of - # that sorted value for retrieval back from the original - # values / masked_vals arrays - for i in range(N): - # dups and sum_ranks will be incremented each loop where - # the value / group remains the same, and should be reset - # when either of those change - # Used to calculate tiebreakers - dups += 1 - sum_ranks += i - grp_start + 1 - - # Update out only when there is a transition of values or labels. 
- # When a new value or group is encountered, go back #dups steps( - # the number of occurrence of current value) and assign the ranks - # based on the the starting index of the current group (grp_start) - # and the current index - if (i == N - 1 or - (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or - (mask[_as[i]] ^ mask[_as[i+1]]) or - (labels[_as[i]] != labels[_as[i+1]])): - # if keep_na, check for missing values and assign back - # to the result where appropriate - if keep_na and mask[_as[i]]: - for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = NaN - grp_na_count = dups - elif tiebreak == TIEBREAK_AVERAGE: - for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = sum_ranks / dups - elif tiebreak == TIEBREAK_MIN: - for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = i - grp_start - dups + 2 - elif tiebreak == TIEBREAK_MAX: - for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = i - grp_start + 1 - elif tiebreak == TIEBREAK_FIRST: - for j in range(i - dups + 1, i + 1): - if ascending: - out[_as[j], 0] = j + 1 - grp_start - else: - out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start - elif tiebreak == TIEBREAK_DENSE: - for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = grp_vals_seen - - # look forward to the next value (using the sorting in _as) - # if the value does not equal the current value then we need to - # reset the dups and sum_ranks, knowing that a new value is - # coming up. the conditional also needs to handle nan equality - # and the end of iteration - if (i == N - 1 or - (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or - (mask[_as[i]] ^ mask[_as[i+1]])): - dups = sum_ranks = 0 - grp_vals_seen += 1 - grp_tie_count += 1 - - # Similar to the previous conditional, check now if we are - # moving to a new group. If so, keep track of the index where - # the new group occurs, so the tiebreaker calculations can - # decrement that from their position. fill in the size of each - # group encountered (used by pct calculations later). also be - # sure to reset any of the items helping to calculate dups - if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]: - if tiebreak != TIEBREAK_DENSE: - for j in range(grp_start, i + 1): - grp_sizes[_as[j], 0] = (i - grp_start + 1 - - grp_na_count) - else: - for j in range(grp_start, i + 1): - grp_sizes[_as[j], 0] = (grp_tie_count - - (grp_na_count > 0)) - dups = sum_ranks = 0 - grp_na_count = 0 - grp_tie_count = 0 - grp_start = i + 1 - grp_vals_seen = 1 - - if pct: - for i in range(N): - # We don't include NaN values in percentage - # rankings, so we assign them percentages of NaN. 
- if out[i, 0] != out[i, 0] or out[i, 0] == NAN: - out[i, 0] = NAN - elif grp_sizes[i, 0] != 0: - out[i, 0] = out[i, 0] / grp_sizes[i, 0] - - -group_rank_float64 = group_rank["float64_t"] -group_rank_float32 = group_rank["float32_t"] -group_rank_int64 = group_rank["int64_t"] -# Note: we do not have a group_rank_object because that would require a -# not-nogil implementation, see GH#19560 - - -# ---------------------------------------------------------------------- -# group_min, group_max -# ---------------------------------------------------------------------- - -# TODO: consider implementing for more dtypes -ctypedef fused groupby_t: - float64_t - float32_t - int64_t - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_max(groupby_t[:, :] out, - int64_t[:] counts, - groupby_t[:, :] values, - const int64_t[:] labels, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - groupby_t val, count, nan_val - ndarray[groupby_t, ndim=2] maxx, nobs - - assert min_count == -1, "'min_count' only used in add and prod" - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros_like(out) - - maxx = np.empty_like(out) - if groupby_t is int64_t: - # Note: evaluated at compile-time - maxx[:] = -_int64_max - nan_val = NPY_NAT - else: - maxx[:] = -np.inf - nan_val = NAN - - N, K = (values).shape - - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if groupby_t is int64_t: - if val != nan_val: - nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val - else: - if val == val and val != nan_val: - nobs[lab, j] += 1 - if val > maxx[lab, j]: - maxx[lab, j] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan_val - else: - out[i, j] = maxx[i, j] - - -@cython.wraparound(False) -@cython.boundscheck(False) -def group_min(groupby_t[:, :] out, - int64_t[:] counts, - groupby_t[:, :] values, - const int64_t[:] labels, - Py_ssize_t min_count=-1): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab, ncounts = len(counts) - groupby_t val, count, nan_val - ndarray[groupby_t, ndim=2] minx, nobs - - assert min_count == -1, "'min_count' only used in add and prod" - - if not len(values) == len(labels): - raise AssertionError("len(index) != len(labels)") - - nobs = np.zeros_like(out) - - minx = np.empty_like(out) - if groupby_t is int64_t: - minx[:] = _int64_max - nan_val = NPY_NAT - else: - minx[:] = np.inf - nan_val = NAN - - N, K = (values).shape - - with nogil: - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if groupby_t is int64_t: - if val != nan_val: - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val - else: - if val == val and val != nan_val: - nobs[lab, j] += 1 - if val < minx[lab, j]: - minx[lab, j] = val - - for i in range(ncounts): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan_val - else: - out[i, j] = minx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_cummin(groupby_t[:, :] out, - groupby_t[:, :] values, - const int64_t[:] labels, - int ngroups, - bint is_datetimelike): - """ - Cumulative minimum of columns of `values`, in row groups `labels`. - - Parameters - ---------- - out : array - Array to store cummin in. 
- values : array - Values to take cummin of. - labels : int64 array - Labels to group by. - ngroups : int - Number of groups, larger than all entries of `labels`. - is_datetimelike : bool - True if `values` contains datetime-like entries. - - Notes - ----- - This method modifies the `out` parameter, rather than returning an object. - """ - - cdef: - Py_ssize_t i, j, N, K, size - groupby_t val, mval - ndarray[groupby_t, ndim=2] accum - int64_t lab - - N, K = (values).shape - accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) - if groupby_t is int64_t: - accum[:] = _int64_max - else: - accum[:] = np.inf - - with nogil: - for i in range(N): - lab = labels[i] - - if lab < 0: - continue - for j in range(K): - val = values[i, j] - - # val = nan - if groupby_t is int64_t: - if is_datetimelike and val == NPY_NAT: - out[i, j] = NPY_NAT - else: - mval = accum[lab, j] - if val < mval: - accum[lab, j] = mval = val - out[i, j] = mval - else: - if val == val: - mval = accum[lab, j] - if val < mval: - accum[lab, j] = mval = val - out[i, j] = mval - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_cummax(groupby_t[:, :] out, - groupby_t[:, :] values, - const int64_t[:] labels, - int ngroups, - bint is_datetimelike): - """ - Cumulative maximum of columns of `values`, in row groups `labels`. - - Parameters - ---------- - out : array - Array to store cummax in. - values : array - Values to take cummax of. - labels : int64 array - Labels to group by. - ngroups : int - Number of groups, larger than all entries of `labels`. - is_datetimelike : bool - True if `values` contains datetime-like entries. - - Notes - ----- - This method modifies the `out` parameter, rather than returning an object. - """ - - cdef: - Py_ssize_t i, j, N, K, size - groupby_t val, mval - ndarray[groupby_t, ndim=2] accum - int64_t lab - - N, K = (values).shape - accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) - if groupby_t is int64_t: - accum[:] = -_int64_max - else: - accum[:] = -np.inf - - with nogil: - for i in range(N): - lab = labels[i] - - if lab < 0: - continue - for j in range(K): - val = values[i, j] - - if groupby_t is int64_t: - if is_datetimelike and val == NPY_NAT: - out[i, j] = NPY_NAT - else: - mval = accum[lab, j] - if val > mval: - accum[lab, j] = mval = val - out[i, j] = mval - else: - if val == val: - mval = accum[lab, j] - if val > mval: - accum[lab, j] = mval = val - out[i, j] = mval diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 979dad6db0838..144d555258c50 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -41,11 +41,13 @@ cdef inline bint is_definitely_invalid_key(object val): cpdef get_value_at(ndarray arr, object loc, object tz=None): + obj = util.get_value_at(arr, loc) + if arr.descr.type_num == NPY_DATETIME: - return Timestamp(util.get_value_at(arr, loc), tz=tz) + return Timestamp(obj, tz=tz) elif arr.descr.type_num == NPY_TIMEDELTA: - return Timedelta(util.get_value_at(arr, loc)) - return util.get_value_at(arr, loc) + return Timedelta(obj) + return obj # Don't populate hash tables in monotonic indexes larger than this @@ -102,6 +104,9 @@ cdef class IndexEngine: arr[loc] = value cpdef get_loc(self, object val): + cdef: + Py_ssize_t loc + if is_definitely_invalid_key(val): raise TypeError("'{val}' is an invalid key".format(val=val)) @@ -114,7 +119,7 @@ cdef class IndexEngine: loc = _bin_search(values, val) # .searchsorted(val, side='left') if loc >= len(values): raise KeyError(val) - if util.get_value_at(values, loc) != val: + if 
values[loc] != val: raise KeyError(val) return loc @@ -281,7 +286,7 @@ cdef class IndexEngine: cdef: ndarray values, x ndarray[int64_t] result, missing - set stargets + set stargets, remaining_stargets dict d = {} object val int count = 0, count_missing = 0 @@ -304,12 +309,20 @@ cdef class IndexEngine: if stargets and len(stargets) < 5 and self.is_monotonic_increasing: # if there are few enough stargets and the index is monotonically # increasing, then use binary search for each starget + remaining_stargets = set() for starget in stargets: - start = values.searchsorted(starget, side='left') - end = values.searchsorted(starget, side='right') - if start != end: - d[starget] = list(range(start, end)) - else: + try: + start = values.searchsorted(starget, side='left') + end = values.searchsorted(starget, side='right') + except TypeError: # e.g. if we tried to search for string in int array + remaining_stargets.add(starget) + else: + if start != end: + d[starget] = list(range(start, end)) + + stargets = remaining_stargets + + if stargets: # otherwise, map by iterating through all items in the index for i in range(n): val = values[i] @@ -352,22 +365,22 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1: Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1 object pval - if hi == 0 or (hi > 0 and val > util.get_value_at(values, hi)): + if hi == 0 or (hi > 0 and val > values[hi]): return len(values) while lo < hi: mid = (lo + hi) // 2 - pval = util.get_value_at(values, mid) + pval = values[mid] if val < pval: hi = mid elif val > pval: lo = mid + 1 else: - while mid > 0 and val == util.get_value_at(values, mid - 1): + while mid > 0 and val == values[mid - 1]: mid -= 1 return mid - if val <= util.get_value_at(values, mid): + if val <= values[mid]: return mid else: return mid + 1 @@ -387,13 +400,16 @@ cdef class DatetimeEngine(Int64Engine): return 'M8[ns]' def __contains__(self, object val): + cdef: + int64_t loc + if self.over_size_threshold and self.is_monotonic_increasing: if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() conv = maybe_datetimelike_to_i8(val) loc = values.searchsorted(conv, side='left') - return util.get_value_at(values, loc) == conv + return values[loc] == conv self._ensure_mapping_populated() return maybe_datetimelike_to_i8(val) in self.mapping @@ -405,6 +421,8 @@ cdef class DatetimeEngine(Int64Engine): return algos.is_monotonic(values, timelike=True) cpdef get_loc(self, object val): + cdef: + int64_t loc if is_definitely_invalid_key(val): raise TypeError @@ -422,7 +440,7 @@ cdef class DatetimeEngine(Int64Engine): self._date_check_type(val) raise KeyError(val) - if loc == len(values) or util.get_value_at(values, loc) != conv: + if loc == len(values) or values[loc] != conv: raise KeyError(val) return loc diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index ac713a928973f..08bfaf21db9fb 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -158,7 +158,7 @@ cdef class IntervalTree(IntervalMixin): # TODO: write get_indexer_intervals cdef: - size_t old_len + Py_ssize_t old_len Py_ssize_t i Int64Vector result @@ -179,7 +179,7 @@ cdef class IntervalTree(IntervalMixin): the given array of scalar targets. Non-unique positions are repeated. 
""" cdef: - size_t old_len + Py_ssize_t old_len Py_ssize_t i Int64Vector result, missing diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1c2f80b832201..b13246a4a969c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -137,8 +137,8 @@ def is_scalar(val: object) -> bool: Examples -------- - >>> dt = pd.datetime.datetime(2018, 10, 3) - >>> pd.is_scalar(dt) + >>> dt = datetime.datetime(2018, 10, 3) + >>> pd.api.types.is_scalar(dt) True >>> pd.api.types.is_scalar([2, 3]) @@ -782,8 +782,16 @@ def generate_slices(const int64_t[:] labels, Py_ssize_t ngroups): return starts, ends -def indices_fast(object index, const int64_t[:] labels, list keys, +def indices_fast(ndarray index, const int64_t[:] labels, list keys, list sorted_labels): + """ + Parameters + ---------- + index : ndarray + labels : ndarray[int64] + keys : list + sorted_labels : list[ndarray[int64]] + """ cdef: Py_ssize_t i, j, k, lab, cur, start, n = len(labels) dict result = {} @@ -803,8 +811,7 @@ def indices_fast(object index, const int64_t[:] labels, list keys, if lab != -1: tup = PyTuple_New(k) for j in range(k): - val = util.get_value_at(keys[j], - sorted_labels[j][i - 1]) + val = keys[j][sorted_labels[j][i - 1]] PyTuple_SET_ITEM(tup, j, val) Py_INCREF(val) @@ -814,8 +821,7 @@ def indices_fast(object index, const int64_t[:] labels, list keys, tup = PyTuple_New(k) for j in range(k): - val = util.get_value_at(keys[j], - sorted_labels[j][n - 1]) + val = keys[j][sorted_labels[j][n - 1]] PyTuple_SET_ITEM(tup, j, val) Py_INCREF(val) result[tup] = index[start:] @@ -2066,7 +2072,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, floats[i] = float(val) complexes[i] = complex(val) seen.float_ = 1 - except Exception: + except (ValueError, TypeError): seen.object_ = 1 break else: @@ -2346,7 +2352,8 @@ def to_object_array_tuples(rows: object): row = rows[i] for j in range(len(row)): result[i, j] = row[j] - except Exception: + except TypeError: + # e.g. 
"Expected tuple, got list" # upcast any subclasses to tuple for i in range(n): row = (rows[i],) if checknull(rows[i]) else tuple(rows[i]) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 34eb9412451c5..0eac0e94f0beb 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -121,7 +121,7 @@ cdef class Reducer: for i in range(self.nresults): if has_ndarray_labels: - name = util.get_value_at(labels, i) + name = labels[i] elif has_labels: # labels is an ExtensionArray name = labels[i] diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 33665484311ba..bf0a0ae5a3fe9 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -581,7 +581,7 @@ def try_parse_dates(object[:] values, parser=None, else: result[i] = parse_date(values[i]) except Exception: - # failed + # Since parser is user-defined, we can't guess what it migh raise return values else: parse_date = parser diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index cbfbc14c35b35..bc1fdfae99de9 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -226,11 +226,8 @@ cdef object get_dst_info(object tz): if treat_tz_as_pytz(tz): trans = np.array(tz._utc_transition_times, dtype='M8[ns]') trans = trans.view('i8') - try: - if tz._utc_transition_times[0].year == 1: - trans[0] = NPY_NAT + 1 - except Exception: - pass + if tz._utc_transition_times[0].year == 1: + trans[0] = NPY_NAT + 1 deltas = unbox_utcoffsets(tz._transition_info) typ = 'pytz' diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index ce56c08d3ec14..402ed62f2df65 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -12,6 +12,7 @@ _np_version_under1p15 = _nlv < LooseVersion("1.15") _np_version_under1p16 = _nlv < LooseVersion("1.16") _np_version_under1p17 = _nlv < LooseVersion("1.17") +_np_version_under1p18 = _nlv < LooseVersion("1.18") _is_numpy_dev = ".dev" in str(_nlv) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index bce6c352ce480..fc60c01d7b808 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -4,7 +4,7 @@ that can be mixed into or pinned onto other pandas classes. 
""" -from typing import Set +from typing import FrozenSet, Set import warnings from pandas.util._decorators import Appender @@ -12,9 +12,7 @@ class DirNamesMixin: _accessors = set() # type: Set[str] - _deprecations = frozenset( - ["asobject", "base", "data", "flags", "itemsize", "strides"] - ) + _deprecations = frozenset() # type: FrozenSet[str] def _dir_deletions(self): """ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2e5ab0d182aff..717c2eb26be8b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1304,7 +1304,7 @@ def get_indexer(current_indexer, other_indexer): return frame.sort_values(columns, ascending=ascending, kind="mergesort") -# ------- ## ---- # +# ---- # # take # # ---- # @@ -1712,59 +1712,44 @@ def take_nd( take_1d = take_nd -def take_2d_multi( - arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True -): +def take_2d_multi(arr, indexer, fill_value=np.nan): """ Specialized Cython take which sets NaN values in one pass """ - if indexer is None or (indexer[0] is None and indexer[1] is None): - row_idx = np.arange(arr.shape[0], dtype=np.int64) - col_idx = np.arange(arr.shape[1], dtype=np.int64) - indexer = row_idx, col_idx - dtype, fill_value = arr.dtype, arr.dtype.type() - else: - row_idx, col_idx = indexer - if row_idx is None: - row_idx = np.arange(arr.shape[0], dtype=np.int64) - else: - row_idx = ensure_int64(row_idx) - if col_idx is None: - col_idx = np.arange(arr.shape[1], dtype=np.int64) - else: - col_idx = ensure_int64(col_idx) - indexer = row_idx, col_idx - if not allow_fill: + # This is only called from one place in DataFrame._reindex_multi, + # so we know indexer is well-behaved. + assert indexer is not None + assert indexer[0] is not None + assert indexer[1] is not None + + row_idx, col_idx = indexer + + row_idx = ensure_int64(row_idx) + col_idx = ensure_int64(col_idx) + indexer = row_idx, col_idx + mask_info = None + + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + mask_info = (row_mask, col_mask), (row_needs, col_needs) + + if not (row_needs or col_needs): + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() - mask_info = None, False - else: - # check for promotion based on types only (do this first because - # it's faster than computing a mask) - dtype, fill_value = maybe_promote(arr.dtype, fill_value) - if dtype != arr.dtype and (out is None or out.dtype != dtype): - # check if promotion is actually required based on indexer - if mask_info is not None: - (row_mask, col_mask), (row_needs, col_needs) = mask_info - else: - row_mask = row_idx == -1 - col_mask = col_idx == -1 - row_needs = row_mask.any() - col_needs = col_mask.any() - mask_info = (row_mask, col_mask), (row_needs, col_needs) - if row_needs or col_needs: - if out is not None and out.dtype != dtype: - raise TypeError("Incompatible type for fill_value") - else: - # if not, then depromote, set fill_value to dummy - # (it won't be used but we don't want the cython code - # to crash when trying to cast it to dtype) - dtype, fill_value = arr.dtype, arr.dtype.type() # at this point, 
it's guaranteed that dtype can hold both the arr values # and the fill_value - if out is None: - out_shape = len(row_idx), len(col_idx) - out = np.empty(out_shape, dtype=dtype) + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) if func is None and arr.dtype != out.dtype: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 91f3e878c3807..f402154dc91ca 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1,5 +1,4 @@ import inspect -import warnings import numpy as np @@ -21,9 +20,7 @@ def frame_apply( obj, func, axis=0, - broadcast=None, raw=False, - reduce=None, result_type=None, ignore_failures=False, args=None, @@ -40,9 +37,7 @@ def frame_apply( return klass( obj, func, - broadcast=broadcast, raw=raw, - reduce=reduce, result_type=result_type, ignore_failures=ignore_failures, args=args, @@ -51,18 +46,7 @@ def frame_apply( class FrameApply: - def __init__( - self, - obj, - func, - broadcast, - raw, - reduce, - result_type, - ignore_failures, - args, - kwds, - ): + def __init__(self, obj, func, raw, result_type, ignore_failures, args, kwds): self.obj = obj self.raw = raw self.ignore_failures = ignore_failures @@ -75,34 +59,6 @@ def __init__( "of {None, 'reduce', 'broadcast', 'expand'}" ) - if broadcast is not None: - warnings.warn( - "The broadcast argument is deprecated and will " - "be removed in a future version. You can specify " - "result_type='broadcast' to broadcast the result " - "to the original dimensions", - FutureWarning, - stacklevel=4, - ) - if broadcast: - result_type = "broadcast" - - if reduce is not None: - warnings.warn( - "The reduce argument is deprecated and will " - "be removed in a future version. You can specify " - "result_type='reduce' to try to reduce the result " - "to the original dimensions", - FutureWarning, - stacklevel=4, - ) - if reduce: - - if result_type is not None: - raise ValueError("cannot pass both reduce=True and result_type") - - result_type = "reduce" - self.result_type = result_type # curry if needed diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 7a16c3f6a35b6..53755695c97e3 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -474,7 +474,7 @@ def fillna(self, value=None, method=None, limit=None): method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap + backfill / bfill: use NEXT valid observation to fill gap. limit : int, default None If method is specified, this is the maximum number of consecutive NaN values to forward/backward fill. In other words, if there is @@ -485,7 +485,8 @@ def fillna(self, value=None, method=None, limit=None): Returns ------- - filled : ExtensionArray with NA/NaN filled + ExtensionArray + With NA/NaN filled. """ value, method = validate_fillna_kwargs(value, method) @@ -539,13 +540,14 @@ def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArra fill_value : object, optional The scalar value to use for newly introduced missing values. - The default is ``self.dtype.na_value`` + The default is ``self.dtype.na_value``. .. versionadded:: 0.24.0 Returns ------- - shifted : ExtensionArray + ExtensionArray + Shifted. 
Notes ----- @@ -869,11 +871,12 @@ def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: Parameters ---------- dtype : str, np.dtype, or ExtensionDtype, optional - Default None + Default None. Returns ------- ExtensionArray + A view of the :class:`ExtensionArray`. """ # NB: # - This must return a *new* object referencing the same data, not self. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ea19808b19fc9..795986127cde7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -295,7 +295,7 @@ class Categorical(ExtensionArray, PandasObject): See Also -------- - api.types.CategoricalDtype : Type for categorical data. + CategoricalDtype : Type for categorical data. CategoricalIndex : An Index with an underlying ``Categorical``. Notes @@ -331,7 +331,9 @@ class Categorical(ExtensionArray, PandasObject): __array_priority__ = 1000 _dtype = CategoricalDtype(ordered=False) # tolist is not actually deprecated, just suppressed in the __dir__ - _deprecations = PandasObject._deprecations | frozenset(["tolist", "get_values"]) + _deprecations = PandasObject._deprecations | frozenset( + ["tolist", "itemsize", "get_values"] + ) _typ = "categorical" def __init__( diff --git a/pandas/core/base.py b/pandas/core/base.py index e4e14a950c96b..5ae3926952a67 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins from collections import OrderedDict import textwrap -from typing import Dict, Optional +from typing import Dict, FrozenSet, Optional import warnings import numpy as np @@ -267,7 +267,7 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - def _try_aggregate_string_function(self, arg, *args, **kwargs): + def _try_aggregate_string_function(self, arg: str, *args, **kwargs): """ if arg is a string, then try to operate on it: - try to find a function (or attribute) on ourselves @@ -292,12 +292,10 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): f = getattr(np, arg, None) if f is not None: - try: + if hasattr(self, "__array__"): + # in particular exclude Window return f(self, *args, **kwargs) - except (AttributeError, TypeError): - pass - raise AttributeError( "'{arg}' is not a valid function for " "'{cls}' object".format(arg=arg, cls=type(self).__name__) @@ -653,7 +651,17 @@ class IndexOpsMixin: # ndarray compatibility __array_priority__ = 1000 - _deprecations = frozenset(["item"]) + _deprecations = frozenset( + [ + "tolist", # tolist is not deprecated, just suppressed in the __dir__ + "base", + "data", + "item", + "itemsize", + "flags", + "strides", + ] + ) # type: FrozenSet[str] def transpose(self, *args, **kwargs): """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79e941f262931..7880acb1b78da 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -14,7 +14,18 @@ import itertools import sys from textwrap import dedent -from typing import FrozenSet, List, Optional, Sequence, Set, Tuple, Type, Union +from typing import ( + FrozenSet, + Hashable, + Iterable, + List, + Optional, + Sequence, + Set, + Tuple, + Type, + Union, +) import warnings import numpy as np @@ -861,7 +872,7 @@ def style(self): """ @Appender(_shared_docs["items"]) - def items(self): + def items(self) -> Iterable[Tuple[Hashable, Series]]: if self.columns.is_unique and hasattr(self, "_item_cache"): for k in self.columns: yield k, self._get_item_cache(k) @@ -2207,6 +2218,7 @@ def to_html( border=None, table_id=None, render_links=False, + encoding=None, ): """ Render a 
DataFrame as an HTML table. @@ -2222,6 +2234,10 @@ def to_html( border : int A ``border=border`` attribute is included in the opening `` tag. Default ``pd.options.display.html.border``. + encoding : str, default "utf-8" + Set character encoding + + .. versionadded:: 1.0 table_id : str, optional A css id is included in the opening `
` tag if specified. @@ -2263,7 +2279,11 @@ def to_html( ) # TODO: a generic formatter wld b in DataFrameFormatter return formatter.to_html( - buf=buf, classes=classes, notebook=notebook, border=border + buf=buf, + classes=classes, + notebook=notebook, + border=border, + encoding=encoding, ) # ---------------------------------------------------------------------- @@ -6628,15 +6648,7 @@ def transform(self, func, axis=0, *args, **kwargs): return super().transform(func, *args, **kwargs) def apply( - self, - func, - axis=0, - broadcast=None, - raw=False, - reduce=None, - result_type=None, - args=(), - **kwds + self, func, axis=0, raw=False, reduce=None, result_type=None, args=(), **kwds ): """ Apply a function along an axis of the DataFrame. @@ -6656,21 +6668,9 @@ def apply( * 0 or 'index': apply function to each column. * 1 or 'columns': apply function to each row. - broadcast : bool, optional - Only relevant for aggregation functions: - - * ``False`` or ``None`` : returns a Series whose length is the - length of the index or the number of columns (based on the - `axis` parameter) - * ``True`` : results will be broadcast to the original shape - of the frame, the original index and columns will be retained. - - .. deprecated:: 0.23.0 - This argument will be removed in a future version, replaced - by result_type='broadcast'. raw : bool, default False - Determines if row or column is passed as a Series or ndarry object: + Determines if row or column is passed as a Series or ndarray object: * ``False`` : passes each row or column as a Series to the function. @@ -6678,20 +6678,6 @@ def apply( instead. If you are just applying a NumPy reduction function this will achieve much better performance. - reduce : bool or None, default None - Try to apply reduction procedures. If the DataFrame is empty, - `apply` will use `reduce` to determine whether the result - should be a Series or a DataFrame. If ``reduce=None`` (the - default), `apply`'s return value will be guessed by calling - `func` on an empty Series - (note: while guessing, exceptions raised by `func` will be - ignored). - If ``reduce=True`` a Series will always be returned, and if - ``reduce=False`` a DataFrame will always be returned. - - .. deprecated:: 0.23.0 - This argument will be removed in a future version, replaced - by ``result_type='reduce'``. 
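# A brief sketch of the result_type spellings that take over from the removed
# broadcast/reduce keywords, as the old deprecation messages suggested. The
# frame contents below are illustrative.
import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# formerly apply(..., broadcast=True): the row-wise reduction is broadcast
# back to the original shape
df.apply(np.sum, axis=1, result_type="broadcast")

# formerly apply(..., reduce=True): force a Series result even for an
# empty frame
pd.DataFrame(columns=["a", "b"]).apply(np.sum, axis=1, result_type="reduce")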
result_type : {'expand', 'reduce', 'broadcast', None}, default None These only act when ``axis=1`` (columns): @@ -6805,9 +6791,7 @@ def apply( self, func=func, axis=axis, - broadcast=broadcast, raw=raw, - reduce=reduce, result_type=result_type, args=args, kwds=kwds, @@ -7772,7 +7756,8 @@ def _count_level(self, level, axis=0, numeric_only=False): if isinstance(level, str): level = count_axis._get_level_number(level) - level_index = count_axis.levels[level] + level_name = count_axis._names[level] + level_index = count_axis.levels[level]._shallow_copy(name=level_name) level_codes = ensure_int64(count_axis.codes[level]) counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e97772a418982..e3e59639de56b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4606,7 +4606,7 @@ def _needs_reindex_multi(self, axes, method, level): ) def _reindex_multi(self, axes, copy, fill_value): - return NotImplemented + raise AbstractMethodError(self) def _reindex_with_indexers( self, reindexers, fill_value=None, copy=False, allow_dups=False diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 598f13de309b4..8191c3519a36a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -11,7 +11,17 @@ from functools import partial from textwrap import dedent import typing -from typing import Any, Callable, FrozenSet, Sequence, Type, Union +from typing import ( + Any, + Callable, + FrozenSet, + Hashable, + Iterable, + Sequence, + Tuple, + Type, + Union, +) import warnings import numpy as np @@ -53,7 +63,6 @@ _transform_template, groupby, ) -from pandas.core.groupby.ops import BinGrouper from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block @@ -133,7 +142,7 @@ def pinner(cls): class SeriesGroupBy(GroupBy): _apply_whitelist = base.series_apply_whitelist - def _iterate_slices(self): + def _iterate_slices(self) -> Iterable[Tuple[Hashable, Series]]: yield self._selection_name, self._selected_obj @property @@ -252,6 +261,8 @@ def aggregate(self, func=None, *args, **kwargs): try: return self._python_agg_general(func, *args, **kwargs) + except AssertionError: + raise except Exception: result = self._aggregate_named(func, *args, **kwargs) @@ -640,12 +651,7 @@ def value_counts( rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) # multi-index components - if isinstance(self.grouper, BinGrouper) and ( - len(self.grouper.binlabels) != len(self.grouper.indices) - ): - labels = list(map(rep, [np.unique(ids)])) + [llab(lab, inc)] - else: - labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] + labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] levels = [ping.group_index for ping in self.grouper.groupings] + [lev] names = self.grouper.names + [self._selection_name] @@ -883,7 +889,23 @@ def aggregate(self, func=None, *args, **kwargs): result = self._aggregate_multiple_funcs( [func], _level=_level, _axis=self.axis ) - except Exception: + except ValueError as err: + if "no results" not in str(err): + # raised directly by _aggregate_multiple_funcs + raise + result = self._aggregate_frame(func) + except NotImplementedError as err: + if "axis other than 0 is not supported" in str(err): + # raised directly by _aggregate_multiple_funcs + pass + elif "decimal does not support skipna=True" in str(err): + # FIXME: kludge for DecimalArray 
tests + pass + else: + raise + # FIXME: this is raised in a bunch of + # test_whitelist.test_regression_whitelist_methods tests, + # can be avoided result = self._aggregate_frame(func) else: result.columns = Index( @@ -904,22 +926,20 @@ def aggregate(self, func=None, *args, **kwargs): agg = aggregate - def _iterate_slices(self): - if self.axis == 0: - # kludge - if self._selection is None: - slice_axis = self.obj.columns - else: - slice_axis = self._selection_list - slicer = lambda x: self.obj[x] + def _iterate_slices(self) -> Iterable[Tuple[Hashable, Series]]: + obj = self._selected_obj + if self.axis == 1: + obj = obj.T + + if isinstance(obj, Series) and obj.name not in self.exclusions: + # Occurs when doing DataFrameGroupBy(...)["X"] + yield obj.name, obj else: - slice_axis = self.obj.index - slicer = self.obj.xs + for label, values in obj.items(): + if label in self.exclusions: + continue - for val in slice_axis: - if val in self.exclusions: - continue - yield val, slicer(val) + yield label, values def _cython_agg_general(self, how, alt=None, numeric_only=True, min_count=-1): new_items, new_blocks = self._cython_agg_blocks( @@ -958,11 +978,17 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): if alt is None: # we cannot perform the operation # in an alternate way, exclude the block + assert how == "ohlc" deleted_items.append(locs) continue # call our grouper again with only this block obj = self.obj[data.items[locs]] + if obj.shape[1] == 1: + # Avoid call to self.values that can occur in DataFrame + # reductions; see GH#28949 + obj = obj.iloc[:, 0] + s = groupby(obj, self.grouper) try: result = s.aggregate(lambda x: alt(x, axis=self.axis)) @@ -971,17 +997,29 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): # continue and exclude the block deleted_items.append(locs) continue + + # unwrap DataFrame to get array + assert len(result._data.blocks) == 1 + result = result._data.blocks[0].values + if result.ndim == 1 and isinstance(result, np.ndarray): + result = result.reshape(1, -1) + finally: + assert not isinstance(result, DataFrame) + if result is not no_result: # see if we can cast the block back to the original dtype result = maybe_downcast_numeric(result, block.dtype) - if result.ndim == 1 and isinstance(result, np.ndarray): + if block.is_extension and isinstance(result, np.ndarray): # e.g. 
block.values was an IntegerArray + # (1, N) case can occur if block.values was Categorical + # and result is ndarray[object] + assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible result = type(block.values)._from_sequence( - result, dtype=block.values.dtype + result.ravel(), dtype=block.values.dtype ) except ValueError: # reshape to be valid for non-Extension Block @@ -1031,17 +1069,24 @@ def _aggregate_frame(self, func, *args, **kwargs): if axis != obj._info_axis_number: try: for name, data in self: - result[name] = self._try_cast(func(data, *args, **kwargs), data) + fres = func(data, *args, **kwargs) + result[name] = self._try_cast(fres, data) + except AssertionError: + raise except Exception: return self._aggregate_item_by_item(func, *args, **kwargs) else: for name in self.indices: + data = self.get_group(name, obj=obj) try: - data = self.get_group(name, obj=obj) - result[name] = self._try_cast(func(data, *args, **kwargs), data) + fres = func(data, *args, **kwargs) + except AssertionError: + raise except Exception: wrapper = lambda x: func(x, *args, **kwargs) result[name] = data.apply(wrapper, axis=axis) + else: + result[name] = self._try_cast(fres, data) return self._wrap_frame_output(result, obj) @@ -1392,6 +1437,8 @@ def _choose_path(self, fast_path, slow_path, group): # if we make it here, test if we can use the fast path try: res_fast = fast_path(group) + except AssertionError: + raise except Exception: # Hard to know ex-ante what exceptions `fast_path` might raise return path, res @@ -1416,9 +1463,12 @@ def _transform_item_by_item(self, obj, wrapper): for i, col in enumerate(obj): try: output[col] = self[col].transform(wrapper) - inds.append(i) + except AssertionError: + raise except Exception: pass + else: + inds.append(i) if len(output) == 0: raise TypeError("Transform function invalid for data types") diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index cc297629a7004..b27d5bb05ee8f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -14,7 +14,7 @@ class providing the base-class of operations. import inspect import re import types -from typing import FrozenSet, List, Optional, Tuple, Type, Union +from typing import FrozenSet, Hashable, Iterable, List, Optional, Tuple, Type, Union import numpy as np @@ -44,13 +44,7 @@ class providing the base-class of operations. 
from pandas.core import nanops import pandas.core.algorithms as algorithms from pandas.core.arrays import Categorical -from pandas.core.base import ( - DataError, - GroupByError, - PandasObject, - SelectionMixin, - SpecificationError, -) +from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.frame import DataFrame @@ -598,14 +592,7 @@ def pipe(self, func, *args, **kwargs): plot = property(GroupByPlot) def _make_wrapper(self, name): - if name not in self._apply_whitelist: - is_callable = callable(getattr(self._selected_obj, name, None)) - kind = " callable " if is_callable else " " - msg = ( - "Cannot access{0}attribute {1!r} of {2!r} objects, try " - "using the 'apply' method".format(kind, name, type(self).__name__) - ) - raise AttributeError(msg) + assert name in self._apply_whitelist self._set_group_selection() @@ -758,7 +745,7 @@ def _python_apply_general(self, f): keys, values, not_indexed_same=mutated or self.mutated ) - def _iterate_slices(self): + def _iterate_slices(self) -> Iterable[Tuple[Hashable, Series]]: raise AbstractMethodError(self) def transform(self, func, *args, **kwargs): @@ -869,8 +856,6 @@ def _cython_transform(self, how, numeric_only=True, **kwargs): result, names = self.grouper.transform(obj.values, how, **kwargs) except NotImplementedError: continue - except AssertionError as e: - raise GroupByError(str(e)) if self._transform_should_cast(how): output[name] = self._try_cast(result, obj) else: @@ -897,12 +882,7 @@ def _cython_agg_general(self, how, alt=None, numeric_only=True, min_count=-1): if numeric_only and not is_numeric: continue - try: - result, names = self.grouper.aggregate( - obj.values, how, min_count=min_count - ) - except AssertionError as e: - raise GroupByError(str(e)) + result, names = self.grouper.aggregate(obj.values, how, min_count=min_count) output[name] = self._try_cast(result, obj) if len(output) == 0: @@ -919,9 +899,10 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): try: result, counts = self.grouper.agg_series(obj, f) - output[name] = self._try_cast(result, obj, numeric_only=True) except TypeError: continue + else: + output[name] = self._try_cast(result, obj, numeric_only=True) if len(output) == 0: return self._python_apply_general(f) @@ -1359,10 +1340,18 @@ def f(self, **kwargs): # try a cython aggregation if we can try: return self._cython_agg_general(alias, alt=npfunc, **kwargs) - except AssertionError as e: - raise SpecificationError(str(e)) - except Exception: + except DataError: pass + except NotImplementedError as err: + if "function is not implemented for this dtype" in str(err): + # raised in _get_cython_function, in some cases can + # be trimmed by implementing cython funcs for more dtypes + pass + elif "decimal does not support skipna=True" in str(err): + # FIXME: kludge for test_decimal:test_in_numeric_groupby + pass + else: + raise # apply a non-cython aggregation result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 40517eefe4d5d..e4335d39929b3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -526,7 +526,13 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): func = self._get_cython_function(kind, how, values, is_numeric) except NotImplementedError: if is_numeric: - values = ensure_float64(values) + try: + values = 
ensure_float64(values) + except TypeError: + if lib.infer_dtype(values, skipna=False) == "complex": + values = values.astype(complex) + else: + raise func = self._get_cython_function(kind, how, values, is_numeric) else: raise @@ -647,6 +653,8 @@ def _transform( def agg_series(self, obj, func): try: return self._aggregate_series_fast(obj, func) + except AssertionError: + raise except Exception: return self._aggregate_series_pure_python(obj, func) @@ -825,6 +833,14 @@ def levels(self): def names(self): return [self.binlabels.name] + @property + def recons_labels(self): + comp_ids, obs_ids, _ = self.group_info + if len(self.binlabels) != len(self.indices): + return [np.unique(comp_ids)] + labels = (ping.labels for ping in self.groupings) + return decons_obs_group_ids(comp_ids, obs_ids, self.shape, labels, xnull=True) + @property def groupings(self): from pandas.core.groupby.grouper import Grouping diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7dee3a17f8f9e..1a08609ccd99a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import Union +from typing import FrozenSet, Union import warnings import numpy as np @@ -63,7 +63,7 @@ from pandas.core.dtypes.missing import array_equivalent, isna from pandas.core import ops -from pandas.core.accessor import CachedAccessor, DirNamesMixin +from pandas.core.accessor import CachedAccessor import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject @@ -206,10 +206,10 @@ class Index(IndexOpsMixin, PandasObject): # tolist is not actually deprecated, just suppressed in the __dir__ _deprecations = ( - IndexOpsMixin._deprecations - | DirNamesMixin._deprecations - | frozenset(["tolist", "contains", "dtype_str", "get_values", "set_value"]) - ) + PandasObject._deprecations + | IndexOpsMixin._deprecations + | frozenset(["asobject", "contains", "dtype_str", "get_values", "set_value"]) + ) # type: FrozenSet[str] # To hand over control to subclasses _join_precedence = 1 @@ -2493,8 +2493,12 @@ def _union(self, other, sort): value_set = set(lvals) result.extend([x for x in rvals if x not in value_set]) else: - indexer = self.get_indexer(other) - indexer, = (indexer == -1).nonzero() + # find indexes of things in "other" that are not in "self" + if self.is_unique: + indexer = self.get_indexer(other) + indexer = (indexer == -1).nonzero()[0] + else: + indexer = algos.unique1d(self.get_indexer_non_unique(other)[1]) if len(indexer) > 0: other_diff = algos.take_nd(rvals, indexer, allow_fill=False) @@ -3138,16 +3142,7 @@ def is_int(v): elif is_positional: indexer = key else: - try: - indexer = self.slice_indexer(start, stop, step, kind=kind) - except Exception: - if is_index_slice: - if self.is_integer(): - raise - else: - indexer = key - else: - raise + indexer = self.slice_indexer(start, stop, step, kind=kind) return indexer @@ -4672,11 +4667,11 @@ def get_value(self, series, key): raise InvalidIndexError(key) else: raise e1 - except Exception: # pragma: no cover + except Exception: raise e1 except TypeError: - # python 3 - if is_scalar(key): # pragma: no cover + # e.g. 
"[False] is an invalid key" + if is_scalar(key): raise IndexError(key) raise InvalidIndexError(key) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b538c4df00e19..e5a8edb56e413 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -552,10 +552,6 @@ def get_value(self, series: AnyArrayLike, key: Any): # we might be a positional inexer return super().get_value(series, key) - def _can_reindex(self, indexer): - """ always allow reindexing """ - pass - @Substitution(klass="CategoricalIndex") @Appender(_shared_docs["searchsorted"]) def searchsorted(self, value, side="left", sorter=None): @@ -585,7 +581,6 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): Indices of output values in original index """ - if method is not None: raise NotImplementedError( "argument method is not implemented for CategoricalIndex.reindex" @@ -605,9 +600,6 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): indexer = None missing = [] else: - if not target.is_unique: - raise ValueError("cannot reindex with a non-unique indexer") - indexer, missing = self.get_indexer_non_unique(np.array(target)) if len(self.codes) and indexer is not None: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 596eaf0c55dbd..fda5c78a61e53 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -274,6 +274,7 @@ def __new__( result._set_levels(levels, copy=copy, validate=False) result._set_codes(codes, copy=copy, validate=False) + result._names = [None] * len(levels) if names is not None: # handles name validation result._set_names(names) @@ -638,7 +639,10 @@ def from_frame(cls, df, sortorder=None, names=None): @property def levels(self): - return self._levels + result = [ + x._shallow_copy(name=name) for x, name in zip(self._levels, self._names) + ] + return FrozenList(result) @property def _values(self): @@ -829,7 +833,7 @@ def _set_codes( if level is None: new_codes = FrozenList( _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() - for lev, level_codes in zip(self.levels, codes) + for lev, level_codes in zip(self._levels, codes) ) else: level = [self._get_level_number(l) for l in level] @@ -1216,7 +1220,7 @@ def __len__(self): return len(self.codes[0]) def _get_names(self): - return FrozenList(level.name for level in self.levels) + return FrozenList(self._names) def _set_names(self, names, level=None, validate=True): """ @@ -1262,7 +1266,7 @@ def _set_names(self, names, level=None, validate=True): level = [self._get_level_number(l) for l in level] # set the name - for l, name in zip(level, names): + for lev, name in zip(level, names): if name is not None: # GH 20527 # All items in 'names' need to be hashable: @@ -1272,7 +1276,7 @@ def _set_names(self, names, level=None, validate=True): self.__class__.__name__ ) ) - self.levels[l].rename(name, inplace=True) + self._names[lev] = name names = property( fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n""" @@ -1582,13 +1586,13 @@ def _get_level_values(self, level, unique=False): values : ndarray """ - values = self.levels[level] + lev = self.levels[level] level_codes = self.codes[level] + name = self._names[level] if unique: level_codes = algos.unique(level_codes) - filled = algos.take_1d(values._values, level_codes, fill_value=values._na_value) - values = values._shallow_copy(filled) - return values + filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value) + 
return lev._shallow_copy(filled, name=name) def get_level_values(self, level): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0fc74f4e78c9f..f085dff84462d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -457,7 +457,11 @@ def __contains__(self, key): try: self.get_loc(key) return True - except Exception: + except (ValueError, TypeError, KeyError): + # TypeError can be reached if we pass a tuple that is not hashable + # ValueError can be reached if pass a 2-tuple and parse_time_string + # raises with the wrong number of return values + # TODO: the latter is a bug in parse_time_string return False @cache_readonly @@ -765,7 +769,9 @@ def _maybe_cast_slice_bound(self, label, side, kind): _, parsed, reso = parse_time_string(label, self.freq) bounds = self._parsed_string_to_bounds(reso, parsed) return bounds[0 if side == "left" else 1] - except Exception: + except ValueError: + # string cannot be parsed as datetime-like + # TODO: we need tests for this case raise KeyError(label) elif is_integer(label) or is_float(label): self._invalid_indexer("slice", label) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 755992c881fe5..62a74fefa6577 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -630,7 +630,8 @@ def insert(self, loc, item): if _is_convertible_to_td(item): try: item = Timedelta(item) - except Exception: + except ValueError: + # e.g. str that can't be parsed to timedelta pass elif is_scalar(item) and isna(item): # GH 18295 diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b76cb5cbec626..1495be1f26df5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -687,7 +687,6 @@ def _try_coerce_args(self, other): def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ - values = self.get_values() if slicer is not None: @@ -1783,6 +1782,23 @@ def get_values(self, dtype=None): def to_dense(self): return np.asarray(self.values) + def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): + """override to use ExtensionArray astype for the conversion""" + values = self.values + if slicer is not None: + values = values[slicer] + mask = isna(values) + + try: + values = values.astype(str) + values[mask] = na_rep + except Exception: + # eg SparseArray does not support setitem, needs to be converted to ndarray + return super().to_native_types(slicer, na_rep, quoting, **kwargs) + + # we are expected to return a 2-d ndarray + return values.reshape(1, len(values)) + def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): """ Take values according to indexer and return them as a block. 
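The ExtensionBlock.to_native_types override above routes to_csv output through the ExtensionArray's own astype(str), replacing missing values with na_rep, and only falls back to the generic object-array path when the array does not support item assignment (e.g. SparseArray, as the comment notes). A rough usage sketch, assuming a build with this patch and the nullable Int64 extension dtype (rather than the ListArray test array):

import pandas as pd

# Hypothetical illustration: an extension-backed column is written out via
# the array's own string conversion; missing values appear as the default
# empty na_rep used by to_csv.
df = pd.DataFrame({"a": pd.array([1, None, 3], dtype="Int64")})
print(df.to_csv(index=False))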
@@ -2265,6 +2281,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): is_extension = True _can_hold_element = DatetimeBlock._can_hold_element + to_native_types = DatetimeBlock.to_native_types fill_value = np.datetime64("NaT", "ns") @property diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 545bc21dd6d1b..d4ae3767f6157 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -17,6 +17,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries import pandas.core.algorithms as algos +from pandas.core.base import DataError from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.groupby.generic import SeriesGroupBy @@ -360,7 +361,25 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): result = grouped._aggregate_item_by_item(how, *args, **kwargs) else: result = grouped.aggregate(how, *args, **kwargs) - except Exception: + except AssertionError: + raise + except DataError: + # we have a non-reducing function; try to evaluate + result = grouped.apply(how, *args, **kwargs) + except ValueError as err: + if "Must produce aggregated value" in str(err): + # raised in _aggregate_named + pass + elif "len(index) != len(labels)" in str(err): + # raised in libgroupby validation + pass + elif "No objects to concatenate" in str(err): + # raised in concat call + # In tests this is reached via either + # _apply_to_column_groupbys (ohlc) or DataFrameGroupBy.nunique + pass + else: + raise # we have a non-reducing function # try to evaluate diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index e654685d24d9d..340e964d7c14f 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -259,10 +259,10 @@ def get_new_values(self): def get_new_columns(self): if self.value_columns is None: if self.lift == 0: - return self.removed_level + return self.removed_level._shallow_copy(name=self.removed_name) - lev = self.removed_level - return lev.insert(0, lev._na_value) + lev = self.removed_level.insert(0, item=self.removed_level._na_value) + return lev.rename(self.removed_name) stride = len(self.removed_level) + self.lift width = len(self.value_columns) @@ -298,10 +298,10 @@ def get_new_index(self): # construct the new index if len(self.new_index_levels) == 1: - lev, lab = self.new_index_levels[0], result_codes[0] - if (lab == -1).any(): - lev = lev.insert(len(lev), lev._na_value) - return lev.take(lab) + level, level_codes = self.new_index_levels[0], result_codes[0] + if (level_codes == -1).any(): + level = level.insert(len(level), level._na_value) + return level.take(level_codes).rename(self.new_index_names[0]) return MultiIndex( levels=self.new_index_levels, @@ -661,7 +661,8 @@ def _convert_level_number(level_num, columns): new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) else: - new_columns = unique_groups = this.columns.levels[0] + new_columns = this.columns.levels[0]._shallow_copy(name=this.columns.names[0]) + unique_groups = new_columns # time to ravel the values new_data = {} diff --git a/pandas/core/series.py b/pandas/core/series.py index 539a09f7046ac..1039e9af929d4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -54,7 +54,7 @@ import pandas as pd from pandas.core import algorithms, base, generic, nanops, ops -from pandas.core.accessor import CachedAccessor, DirNamesMixin +from pandas.core.accessor import CachedAccessor from pandas.core.arrays import ExtensionArray from 
pandas.core.arrays.categorical import Categorical, CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor @@ -178,10 +178,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _deprecations = ( base.IndexOpsMixin._deprecations | generic.NDFrame._deprecations - | DirNamesMixin._deprecations | frozenset( [ - "tolist", # tolist is not deprecated, just suppressed in the __dir__ "asobject", "compress", "valid", diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e6edad656d430..6d80cf8c697d6 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -162,7 +162,6 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): xnull: boolean, if nulls are excluded; i.e. -1 labels are passed through """ - if not xnull: lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") shape = np.asarray(shape, dtype="i8") + lift @@ -303,8 +302,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels): def get_indexer_dict(label_list, keys): - """ return a diction of {labels} -> {indexers} """ - shape = list(map(len, keys)) + """ return a dict of {labels} -> {indexers} """ + shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) ngroups = ( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ad62c56a337b6..b8c40e3f62221 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -942,6 +942,7 @@ def _format_col(self, i: int) -> List[str]: def to_html( self, buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, classes: Optional[Union[str, List, Tuple]] = None, notebook: bool = False, border: Optional[int] = None, @@ -963,7 +964,9 @@ def to_html( from pandas.io.formats.html import HTMLFormatter, NotebookFormatter Klass = NotebookFormatter if notebook else HTMLFormatter - return Klass(self, classes=classes, border=border).get_result(buf=buf) + return Klass(self, classes=classes, border=border).get_result( + buf=buf, encoding=encoding + ) def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: from pandas.core.index import _sparsify diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 9016e8a98e5ba..1e27421a55499 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -243,8 +243,10 @@ def build_table_schema(data, index=True, primary_key=None, version=True): if index: if data.index.nlevels > 1: - for level in data.index.levels: - fields.append(convert_pandas_type_to_json_field(level)) + for level, name in zip(data.index.levels, data.index.names): + new_field = convert_pandas_type_to_json_field(level) + new_field["name"] = name + fields.append(new_field) else: fields.append(convert_pandas_type_to_json_field(data.index)) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index b6ffd8a83e409..4d40cd3a2d4ca 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -2,6 +2,7 @@ from functools import reduce from itertools import product import operator +from typing import Dict, Type import warnings import numpy as np @@ -19,7 +20,11 @@ from pandas.core.computation.check import _NUMEXPR_VERSION from pandas.core.computation.engines import NumExprClobberingError, _engines import pandas.core.computation.expr as expr -from pandas.core.computation.expr import PandasExprVisitor, PythonExprVisitor +from pandas.core.computation.expr import ( + BaseExprVisitor, + PandasExprVisitor, + 
PythonExprVisitor, +) from pandas.core.computation.expressions import _NUMEXPR_INSTALLED, _USE_NUMEXPR from pandas.core.computation.ops import ( _arith_ops_syms, @@ -1884,7 +1889,7 @@ def test_invalid_parser(): "python": PythonExprVisitor, "pytables": pytables.ExprVisitor, "pandas": PandasExprVisitor, -} +} # type: Dict[str, Type[BaseExprVisitor]] @pytest.mark.parametrize("engine", _engines) diff --git a/pandas/tests/extension/list/__init__.py b/pandas/tests/extension/list/__init__.py new file mode 100644 index 0000000000000..108f1937d07d3 --- /dev/null +++ b/pandas/tests/extension/list/__init__.py @@ -0,0 +1,3 @@ +from .array import ListArray, ListDtype, make_data + +__all__ = ["ListArray", "ListDtype", "make_data"] diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py new file mode 100644 index 0000000000000..0ca9fadb68829 --- /dev/null +++ b/pandas/tests/extension/list/array.py @@ -0,0 +1,133 @@ +""" +Test extension array for storing nested data in a pandas container. + +The ListArray stores an ndarray of lists. +""" +import numbers +import random +import string + +import numpy as np + +from pandas.core.dtypes.base import ExtensionDtype + +import pandas as pd +from pandas.core.arrays import ExtensionArray + + +class ListDtype(ExtensionDtype): + type = list + name = "list" + na_value = np.nan + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ListArray + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string)) + + +class ListArray(ExtensionArray): + dtype = ListDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False): + if not isinstance(values, np.ndarray): + raise TypeError("Need to pass a numpy array as values") + for val in values: + if not isinstance(val, self.dtype.type) and not pd.isna(val): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + data = np.empty(len(scalars), dtype=object) + data[:] = scalars + return cls(data) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + else: + # slice, list-like, mask + return type(self)(self.data[item]) + + def __len__(self) -> int: + return len(self.data) + + def isna(self): + return np.array( + [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool + ) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarary. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." 
+ ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError: + raise IndexError(msg) + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError: + raise IndexError(msg) + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + elif pd.api.types.is_string_dtype(dtype) and not pd.api.types.is_object_dtype( + dtype + ): + # numpy has problems with astype(str) for nested elements + return np.array([str(x) for x in self.data], dtype=dtype) + return np.array(self.data, dtype=dtype, copy=copy) + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + return cls(data) + + +def make_data(): + # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + data = np.empty(100, dtype=object) + data[:] = [ + [random.choice(string.ascii_letters) for _ in range(random.randint(0, 10))] + for _ in range(100) + ] + return data diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py new file mode 100644 index 0000000000000..c5c4417155562 --- /dev/null +++ b/pandas/tests/extension/list/test_list.py @@ -0,0 +1,30 @@ +import pytest + +import pandas as pd + +from .array import ListArray, ListDtype, make_data + + +@pytest.fixture +def dtype(): + return ListDtype() + + +@pytest.fixture +def data(): + """Length-100 ListArray for semantics test.""" + data = make_data() + + while len(data[0]) == len(data[1]): + data = make_data() + + return ListArray(data) + + +def test_to_csv(data): + # https://github.com/pandas-dev/pandas/issues/28840 + # array with list-likes fail when doing astype(str) on the numpy array + # which was done in to_native_types + df = pd.DataFrame({"a": data}) + res = df.to_csv() + assert str(data[0]) in res diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 0328232213588..fe034504b8161 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -137,13 +137,6 @@ def test_nunique_empty(self): expected = Series([], index=pd.Index([])) assert_series_equal(result, expected) - def test_apply_deprecate_reduce(self): - empty_frame = DataFrame() - - x = [] - with tm.assert_produces_warning(FutureWarning): - empty_frame.apply(x.append, axis=1, reduce=True) - def test_apply_standard_nonunique(self): df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) @@ -170,10 +163,6 @@ def test_apply_with_string_funcs(self, float_frame, func, args, kwds): expected = getattr(float_frame, func)(*args, **kwds) tm.assert_series_equal(result, expected) - def test_apply_broadcast_deprecated(self, float_frame): - with tm.assert_produces_warning(FutureWarning): - float_frame.apply(np.mean, broadcast=True) - def test_apply_broadcast(self, float_frame, int_frame_const_col): # scalars diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 3f0768ad5bdac..c9a7507969f5b 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -575,9 +575,9 @@ def test_frame_to_dict_tz(self): ), ), ( - defaultdict(list), + defaultdict(dict), defaultdict( - list, + dict, { 0: 
{"int_col": 1, "float_col": 1.0}, 1: {"int_col": 2, "float_col": 2.0}, diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 7e3cbed09c6d7..5dad868c8c3aa 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -19,7 +19,7 @@ date_range, period_range, ) -from pandas.core.groupby.groupby import SpecificationError +from pandas.core.base import SpecificationError import pandas.util.testing as tm from pandas.io.formats.printing import pprint_thing diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 2831c07cb21d3..5391cb5ce821f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas.compat import PY37, is_platform_windows +from pandas.compat import PY37 import pandas as pd from pandas import ( @@ -209,10 +209,9 @@ def test_level_get_group(observed): assert_frame_equal(result, expected) -# GH#21636 previously flaky on py37 -@pytest.mark.xfail( - is_platform_windows() and PY37, reason="Flaky, GH-27902", strict=False -) +# GH#21636 flaky on py37; may be related to older numpy, see discussion +# https://github.com/MacPython/pandas-wheels/pull/64 +@pytest.mark.xfail(PY37, reason="Flaky, GH-27902", strict=False) @pytest.mark.parametrize("ordered", [True, False]) def test_apply(ordered): # GH 10138 @@ -229,6 +228,9 @@ def test_apply(ordered): idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) expected = DataFrame([0, 1, 2.0], index=idx, columns=["values"]) + # GH#21636 tracking down the xfail, in some builds np.mean(df.loc[[0]]) + # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) + # when we expect Series(0., index=["values"]) result = grouped.apply(lambda x: np.mean(x)) assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index afb22a732691c..571e710ba8928 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -378,7 +378,7 @@ def test_median_empty_bins(observed): @pytest.mark.parametrize( - "dtype", ["int8", "int16", "int32", "int64", "float32", "float64"] + "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] ) @pytest.mark.parametrize( "method,data", diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6212a37472000..dff5baa9b5984 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1944,3 +1944,13 @@ def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): result = getattr(grouped, op)() expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz)) assert_frame_equal(result, expected) + + +def test_groupby_only_none_group(): + # see GH21624 + # this was crashing with "ValueError: Length of passed values is 1, index implies 0" + df = pd.DataFrame({"g": [None], "x": 1}) + actual = df.groupby("g")["x"].transform("sum") + expected = pd.Series([np.nan], name="x") + + assert_series_equal(actual, expected) diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 4adcdd0112b26..f320a89c471bf 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -11,7 +11,7 @@ def test_astype(idx): actual = idx.astype("O") assert_copy(actual.levels, 
expected.levels) assert_copy(actual.codes, expected.codes) - assert [level.name for level in actual.levels] == list(expected.names) + assert actual.names == list(expected.names) with pytest.raises(TypeError, match="^Setting.*dtype.*object"): idx.astype(np.dtype(int)) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 9472d539537ba..ff98da85cfb2d 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -294,6 +294,7 @@ def test_from_arrays_empty(): assert isinstance(result, MultiIndex) expected = Index([], name="A") tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] # N levels for N in [2, 3]: @@ -441,6 +442,7 @@ def test_from_product_empty_one_level(): result = MultiIndex.from_product([[]], names=["A"]) expected = pd.Index([], name="A") tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 5856cb56b307b..5c3a48c9dd481 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -27,28 +27,25 @@ def test_index_name_retained(): def test_changing_names(idx): - - # names should be applied to levels - level_names = [level.name for level in idx.levels] - check_level_names(idx, idx.names) + assert [level.name for level in idx.levels] == ["first", "second"] view = idx.view() copy = idx.copy() shallow_copy = idx._shallow_copy() - # changing names should change level names on object + # changing names should not change level names on object new_names = [name + "a" for name in idx.names] idx.names = new_names - check_level_names(idx, new_names) + check_level_names(idx, ["firsta", "seconda"]) - # but not on copies - check_level_names(view, level_names) - check_level_names(copy, level_names) - check_level_names(shallow_copy, level_names) + # and not on copies + check_level_names(view, ["first", "second"]) + check_level_names(copy, ["first", "second"]) + check_level_names(shallow_copy, ["first", "second"]) # and copies shouldn't change original shallow_copy.names = [name + "c" for name in shallow_copy.names] - check_level_names(idx, new_names) + check_level_names(idx, ["firsta", "seconda"]) def test_take_preserve_name(idx): @@ -82,9 +79,9 @@ def test_copy_names(): def test_names(idx, index_names): # names are assigned in setup - names = index_names + assert index_names == ["first", "second"] level_names = [level.name for level in idx.levels] - assert names == level_names + assert level_names == index_names # setting bad names on existing index = idx @@ -109,11 +106,10 @@ def test_names(idx, index_names): names=["first", "second", "third"], ) - # names are assigned + # names are assigned on index, but not transferred to the levels index.names = ["a", "b"] - ind_names = list(index.names) level_names = [level.name for level in index.levels] - assert ind_names == level_names + assert level_names == ["a", "b"] def test_duplicate_level_names_access_raises(idx): @@ -121,3 +117,10 @@ def test_duplicate_level_names_access_raises(idx): idx.names = ["foo", "foo"] with pytest.raises(ValueError, match="name foo occurs multiple times"): idx._get_level_number("foo") + + +def test_get_names_from_levels(): + idx = pd.MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + + assert idx.levels[0].name == "a" + assert idx.levels[1].name == "b" diff --git 
a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 88de4d1e80386..513efa8941de8 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -6,19 +6,17 @@ import pandas.util.testing as tm -def check_level_names(index, names): - assert [level.name for level in index.levels] == list(names) - - def test_reindex(idx): result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) - check_level_names(result, idx[:4].names) + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] result, indexer = idx.reindex(list(idx)) assert isinstance(result, MultiIndex) assert indexer is None - check_level_names(result, idx.names) + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] def test_reindex_level(idx): diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index a30e6f33d1499..37df420e9ea2e 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -17,6 +17,7 @@ def test_insert(idx): exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") tm.assert_index_equal(new_index.levels[0], exp0) + assert new_index.names == ["first", "second"] exp1 = Index(list(idx.levels[1]) + ["three"], name="second") tm.assert_index_equal(new_index.levels[1], exp1) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 4326c3f8188fc..8ed7f1a890c39 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -599,15 +599,19 @@ def test_reindex_dtype(self): tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) def test_reindex_duplicate_target(self): - # See GH23963 - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - with pytest.raises(ValueError, match="non-unique indexer"): - c.reindex(["a", "a", "c"]) + # See GH25459 + cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) + res, indexer = cat.reindex(["a", "c", "c"]) + exp = Index(["a", "c", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) - with pytest.raises(ValueError, match="non-unique indexer"): - c.reindex( - CategoricalIndex(["a", "a", "c"], categories=["a", "b", "c", "d"]) - ) + res, indexer = cat.reindex( + CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + ) + exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) def test_reindex_empty_index(self): # See GH16770 diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index c365c985eb4b6..005a9a24dc597 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -561,26 +561,30 @@ def test_read_only_source(self): assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3]) def test_reindexing(self): + df = DataFrame( + { + "A": np.arange(3, dtype="int64"), + "B": Series(list("abc")).astype(CDT(list("cabe"))), + } + ).set_index("B") # reindexing # convert to a regular index - result = self.df2.reindex(["a", "b", "e"]) - expected = DataFrame( - {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))} - 
).set_index("B") + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(["a", "b"]) - expected = DataFrame( - {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))} - ).set_index("B") + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(["e"]) + result = df.reindex(["e"]) expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(["d"]) + result = df.reindex(["d"]) expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B") assert_frame_equal(result, expected, check_index_type=True) @@ -588,65 +592,58 @@ def test_reindexing(self): # then return a Categorical cats = list("cabe") - result = self.df2.reindex(Categorical(["a", "d"], categories=cats)) + result = df.reindex(Categorical(["a", "e"], categories=cats)) expected = DataFrame( - {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(cats))} + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))} ).set_index("B") assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(Categorical(["a"], categories=cats)) + result = df.reindex(Categorical(["a"], categories=cats)) expected = DataFrame( - {"A": [0, 1, 5], "B": Series(list("aaa")).astype(CDT(cats))} + {"A": [0], "B": Series(list("a")).astype(CDT(cats))} ).set_index("B") assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(["a", "b", "e"]) - expected = DataFrame( - {"A": [0, 1, 5, 2, 3, np.nan], "B": Series(list("aaabbe"))} - ).set_index("B") + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(["a", "b"]) - expected = DataFrame( - {"A": [0, 1, 5, 2, 3], "B": Series(list("aaabb"))} - ).set_index("B") + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(["e"]) + result = df.reindex(["e"]) expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") assert_frame_equal(result, expected, check_index_type=True) # give back the type of categorical that we received - result = self.df2.reindex( - Categorical(["a", "d"], categories=cats, ordered=True) - ) + result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True)) expected = DataFrame( - { - "A": [0, 1, 5, np.nan], - "B": Series(list("aaad")).astype(CDT(cats, ordered=True)), - } + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))} ).set_index("B") assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(Categorical(["a", "d"], categories=["a", "d"])) + result = df.reindex(Categorical(["a", "d"], categories=["a", "d"])) expected = DataFrame( - {"A": [0, 1, 5, np.nan], "B": Series(list("aaad")).astype(CDT(["a", "d"]))} + {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))} ).set_index("B") assert_frame_equal(result, expected, check_index_type=True) # passed duplicate indexers are not allowed - msg = "cannot reindex with a non-unique indexer" + msg = 
"cannot reindex from a duplicate axis" with pytest.raises(ValueError, match=msg): - self.df2.reindex(["a", "a"]) + self.df2.reindex(["a", "b"]) # args NotImplemented ATM msg = r"argument {} is not implemented for CategoricalIndex\.reindex" with pytest.raises(NotImplementedError, match=msg.format("method")): - self.df2.reindex(["a"], method="ffill") + df.reindex(["a"], method="ffill") with pytest.raises(NotImplementedError, match=msg.format("level")): - self.df2.reindex(["a"], level=1) + df.reindex(["a"], level=1) with pytest.raises(NotImplementedError, match=msg.format("limit")): - self.df2.reindex(["a"], limit=2) + df.reindex(["a"], limit=2) def test_loc_slice(self): # slicing diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 05b58b0eca9b8..4f38d7beb9c0b 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1,4 +1,5 @@ import itertools +from typing import Dict, List import numpy as np import pytest @@ -928,7 +929,7 @@ class TestReplaceSeriesCoercion(CoercionBase): klasses = ["series"] method = "replace" - rep = {} + rep = {} # type: Dict[str, List] rep["object"] = ["a", "b"] rep["int64"] = [4, 5] rep["float64"] = [1.1, 2.2] diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index ef19319e208d9..6c4a226b7ebd2 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -99,6 +99,14 @@ def test_to_html_unicode(df, expected, datapath): assert result == expected +def test_to_html_encoding(float_frame, tmp_path): + # GH 28663 + path = tmp_path / "test.html" + float_frame.to_html(path, encoding="gbk") + with open(str(path), "r", encoding="gbk") as f: + assert float_frame.to_html() == f.read() + + def test_to_html_decimal(datapath): # GH 12031 df = DataFrame({"A": [6.0, 3.1, 2.2]}) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index d6572ac7b7bfe..20e2690084e2a 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -1,10 +1,7 @@ -try: - import json -except ImportError: - import simplejson as json import calendar import datetime import decimal +import json import locale import math import re diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 2c347a096006a..183ad500b15f3 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -1,4 +1,5 @@ import os +from typing import List, Optional import pytest @@ -6,9 +7,9 @@ class BaseParser: - engine = None + engine = None # type: Optional[str] low_memory = True - float_precision_choices = [] + float_precision_choices = [] # type: List[Optional[str]] def update_kwargs(self, kwargs): kwargs = kwargs.copy() @@ -59,11 +60,11 @@ def csv1(csv_dir_path): _py_parsers_only = [_pythonParser] _c_parsers_only = [_cParserHighMemory, _cParserLowMemory] -_all_parsers = _c_parsers_only + _py_parsers_only +_all_parsers = [*_c_parsers_only, *_py_parsers_only] _py_parser_ids = ["python"] _c_parser_ids = ["c_high", "c_low"] -_all_parser_ids = _c_parser_ids + _py_parser_ids +_all_parser_ids = [*_c_parser_ids, *_py_parser_ids] @pytest.fixture(params=_all_parsers, ids=_all_parser_ids) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 7491cef17ebfc..183a47c6039ec 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -583,7 +583,7 @@ class _TestSQLApi(PandasSQLTest): """ flavor = "sqlite" - mode = None + 
mode = None # type: str def setup_connect(self): self.conn = self.connect() @@ -1234,7 +1234,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ - flavor = None + flavor = None # type: str @pytest.fixture(autouse=True, scope="class") def setup_class(cls): diff --git a/pandas/tests/plotting/test_backend.py b/pandas/tests/plotting/test_backend.py index 6511d94aa4c09..41b1a88b15acb 100644 --- a/pandas/tests/plotting/test_backend.py +++ b/pandas/tests/plotting/test_backend.py @@ -9,7 +9,7 @@ import pandas dummy_backend = types.ModuleType("pandas_dummy_backend") -dummy_backend.plot = lambda *args, **kwargs: None +setattr(dummy_backend, "plot", lambda *args, **kwargs: None) @pytest.fixture diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 13f0f14014a31..eda7bc0ec4df7 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1220,7 +1220,9 @@ def test_concat_keys_specific_levels(self): ) tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) - assert result.columns.names[0] == "group_key" + tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) + + assert result.columns.names == ["group_key", None] def test_concat_dataframe_keys_bug(self, sort): t1 = DataFrame( @@ -1409,7 +1411,7 @@ def test_concat_keys_and_levels(self): keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], names=["first", "second"], ) - assert result.index.names == ("first", "second") + (None,) + assert result.index.names == ("first", "second", None) tm.assert_index_equal( result.index.levels[0], Index(["baz", "foo"], name="first") ) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index e2c6f7d1c8feb..0b9392a0eeb5b 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -618,16 +618,15 @@ def test_reshaping_multi_index_categorical(self): df.index.names = ["major", "minor"] df["str"] = "foo" - dti = df.index.levels[0] - df["category"] = df["str"].astype("category") result = df["category"].unstack() + dti = df.index.levels[0] c = Categorical(["foo"] * len(dti)) expected = DataFrame( {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()}, columns=Index(list("ABCD"), name="minor"), - index=dti, + index=dti.rename("major"), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index d60cd3029e5a8..c8e1c04f3e3fb 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas.compat.numpy import _np_version_under1p18 import pandas.util._test_decorators as td import pandas as pd @@ -160,6 +161,9 @@ def test_cummax(self, datetime_series): tm.assert_series_equal(result, expected) + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) def test_cummin_datetime64(self): s = pd.Series( pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) @@ -179,6 +183,9 @@ def test_cummin_datetime64(self): result = s.cummin(skipna=False) tm.assert_series_equal(expected, result) + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) def test_cummax_datetime64(self): s = pd.Series( pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) @@ -198,6 +205,9 @@ def test_cummax_datetime64(self): result = 
s.cummax(skipna=False) tm.assert_series_equal(expected, result) + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) def test_cummin_timedelta64(self): s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) @@ -213,6 +223,9 @@ def test_cummin_timedelta64(self): result = s.cummin(skipna=False) tm.assert_series_equal(expected, result) + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) def test_cummax_timedelta64(self): s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 467f2c177850a..6bfcc02ca633a 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -7,7 +7,6 @@ import pandas as pd from pandas import Categorical, DataFrame, Index, Series, bdate_range, date_range, isna from pandas.core import ops -from pandas.core.indexes.base import InvalidIndexError import pandas.core.nanops as nanops import pandas.util.testing as tm from pandas.util.testing import ( @@ -282,13 +281,27 @@ def test_logical_ops_with_index(self, op): result = op(ser, idx2) assert_series_equal(result, expected) + def test_reversed_xor_with_index_returns_index(self): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = Index.symmetric_difference(idx1, ser) + result = idx1 ^ ser + assert_index_equal(result, expected) + + expected = Index.symmetric_difference(idx2, ser) + result = idx2 ^ ser + assert_index_equal(result, expected) + @pytest.mark.parametrize( "op", [ pytest.param( ops.rand_, marks=pytest.mark.xfail( - reason="GH#22092 Index implementation returns Index", + reason="GH#22092 Index __and__ returns Index intersection", raises=AssertionError, strict=True, ), @@ -296,30 +309,26 @@ def test_logical_ops_with_index(self, op): pytest.param( ops.ror_, marks=pytest.mark.xfail( - reason="Index.get_indexer with non unique index", - raises=InvalidIndexError, + reason="GH#22092 Index __or__ returns Index union", + raises=AssertionError, strict=True, ), ), - ops.rxor, ], ) - def test_reversed_logical_ops_with_index(self, op): + def test_reversed_logical_op_with_index_returns_series(self, op): # GH#22092, GH#19792 ser = Series([True, True, False, False]) idx1 = Index([True, False, True, False]) idx2 = Index([1, 0, 1, 0]) - # symmetric_difference is only for rxor, but other 2 should fail - expected = idx1.symmetric_difference(ser) - + expected = pd.Series(op(idx1.values, ser.values)) result = op(ser, idx1) - assert_index_equal(result, expected) - - expected = idx2.symmetric_difference(ser) + assert_series_equal(result, expected) + expected = pd.Series(op(idx2.values, ser.values)) result = op(ser, idx2) - assert_index_equal(result, expected) + assert_series_equal(result, expected) @pytest.mark.parametrize( "op, expected", diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 483122a0eeaba..1f19f58e80f26 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1009,6 +1009,12 @@ def test_bool_indexing(self, indexer_klass, indexer): s = pd.Series(idx) tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx]) + def test_get_indexer_non_unique_dtype_mismatch(self): + # GH 25459 + indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0])) + 
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing) + class TestTranspose(Ops): errmsg = "the 'axes' parameter is not supported" diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e641d6f842d87..79c9fe2b60bd9 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -335,7 +335,7 @@ def test_count_level_corner(self): df = self.frame[:0] result = df.count(level=0) expected = ( - DataFrame(index=s.index.levels[0], columns=df.columns) + DataFrame(index=s.index.levels[0].set_names(["first"]), columns=df.columns) .fillna(0) .astype(np.int64) ) @@ -975,14 +975,12 @@ def test_count(self): series.index.names = ["a", "b"] result = series.count(level="b") - expect = self.series.count(level=1) - tm.assert_series_equal(result, expect, check_names=False) - assert result.index.name == "b" + expect = self.series.count(level=1).rename_axis("b") + tm.assert_series_equal(result, expect) result = series.count(level="a") - expect = self.series.count(level=0) - tm.assert_series_equal(result, expect, check_names=False) - assert result.index.name == "a" + expect = self.series.count(level=0).rename_axis("a") + tm.assert_series_equal(result, expect) msg = "Level x not found" with pytest.raises(KeyError, match=msg): @@ -1014,6 +1012,8 @@ def test_frame_group_ops(self, op, level, axis, skipna, sort): self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan + level_name = self.frame.index.names[level] + if axis == 0: frame = self.frame else: @@ -1034,7 +1034,7 @@ def aggf(x): frame = frame.sort_index(level=level, axis=axis) # for good measure, groupby detail - level_index = frame._get_axis(axis).levels[level] + level_index = frame._get_axis(axis).levels[level].rename(level_name) tm.assert_index_equal(leftside._get_axis(axis), level_index) tm.assert_index_equal(rightside._get_axis(axis), level_index) @@ -1639,10 +1639,14 @@ def test_constructor_with_tz(self): ) result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], columns) @@ -1674,10 +1678,12 @@ def test_set_index_datetime(self): df = df.set_index("label", append=True) tm.assert_index_equal(df.index.levels[0], expected) tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) + assert df.index.names == ["datetime", "label"] df = df.swaplevel(0, 1) tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) tm.assert_index_equal(df.index.levels[1], expected) + assert df.index.names == ["label", "datetime"] df = DataFrame(np.random.random(6)) idx1 = pd.DatetimeIndex( diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index 880ff1f137520..a05de78e299f7 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -36,8 +36,8 @@ with warnings.catch_warnings(): warnings.simplefilter("ignore") - min_dt = (pd.Timestamp(1900, 1, 1).to_pydatetime(),) - max_dt = (pd.Timestamp(1900, 1, 1).to_pydatetime(),) + min_dt = pd.Timestamp(1900, 1, 1).to_pydatetime() + max_dt = pd.Timestamp(1900, 1, 1).to_pydatetime() 
gen_date_range = st.builds( pd.date_range, diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 9571e8027ccf7..86e5d506e0779 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -141,7 +141,7 @@ def test_empty_dtypes(check_dtype): df1["col1"] = df1["col1"].astype("int64") if check_dtype: - msg = "Attributes are different" + msg = r"Attributes of DataFrame\..* are different" with pytest.raises(AssertionError, match=msg): assert_frame_equal(df1, df2, **kwargs) else: diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index a12d9386eb159..bad3f2e67f8bb 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -179,7 +179,7 @@ def test_series_equal_values_mismatch(check_less_precise): def test_series_equal_categorical_mismatch(check_categorical): - msg = """Attributes are different + msg = """Attributes of Series are different Attribute "dtype" are different \\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c8b41a87baa9d..73535e55d4fa5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1156,7 +1156,9 @@ def assert_series_equal( ): pass else: - assert_attr_equal("dtype", left, right) + assert_attr_equal( + "dtype", left, right, obj="Attributes of {obj}".format(obj=obj) + ) if check_exact: assert_numpy_array_equal( @@ -1315,8 +1317,9 @@ def assert_frame_equal( >>> assert_frame_equal(df1, df2) Traceback (most recent call last): - AssertionError: Attributes are different ... + AssertionError: Attributes of DataFrame.iloc[:, 1] are different + Attribute "dtype" are different [left]: int64 [right]: float64 @@ -1600,7 +1603,9 @@ def makeUnicodeIndex(k=10, name=None): def makeCategoricalIndex(k=10, n=3, name=None, **kwargs): """ make a length k index or n categories """ x = rands_array(nchars=4, size=n) - return CategoricalIndex(np.random.choice(x, k), name=name, **kwargs) + return CategoricalIndex( + Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs + ) def makeIntervalIndex(k=10, name=None, **kwargs): diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index b1b5be6d4faeb..1506acc95edf9 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -1,3 +1,4 @@ +import functools import io import random import string @@ -68,6 +69,23 @@ def sample(self): """ return random.random() + @functools.lru_cache(None) + def decorated_sample(self, max): + """ + Generate and return a random integer between 0 and max. + + Parameters + ---------- + max : int + The maximum value of the random number. + + Returns + ------- + int + Random number generated. + """ + return random.randint(0, max) + def random_letters(self): """ Generate and return a sequence of random letters. 
@@ -870,6 +888,7 @@ def test_good_class(self, capsys): "plot", "swap", "sample", + "decorated_sample", "random_letters", "sample_values", "head", diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 50b02c0fcbaf5..1d0f4b583bd0c 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -430,6 +430,17 @@ def doc_parameters(self): @property def signature_parameters(self): + def add_stars(param_name: str, info: inspect.Parameter): + """ + Add stars to *args and **kwargs parameters + """ + if info.kind == inspect.Parameter.VAR_POSITIONAL: + return f"*{param_name}" + elif info.kind == inspect.Parameter.VAR_KEYWORD: + return f"**{param_name}" + else: + return param_name + if inspect.isclass(self.obj): if hasattr(self.obj, "_accessors") and ( self.name.split(".")[-1] in self.obj._accessors @@ -437,17 +448,16 @@ def signature_parameters(self): # accessor classes have a signature but don't want to show this return tuple() try: - sig = inspect.getfullargspec(self.obj) + sig = inspect.signature(self.obj) except (TypeError, ValueError): # Some objects, mainly in C extensions do not support introspection # of the signature return tuple() - params = sig.args - if sig.varargs: - params.append("*" + sig.varargs) - if sig.varkw: - params.append("**" + sig.varkw) - params = tuple(params) + + params = tuple( + add_stars(parameter, sig.parameters[parameter]) + for parameter in sig.parameters + ) if params and params[0] in ("self", "cls"): return params[1:] return params diff --git a/setup.cfg b/setup.cfg index 64494bf84363e..ca1ca4a7b5733 100644 --- a/setup.cfg +++ b/setup.cfg @@ -145,9 +145,6 @@ ignore_errors=True [mypy-pandas.tests.arrays.test_period] ignore_errors=True -[mypy-pandas.tests.computation.test_eval] -ignore_errors=True - [mypy-pandas.tests.dtypes.test_common] ignore_errors=True @@ -166,9 +163,6 @@ ignore_errors=True [mypy-pandas.tests.frame.test_constructors] ignore_errors=True -[mypy-pandas.tests.frame.test_convert_to] -ignore_errors=True - [mypy-pandas.tests.indexes.datetimes.test_datetimelike] ignore_errors=True @@ -196,24 +190,9 @@ ignore_errors=True [mypy-pandas.tests.indexes.timedeltas.test_timedelta] ignore_errors=True -[mypy-pandas.tests.indexing.test_coercion] -ignore_errors=True - [mypy-pandas.tests.indexing.test_loc] ignore_errors=True -[mypy-pandas.tests.io.json.test_ujson] -ignore_errors=True - -[mypy-pandas.tests.io.parser.conftest] -ignore_errors=True - -[mypy-pandas.tests.io.test_sql] -ignore_errors=True - -[mypy-pandas.tests.plotting.test_backend] -ignore_errors=True - [mypy-pandas.tests.series.test_constructors] ignore_errors=True @@ -226,8 +205,5 @@ ignore_errors=True [mypy-pandas.tests.tseries.offsets.test_offsets] ignore_errors=True -[mypy-pandas.tests.tseries.offsets.test_offsets_properties] -ignore_errors=True - [mypy-pandas.tests.tseries.offsets.test_yqm_offsets] ignore_errors=True diff --git a/setup.py b/setup.py index 04aedcb101e25..2892cd0b2e294 100755 --- a/setup.py +++ b/setup.py @@ -88,7 +88,6 @@ def is_platform_mac(): "_libs/algos_take_helper.pxi.in", "_libs/algos_rank_helper.pxi.in", ], - "groupby": ["_libs/groupby_helper.pxi.in"], "hashtable": [ "_libs/hashtable_class_helper.pxi.in", "_libs/hashtable_func_helper.pxi.in", @@ -228,6 +227,7 @@ def build_extensions(self): "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Programming Language :: Cython", "Topic :: Scientific/Engineering", ] @@ 
-563,7 +563,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ext_data = { "_libs.algos": {"pyxfile": "_libs/algos", "depends": _pxi_dep["algos"]}, - "_libs.groupby": {"pyxfile": "_libs/groupby", "depends": _pxi_dep["groupby"]}, + "_libs.groupby": {"pyxfile": "_libs/groupby"}, "_libs.hashing": {"pyxfile": "_libs/hashing", "include": [], "depends": []}, "_libs.hashtable": { "pyxfile": "_libs/hashtable", From 67847ff90e42482db02b0375d1b99f5592207d53 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sat, 19 Oct 2019 21:47:17 +0900 Subject: [PATCH 09/19] checkout unrelated files to master --- pandas/core/sorting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 6d80cf8c697d6..e6edad656d430 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -162,6 +162,7 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): xnull: boolean, if nulls are excluded; i.e. -1 labels are passed through """ + if not xnull: lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") shape = np.asarray(shape, dtype="i8") + lift @@ -302,8 +303,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels): def get_indexer_dict(label_list, keys): - """ return a dict of {labels} -> {indexers} """ - shape = [len(x) for x in keys] + """ return a diction of {labels} -> {indexers} """ + shape = list(map(len, keys)) group_index = get_group_index(label_list, shape, sort=True, xnull=True) ngroups = ( From 66a96e37725bded91934d15f69f5334df35adb32 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sat, 19 Oct 2019 21:55:32 +0900 Subject: [PATCH 10/19] checkout sorting.py to upstream/master not origin/master --- pandas/core/sorting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e6edad656d430..94810369785d3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -303,8 +303,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels): def get_indexer_dict(label_list, keys): - """ return a diction of {labels} -> {indexers} """ - shape = list(map(len, keys)) + """ return a dict of {labels} -> {indexers} """ + shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) ngroups = ( From 9390d2172337b16633a8d94082c6fbebac8d6c17 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Thu, 24 Oct 2019 23:01:28 +0900 Subject: [PATCH 11/19] accept TomAugspurger requests 1. remove the seed 2. remove meaningless comment 3. 
refer to GH issue number --- pandas/tests/groupby/test_value_counts.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 70d6b3db1923e..032ae7b6d8fec 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -85,8 +85,7 @@ def rebuild_index(df): @pytest.mark.parametrize("size", [100, 1000]) @pytest.mark.parametrize("frac", [0.1, 0.5, 1]) def test_series_groupby_value_counts_with_grouper(freq, size, frac): - np.random.seed(42) - + # GH28479 df = DataFrame.from_dict( { "date": date_range("2019-09-25", periods=size), @@ -99,8 +98,6 @@ def test_series_groupby_value_counts_with_grouper(freq, size, frac): # have to sort on index because of unstable sort on values xref GH9212 result = gr.value_counts().sort_index() expected = gr.apply(Series.value_counts).sort_index() - expected.index.names = ( - result.index.names - ) # .apply(Series.value_counts) can't create all names + expected.index.names = result.index.names tm.assert_series_equal(result, expected) From d8da75adb589664a424e466d845e1db90c5823f5 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Fri, 25 Oct 2019 02:03:33 +0900 Subject: [PATCH 12/19] use deterministic values instead of random values --- pandas/tests/groupby/test_value_counts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 032ae7b6d8fec..434a4834123c6 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -4,7 +4,7 @@ and proper parameter handling """ -from itertools import product +from itertools import cycle, islice, product import numpy as np import pytest @@ -89,7 +89,7 @@ def test_series_groupby_value_counts_with_grouper(freq, size, frac): df = DataFrame.from_dict( { "date": date_range("2019-09-25", periods=size), - "name": np.random.choice(list("abcd"), size), + "name": islice(cycle("abc"), size), } ).sample(frac=frac) From 6f6371ca2b7ff05d71738b3ac568dccfa93a97fd Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Fri, 25 Oct 2019 23:31:21 +0900 Subject: [PATCH 13/19] undo adding recons_labels method --- pandas/core/groupby/ops.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 808646a259541..fbe1598767736 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -865,14 +865,6 @@ def levels(self): def names(self): return [self.binlabels.name] - @property - def recons_labels(self): - comp_ids, obs_ids, _ = self.group_info - if len(self.binlabels) != len(self.indices): - return [np.unique(comp_ids)] - labels = (ping.labels for ping in self.groupings) - return decons_obs_group_ids(comp_ids, obs_ids, self.shape, labels, xnull=True) - @property def groupings(self): from pandas.core.groupby.grouper import Grouping From 82e5153810d986fd4f801d919a537b915117db82 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Sat, 26 Oct 2019 01:08:14 +0900 Subject: [PATCH 14/19] move a change returning unique comp ids --- pandas/core/sorting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 94810369785d3..4bd9d74592d81 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -162,6 +162,10 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): xnull: boolean, if nulls are excluded; i.e. 
-1 labels are passed through """ + labels = list(labels) + unique_comp_ids = np.unique(comp_ids) + if (shape[0] != len(unique_comp_ids)) and (shape[0] == len(labels[0])): + return [unique_comp_ids] if not xnull: lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") From df6454b2b04d3beedca7bd6a16389868afa6d165 Mon Sep 17 00:00:00 2001 From: donghojung Date: Mon, 4 Nov 2019 10:52:32 +0900 Subject: [PATCH 15/19] add recons_labels under the BinGrouper --- pandas/core/groupby/ops.py | 4 ++++ pandas/core/sorting.py | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 8d13c37270d7a..b953df78c05c0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -747,6 +747,10 @@ def group_info(self): ngroups, ) + @cache_readonly + def recons_labels(self): + return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1])]] + @cache_readonly def result_index(self): if len(self.binlabels) != 0 and isna(self.binlabels[0]): diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5d9d5c6c751a1..9b8a1a76e419c 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -162,10 +162,6 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool): xnull: boolean, if nulls are excluded; i.e. -1 labels are passed through """ - labels = list(labels) - unique_comp_ids = np.unique(comp_ids) - if (shape[0] != len(unique_comp_ids)) and (shape[0] == len(labels[0])): - return [unique_comp_ids] if not xnull: lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") From 28ee287ae00ab99bbe80dc8e57a5f69f133d1be9 Mon Sep 17 00:00:00 2001 From: donghojung Date: Mon, 4 Nov 2019 18:43:58 +0900 Subject: [PATCH 16/19] fix indexing in recons_labels --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b953df78c05c0..1bbd3d1b6d777 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -749,7 +749,7 @@ def group_info(self): @cache_readonly def recons_labels(self): - return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1])]] + return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] @cache_readonly def result_index(self): From a1049fde9bb5cddddb0748a1051a1436414404b2 Mon Sep 17 00:00:00 2001 From: donghojung Date: Mon, 4 Nov 2019 18:44:17 +0900 Subject: [PATCH 17/19] add an exact reproduction of the original issue --- pandas/tests/groupby/test_value_counts.py | 32 ++++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 8d630147a9304..c76ee09f977b5 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -4,12 +4,12 @@ and proper parameter handling """ -from itertools import cycle, islice, product +from itertools import product import numpy as np import pytest -from pandas import DataFrame, Grouper, MultiIndex, Series, date_range +from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime import pandas.util.testing as tm @@ -81,23 +81,29 @@ def rebuild_index(df): tm.assert_series_equal(left.sort_index(), right.sort_index()) -@pytest.mark.parametrize("freq", ["1D", "2D", "1W", "1Y"]) -@pytest.mark.parametrize("size", [100, 1000]) -@pytest.mark.parametrize("frac", [0.1, 0.5, 1]) -def test_series_groupby_value_counts_with_grouper(freq, size, frac): +def 
test_series_groupby_value_counts_with_grouper(): # GH28479 - df = DataFrame.from_dict( + df = DataFrame( { - "date": date_range("2019-09-25", periods=size), - "name": islice(cycle("abc"), size), + "Timestamp": [ + 1565083561, + 1565083561 + 86400, + 1565083561 + 86500, + 1565083561 + 86400 * 2, + 1565083561 + 86400 * 3, + 1565083561 + 86500 * 3, + 1565083561 + 86400 * 4, + ], + "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"], } - ).sample(frac=frac) + ).drop([3]) - gr = df.groupby(Grouper(key="date", freq=freq))["name"] + df["Datetime"] = to_datetime(df["Timestamp"].apply(lambda t: str(t)), unit="s") + dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 - result = gr.value_counts().sort_index() - expected = gr.apply(Series.value_counts).sort_index() + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() expected.index.names = result.index.names tm.assert_series_equal(result, expected) From 961a72cc78a2c5c43d68e67a55ecbff2b59b4629 Mon Sep 17 00:00:00 2001 From: donghojung Date: Wed, 6 Nov 2019 10:13:25 +0900 Subject: [PATCH 18/19] add a comment under recons_labels --- pandas/core/groupby/ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ee78af3e29e9c..5b61fc8624b32 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -752,6 +752,7 @@ def group_info(self): @cache_readonly def recons_labels(self): + # get unique result indices, and prepend 0 as groupby starts from the first return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] @cache_readonly From 368311c9adbaf2bef9764fd001a8e3f3340701e1 Mon Sep 17 00:00:00 2001 From: 0xF4D3C0D3 Date: Thu, 7 Nov 2019 23:17:53 +0900 Subject: [PATCH 19/19] rename from recons_labels to recons_codes --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 29ba64b4c90bf..9599ce0bf39a9 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -768,7 +768,7 @@ def group_info(self): ) @cache_readonly - def recons_labels(self): + def recons_codes(self): # get unique result indices, and prepend 0 as groupby starts from the first return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]]
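
For readers following the final shape of the fix: the one-liner added in PATCH 15, corrected in PATCH 16, and renamed to recons_codes in PATCH 19 derives the codes of the non-empty bins directly from BinGrouper.bins (which, as used in these patches, holds the cumulative end position of each bin over the sorted values). Below is a rough standalone sketch of that arithmetic, not taken from the patches: the bins array is made up (what a daily Grouper might produce when one day has no rows), and the comp_ids construction is only an approximation of the per-row ids that group_info yields.

import numpy as np

# Hypothetical cumulative bin edges for five rows spread over four days,
# where day 2 is empty (its edge repeats the previous one): sizes are 2, 1, 0, 2.
bins = np.array([2, 3, 3, 5])

# Approximate per-row group ids: each bin index repeated by its size,
# so the empty bin (index 2) never shows up among the rows.
comp_ids = np.repeat(np.arange(len(bins)), np.diff(np.r_[0, bins]))
# comp_ids -> array([0, 0, 1, 3, 3])

# The expression from the patch: positions where the cumulative edges change
# mark the start of a new non-empty bin, and 0 is prepended because the first
# bin is anchored at the first observation.
codes = np.r_[0, np.flatnonzero(bins[1:] != bins[:-1]) + 1]
# codes -> array([0, 1, 3])

# Same result as deduplicating the per-row ids, but computed straight from
# the bin edges without materializing comp_ids first.
assert (codes == np.unique(comp_ids)).all()

These codes correspond one-to-one with the observed (non-empty) groups, which is, as far as the patches show, why the empty daily bins from the original issue no longer break the multi-index construction in SeriesGroupBy.value_counts.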