From d66032869e2ed7330d9859157d2f464e3e02d4b1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 11:38:19 -0700 Subject: [PATCH 01/12] use extract_array --- pandas/core/ops/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 3a5dfe6700bd2..b24a4b2514092 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -39,18 +39,20 @@ from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeArray, + ABCDatetimeIndex, ABCIndex, ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCTimedeltaArray, + ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import isna, notna import pandas as pd from pandas._typing import ArrayLike -import pandas.core.common as com +from pandas.core.construction import extract_array from . import missing from .docstrings import ( @@ -1017,7 +1019,7 @@ def wrapper(left, right): right = np.broadcast_to(right, left.shape) right = pd.TimedeltaIndex(right) - assert isinstance(right, (pd.TimedeltaIndex, ABCTimedeltaArray, ABCSeries)) + assert isinstance(right, (ABCTimedeltaIndex, ABCTimedeltaArray, ABCSeries)) try: result = op(left._values, right) except NullFrequencyError: @@ -1035,7 +1037,7 @@ def wrapper(left, right): # does inference in the case where `result` has object-dtype. 
return construct_result(left, result, index=left.index, name=res_name) - elif isinstance(right, (ABCDatetimeArray, pd.DatetimeIndex)): + elif isinstance(right, (ABCDatetimeArray, ABCDatetimeIndex)): result = op(left._values, right) return construct_result(left, result, index=left.index, name=res_name) @@ -1231,7 +1233,7 @@ def wrapper(self, other, axis=None): ) # always return a full value series here - res_values = com.values_from_object(res) + res_values = extract_array(res, extract_numpy=True) return self._constructor( res_values, index=self.index, name=res_name, dtype="bool" ) From 7a7eb3254edb8e342ae3fa2e18096161f10fe4a5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 11:30:44 -0700 Subject: [PATCH 02/12] cleanups --- pandas/_libs/reduction.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 3 ++- pandas/core/apply.py | 4 ++-- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/datetimelike.py | 15 ++------------- pandas/core/arrays/datetimes.py | 1 - pandas/core/arrays/period.py | 1 - pandas/core/arrays/timedeltas.py | 1 - pandas/core/groupby/groupby.py | 6 +++--- pandas/core/internals/concat.py | 10 ++++------ pandas/tests/groupby/test_bin_groupby.py | 12 +++++++----- 11 files changed, 22 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 739ac0ed397ca..5ad1d71ff8ebb 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -628,7 +628,7 @@ cdef class BlockSlider: arr.shape[1] = 0 -def reduce(arr, f, axis=0, dummy=None, labels=None): +def do_reduce(arr, f, axis=0, dummy=None, labels=None): """ Parameters diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 6a32553fe2d38..d24aafae0967d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1280,7 +1280,8 @@ class Timedelta(_Timedelta): else: raise ValueError( "Value must be Timedelta, string, integer, " - "float, timedelta or convertible") + 
"float, timedelta or convertible, not {typ}" + .format(typ=type(value).__name__)) if is_timedelta64_object(value): value = value.view('i8') diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2246bbfde636d..b7995f20ce2b7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -221,7 +221,7 @@ def apply_raw(self): """ apply to the values as a numpy array """ try: - result = reduction.reduce(self.values, self.f, axis=self.axis) + result = reduction.do_reduce(self.values, self.f, axis=self.axis) except Exception: result = np.apply_along_axis(self.f, self.axis, self.values) @@ -281,7 +281,7 @@ def apply_standard(self): dummy = Series(empty_arr, index=index, dtype=values.dtype) try: - result = reduction.reduce( + result = reduction.do_reduce( values, self.f, axis=self.axis, dummy=dummy, labels=labels ) return self.obj._constructor_sliced(result, index=labels) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b16217d5d0a32..d22b4bd4d3f2b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2703,7 +2703,7 @@ def _convert_to_list_like(list_like): elif is_scalar(list_like): return [list_like] else: - # is this reached? + # TODO: is this reached? 
return [list_like] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f86b307e5ede3..599300bc7973a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -57,21 +57,10 @@ class AttributesMixin: _data = None # type: np.ndarray - @property - def _attributes(self): - # Inheriting subclass should implement _attributes as a list of strings - raise AbstractMethodError(self) - @classmethod def _simple_new(cls, values, **kwargs): raise AbstractMethodError(cls) - def _get_attributes_dict(self): - """ - return an attributes dict for my class - """ - return {k: getattr(self, k, None) for k in self._attributes} - @property def _scalar_type(self) -> Type[DatetimeLikeScalar]: """The scalar associated with this datelike @@ -224,8 +213,8 @@ class TimelikeOps: .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ -default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 2e086c8ce8c34..e6f6fada05877 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -328,7 +328,6 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps # ----------------------------------------------------------------- # Constructors - _attributes = ["freq", "tz"] _dtype = None # type: Union[np.dtype, DatetimeTZDtype] _freq = None diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c290391278def..2e72f0dbf9b07 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -161,7 +161,6 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): # array priority higher than numpy scalars __array_priority__ = 1000 - _attributes = ["freq"] _typ = "periodarray" # ABCPeriodArray _scalar_type = Period diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index dd0b9a79c6dca..afd1e8203059e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -199,7 +199,6 @@ def dtype(self): # ---------------------------------------------------------------- # Constructors - _attributes = ["freq"] def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): if isinstance(values, (ABCSeries, ABCIndexClass)): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 15b94e59c065c..12b9cf25687cf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1206,7 +1206,7 @@ def mean(self, *args, **kwargs): ) except GroupByError: raise - except Exception: # pragma: no cover + except Exception: with _group_selection_context(self): f = lambda x: x.mean(axis=self.axis, **kwargs) return self._python_agg_general(f) @@ -1232,7 +1232,7 @@ def median(self, **kwargs): ) except GroupByError: raise - except Exception: # pragma: no cover + except Exception: def f(x): if isinstance(x, np.ndarray): 
@@ -2470,7 +2470,7 @@ def groupby(obj, by, **kwds): from pandas.core.groupby.generic import DataFrameGroupBy klass = DataFrameGroupBy - else: # pragma: no cover + else: raise TypeError("invalid type: {}".format(obj)) return klass(obj, by, **kwds) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 121c61d8d3623..7eaec0687b790 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -183,7 +183,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): fill_value = upcasted_na if self.is_na: - if getattr(self.block, "is_object", False): + if self.block.is_object: # we want to avoid filling with np.nan if we are # using None; we already know that we are all # nulls @@ -191,18 +191,16 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if len(values) and values[0] is None: fill_value = None - if getattr(self.block, "is_datetimetz", False) or is_datetime64tz_dtype( - empty_dtype - ): + if self.block.is_datetimetz or is_datetime64tz_dtype(empty_dtype): if self.block is None: array = empty_dtype.construct_array_type() return array( np.full(self.shape[1], fill_value.value), dtype=empty_dtype ) pass - elif getattr(self.block, "is_categorical", False): + elif self.block.is_categorical: pass - elif getattr(self.block, "is_extension", False): + elif self.block.is_extension: pass else: missing_arr = np.empty(self.shape, dtype=empty_dtype) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index b240876de92b1..ce249e9de3497 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -126,27 +126,29 @@ def test_int_index(self): from pandas.core.series import Series arr = np.random.randn(100, 4) - result = reduction.reduce(arr, np.sum, labels=Index(np.arange(4))) + result = reduction.do_reduce(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) - result = 
reduction.reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) + result = reduction.do_reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(100)) - result = reduction.reduce(arr, np.sum, dummy=dummy, labels=Index(np.arange(4))) + result = reduction.do_reduce( + arr, np.sum, dummy=dummy, labels=Index(np.arange(4)) + ) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(4)) - result = reduction.reduce( + result = reduction.do_reduce( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) expected = arr.sum(1) assert_almost_equal(result, expected) - result = reduction.reduce( + result = reduction.do_reduce( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) assert_almost_equal(result, expected) From b750f06ce3fec1a27b2f3e8ac06708a9082a9880 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 11:53:58 -0700 Subject: [PATCH 03/12] remove apparently-unreachable value_counts --- pandas/core/groupby/generic.py | 125 --------------------------------- 1 file changed, 125 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 811836d0e8a4d..ac390b9388c31 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -23,13 +23,10 @@ from pandas.core.dtypes.cast import maybe_convert_objects, maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - ensure_int64, ensure_platform_int, is_bool, is_datetimelike, is_dict_like, - is_integer_dtype, - is_interval_dtype, is_list_like, is_numeric_dtype, is_object_dtype, @@ -1195,128 +1192,6 @@ def describe(self, **kwargs): return result.T return result.unstack() - def value_counts( - self, normalize=False, sort=True, ascending=False, bins=None, dropna=True - ): - - from pandas.core.reshape.tile import cut - from pandas.core.reshape.merge import _get_join_indexers - - if bins is not 
None and not np.iterable(bins): - # scalar bins cannot be done at top level - # in a backward compatible way - return self.apply( - Series.value_counts, - normalize=normalize, - sort=sort, - ascending=ascending, - bins=bins, - ) - - ids, _, _ = self.grouper.group_info - val = self.obj._internal_get_values() - - # groupby removes null keys from groupings - mask = ids != -1 - ids, val = ids[mask], val[mask] - - if bins is None: - lab, lev = algorithms.factorize(val, sort=True) - llab = lambda lab, inc: lab[inc] - else: - - # lab is a Categorical with categories an IntervalIndex - lab = cut(Series(val), bins, include_lowest=True) - lev = lab.cat.categories - lab = lev.take(lab.cat.codes) - llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] - - if is_interval_dtype(lab): - # TODO: should we do this inside II? - sorter = np.lexsort((lab.left, lab.right, ids)) - else: - sorter = np.lexsort((lab, ids)) - - ids, lab = ids[sorter], lab[sorter] - - # group boundaries are where group ids change - idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] - - # new values are where sorted labels change - lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) - inc = np.r_[True, lchanges] - inc[idx] = True # group boundaries are also new values - out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts - - # num. 
of times each group should be repeated - rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) - - # multi-index components - labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] - levels = [ping.group_index for ping in self.grouper.groupings] + [lev] - names = self.grouper.names + [self._selection_name] - - if dropna: - mask = labels[-1] != -1 - if mask.all(): - dropna = False - else: - out, labels = out[mask], [label[mask] for label in labels] - - if normalize: - out = out.astype("float") - d = np.diff(np.r_[idx, len(ids)]) - if dropna: - m = ids[lab == -1] - np.add.at(d, m, -1) - acc = rep(d)[mask] - else: - acc = rep(d) - out /= acc - - if sort and bins is None: - cat = ids[inc][mask] if dropna else ids[inc] - sorter = np.lexsort((out if ascending else -out, cat)) - out, labels[-1] = out[sorter], labels[-1][sorter] - - if bins is None: - mi = MultiIndex( - levels=levels, codes=labels, names=names, verify_integrity=False - ) - - if is_integer_dtype(out): - out = ensure_int64(out) - return Series(out, index=mi, name=self._selection_name) - - # for compat. 
with libgroupby.value_counts need to ensure every - # bin is present at every index level, null filled with zeros - diff = np.zeros(len(out), dtype="bool") - for lab in labels[:-1]: - diff |= np.r_[True, lab[1:] != lab[:-1]] - - ncat, nbin = diff.sum(), len(levels[-1]) - - left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)] - - right = [diff.cumsum() - 1, labels[-1]] - - _, idx = _get_join_indexers(left, right, sort=False, how="left") - out = np.where(idx != -1, out[idx], 0) - - if sort: - sorter = np.lexsort((out if ascending else -out, left[0])) - out, left[-1] = out[sorter], left[-1][sorter] - - # build the multi-index w/ full levels - codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1])) - codes.append(left[-1]) - - mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) - - if is_integer_dtype(out): - out = ensure_int64(out) - return Series(out, index=mi, name=self._selection_name) - def count(self): """ Compute count of group, excluding missing values. 
From 64e9897b09566062c4f9c3d667261ec2a2f0c68f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 13:05:00 -0700 Subject: [PATCH 04/12] typing --- pandas/core/groupby/groupby.py | 6 +++--- pandas/core/groupby/grouper.py | 3 ++- pandas/core/groupby/ops.py | 4 ++-- pandas/core/internals/concat.py | 10 ++++++---- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 12b9cf25687cf..b40cf13379c2d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -343,7 +343,7 @@ class _GroupBy(PandasObject, SelectionMixin): def __init__( self, - obj, + obj: NDFrame, keys=None, axis=0, level=None, @@ -360,8 +360,8 @@ def __init__( self._selection = selection - if isinstance(obj, NDFrame): - obj._consolidate_inplace() + assert isinstance(obj, NDFrame), type(obj) + obj._consolidate_inplace() self.level = level diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1d88ebd26b1b6..e7ed348ccb140 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -25,6 +25,7 @@ from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.common as com from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame from pandas.core.groupby.categorical import recode_for_groupby, recode_from_groupby from pandas.core.groupby.ops import BaseGrouper from pandas.core.index import CategoricalIndex, Index, MultiIndex @@ -423,7 +424,7 @@ def groups(self): def _get_grouper( - obj, + obj: NDFrame, key=None, axis=0, level=None, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 1484feeeada64..f20c3f702e29d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -906,7 +906,7 @@ def _get_sorted_data(self): return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj): - return sdata.iloc[slice_obj] + raise AbstractMethodError(self) def 
apply(self, f): raise AbstractMethodError(self) @@ -933,7 +933,7 @@ def _chop(self, sdata, slice_obj): if self.axis == 0: return sdata.iloc[slice_obj] else: - return sdata._slice(slice_obj, axis=1) # .loc[:, slice_obj] + return sdata._slice(slice_obj, axis=1) def get_splitter(data, *args, **kwargs): diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 7eaec0687b790..121c61d8d3623 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -183,7 +183,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): fill_value = upcasted_na if self.is_na: - if self.block.is_object: + if getattr(self.block, "is_object", False): # we want to avoid filling with np.nan if we are # using None; we already know that we are all # nulls @@ -191,16 +191,18 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if len(values) and values[0] is None: fill_value = None - if self.block.is_datetimetz or is_datetime64tz_dtype(empty_dtype): + if getattr(self.block, "is_datetimetz", False) or is_datetime64tz_dtype( + empty_dtype + ): if self.block is None: array = empty_dtype.construct_array_type() return array( np.full(self.shape[1], fill_value.value), dtype=empty_dtype ) pass - elif self.block.is_categorical: + elif getattr(self.block, "is_categorical", False): pass - elif self.block.is_extension: + elif getattr(self.block, "is_extension", False): pass else: missing_arr = np.empty(self.shape, dtype=empty_dtype) From 6fd209667490eeff1cfaa2e9058d87e15f1503d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 18:01:49 -0700 Subject: [PATCH 05/12] add types --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 821c35e0cce2f..b3d3802015528 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3563,7 +3563,7 @@ def _iget_item_cache(self, item): def _box_item_values(self, key, values): raise 
AbstractMethodError(self) - def _slice(self, slobj, axis=0, kind=None): + def _slice(self, slob: slice, axis: int = 0, kind=None): """ Construct a slice of this container. From 84fbabc7699cf967c80e354290a4f96257581fc1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 18:03:13 -0700 Subject: [PATCH 06/12] revert a type --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b3d3802015528..e097cb84e7777 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3563,7 +3563,7 @@ def _iget_item_cache(self, item): def _box_item_values(self, key, values): raise AbstractMethodError(self) - def _slice(self, slob: slice, axis: int = 0, kind=None): + def _slice(self, slob: slice, axis=0, kind=None): """ Construct a slice of this container. From e01f2869c9e4b610578cd2a83a04d0dbadeb35a9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 18:05:39 -0700 Subject: [PATCH 07/12] improve type --- pandas/core/groupby/grouper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e7ed348ccb140..93d9fa58cdb52 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -21,11 +21,11 @@ ) from pandas.core.dtypes.generic import ABCSeries +from pandas._typing import FrameOrSeries import pandas.core.algorithms as algorithms from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.common as com from pandas.core.frame import DataFrame -from pandas.core.generic import NDFrame from pandas.core.groupby.categorical import recode_for_groupby, recode_from_groupby from pandas.core.groupby.ops import BaseGrouper from pandas.core.index import CategoricalIndex, Index, MultiIndex @@ -424,7 +424,7 @@ def groups(self): def _get_grouper( - obj: NDFrame, + obj: FrameOrSeries, key=None, axis=0, level=None, From 
b809ac6e487b01092290f87e206cad8e2f01be6c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 18:10:25 -0700 Subject: [PATCH 08/12] restore value_counts --- pandas/core/groupby/generic.py | 125 +++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ac390b9388c31..811836d0e8a4d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -23,10 +23,13 @@ from pandas.core.dtypes.cast import maybe_convert_objects, maybe_downcast_to_dtype from pandas.core.dtypes.common import ( + ensure_int64, ensure_platform_int, is_bool, is_datetimelike, is_dict_like, + is_integer_dtype, + is_interval_dtype, is_list_like, is_numeric_dtype, is_object_dtype, @@ -1192,6 +1195,128 @@ def describe(self, **kwargs): return result.T return result.unstack() + def value_counts( + self, normalize=False, sort=True, ascending=False, bins=None, dropna=True + ): + + from pandas.core.reshape.tile import cut + from pandas.core.reshape.merge import _get_join_indexers + + if bins is not None and not np.iterable(bins): + # scalar bins cannot be done at top level + # in a backward compatible way + return self.apply( + Series.value_counts, + normalize=normalize, + sort=sort, + ascending=ascending, + bins=bins, + ) + + ids, _, _ = self.grouper.group_info + val = self.obj._internal_get_values() + + # groupby removes null keys from groupings + mask = ids != -1 + ids, val = ids[mask], val[mask] + + if bins is None: + lab, lev = algorithms.factorize(val, sort=True) + llab = lambda lab, inc: lab[inc] + else: + + # lab is a Categorical with categories an IntervalIndex + lab = cut(Series(val), bins, include_lowest=True) + lev = lab.cat.categories + lab = lev.take(lab.cat.codes) + llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] + + if is_interval_dtype(lab): + # TODO: should we do this inside II? 
+ sorter = np.lexsort((lab.left, lab.right, ids)) + else: + sorter = np.lexsort((lab, ids)) + + ids, lab = ids[sorter], lab[sorter] + + # group boundaries are where group ids change + idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] + + # new values are where sorted labels change + lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) + inc = np.r_[True, lchanges] + inc[idx] = True # group boundaries are also new values + out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts + + # num. of times each group should be repeated + rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx)) + + # multi-index components + labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)] + levels = [ping.group_index for ping in self.grouper.groupings] + [lev] + names = self.grouper.names + [self._selection_name] + + if dropna: + mask = labels[-1] != -1 + if mask.all(): + dropna = False + else: + out, labels = out[mask], [label[mask] for label in labels] + + if normalize: + out = out.astype("float") + d = np.diff(np.r_[idx, len(ids)]) + if dropna: + m = ids[lab == -1] + np.add.at(d, m, -1) + acc = rep(d)[mask] + else: + acc = rep(d) + out /= acc + + if sort and bins is None: + cat = ids[inc][mask] if dropna else ids[inc] + sorter = np.lexsort((out if ascending else -out, cat)) + out, labels[-1] = out[sorter], labels[-1][sorter] + + if bins is None: + mi = MultiIndex( + levels=levels, codes=labels, names=names, verify_integrity=False + ) + + if is_integer_dtype(out): + out = ensure_int64(out) + return Series(out, index=mi, name=self._selection_name) + + # for compat. 
with libgroupby.value_counts need to ensure every + # bin is present at every index level, null filled with zeros + diff = np.zeros(len(out), dtype="bool") + for lab in labels[:-1]: + diff |= np.r_[True, lab[1:] != lab[:-1]] + + ncat, nbin = diff.sum(), len(levels[-1]) + + left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)] + + right = [diff.cumsum() - 1, labels[-1]] + + _, idx = _get_join_indexers(left, right, sort=False, how="left") + out = np.where(idx != -1, out[idx], 0) + + if sort: + sorter = np.lexsort((out if ascending else -out, left[0])) + out, left[-1] = out[sorter], left[-1][sorter] + + # build the multi-index w/ full levels + codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1])) + codes.append(left[-1]) + + mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) + + if is_integer_dtype(out): + out = ensure_int64(out) + return Series(out, index=mi, name=self._selection_name) + def count(self): """ Compute count of group, excluding missing values. From a6dc9e0b2614fba22bce6d3627d463843fb8fd9c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 19:53:40 -0700 Subject: [PATCH 09/12] typo fixup --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e097cb84e7777..ecda9d616960a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3563,7 +3563,7 @@ def _iget_item_cache(self, item): def _box_item_values(self, key, values): raise AbstractMethodError(self) - def _slice(self, slob: slice, axis=0, kind=None): + def _slice(self, slobj: slice, axis=0, kind=None): """ Construct a slice of this container. 
From 5b7a89f8a82e68271477c84ef11cdc9f5be78632 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 1 Aug 2019 20:32:21 -0700 Subject: [PATCH 10/12] mypy fixup --- pandas/core/generic.py | 4 +--- pandas/core/groupby/groupby.py | 4 +++- pandas/core/groupby/grouper.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ecda9d616960a..f6b89a6e90cd8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6190,8 +6190,6 @@ def fillna( axis = 0 axis = self._get_axis_number(axis) - from pandas import DataFrame - if value is None: if self._is_mixed_type and axis == 1: @@ -6254,7 +6252,7 @@ def fillna( new_data = self._data.fillna( value=value, limit=limit, inplace=inplace, downcast=downcast ) - elif isinstance(value, DataFrame) and self.ndim == 2: + elif isinstance(value, ABCDataFrame) and self.ndim == 2: new_data = self.where(self.notna(), value) else: raise ValueError("invalid fill value with a %s" % type(value)) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b40cf13379c2d..c9352cfad409c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -29,14 +29,16 @@ class providing the base-class of operations. 
from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( ensure_float, + is_datetime64_dtype, is_datetime64tz_dtype, is_extension_array_dtype, + is_integer_dtype, is_numeric_dtype, + is_object_dtype, is_scalar, ) from pandas.core.dtypes.missing import isna, notna -from pandas.api.types import is_datetime64_dtype, is_integer_dtype, is_object_dtype import pandas.core.algorithms as algorithms from pandas.core.arrays import Categorical from pandas.core.base import ( diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 93d9fa58cdb52..e7ed348ccb140 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -21,11 +21,11 @@ ) from pandas.core.dtypes.generic import ABCSeries -from pandas._typing import FrameOrSeries import pandas.core.algorithms as algorithms from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.common as com from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame from pandas.core.groupby.categorical import recode_for_groupby, recode_from_groupby from pandas.core.groupby.ops import BaseGrouper from pandas.core.index import CategoricalIndex, Index, MultiIndex @@ -424,7 +424,7 @@ def groups(self): def _get_grouper( - obj: FrameOrSeries, + obj: NDFrame, key=None, axis=0, level=None, From 482f2d037c83c618265d214c09e7bfdcfa655557 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 07:32:36 -0700 Subject: [PATCH 11/12] rename do_reduce-->compute_reduction --- pandas/_libs/reduction.pyx | 2 +- pandas/core/apply.py | 4 ++-- pandas/core/arrays/datetimelike.py | 4 ++-- pandas/tests/groupby/test_bin_groupby.py | 20 ++++++++------------ 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 5ad1d71ff8ebb..f95685c337969 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -628,7 +628,7 @@ cdef class BlockSlider: 
arr.shape[1] = 0 -def do_reduce(arr, f, axis=0, dummy=None, labels=None): +def compute_reduction(arr, f, axis=0, dummy=None, labels=None): """ Parameters diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b7995f20ce2b7..5c8599dbb054b 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -221,7 +221,7 @@ def apply_raw(self): """ apply to the values as a numpy array """ try: - result = reduction.do_reduce(self.values, self.f, axis=self.axis) + result = reduction.compute_reduction(self.values, self.f, axis=self.axis) except Exception: result = np.apply_along_axis(self.f, self.axis, self.values) @@ -281,7 +281,7 @@ def apply_standard(self): dummy = Series(empty_arr, index=index, dtype=values.dtype) try: - result = reduction.do_reduce( + result = reduction.compute_reduction( values, self.f, axis=self.axis, dummy=dummy, labels=labels ) return self.obj._constructor_sliced(result, index=labels) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 599300bc7973a..ecad58e49bde4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -213,8 +213,8 @@ class TimelikeOps: .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, - default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ +default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index ce249e9de3497..2195686ee9c7f 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -6,15 +6,13 @@ from pandas.core.dtypes.common import ensure_int64 -from pandas import Index, isna +from pandas import Index, Series, isna from pandas.core.groupby.ops import generate_bins_generic import pandas.util.testing as tm from pandas.util.testing import assert_almost_equal def test_series_grouper(): - from pandas import Series - obj = Series(np.random.randn(10)) dummy = obj[:0] @@ -31,8 +29,6 @@ def test_series_grouper(): def test_series_bin_grouper(): - from pandas import Series - obj = Series(np.random.randn(10)) dummy = obj[:0] @@ -123,32 +119,32 @@ class TestMoments: class TestReducer: def test_int_index(self): - from pandas.core.series import Series - arr = np.random.randn(100, 4) - result = reduction.do_reduce(arr, np.sum, labels=Index(np.arange(4))) + result = reduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) - result = reduction.do_reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) + result = reduction.compute_reduction( + arr, np.sum, axis=1, labels=Index(np.arange(100)) + ) expected = arr.sum(1) assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(100)) - result = reduction.do_reduce( + result = reduction.compute_reduction( arr, np.sum, dummy=dummy, labels=Index(np.arange(4)) ) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(4)) - result = reduction.do_reduce( + result = reduction.compute_reduction( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) expected = arr.sum(1) assert_almost_equal(result, expected) - result = reduction.do_reduce( + result = reduction.compute_reduction( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) 
assert_almost_equal(result, expected) From 61f0241a1a1e546be23aa32c9078b247a7c324d0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Aug 2019 08:10:05 -0700 Subject: [PATCH 12/12] dummy to force CI