pandas-dev · chris-b1 · Sep 18, 2016 · Sep 15, 2016
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -1572,3 +1572,4 @@ Bug Fixes
 - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`)
 - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`)
 - ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
+- Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if the grouped dataframe contains empty bins (:issue:`13629`)
diff --git a/pandas/algos.pyx b/pandas/algos.pyx
@@ -992,7 +992,7 @@ def is_lexsorted(list list_of_arrays):
 def groupby_indices(dict ids, ndarray[int64_t] labels,
                     ndarray[int64_t] counts):
     """
-    turn group_labels output into a combined indexer maping the labels to
+    turn group_labels output into a combined indexer mapping the labels to
     indexers
 
     Parameters
@@ -1313,6 +1313,9 @@ cdef inline float64_t _median_linear(float64_t* a, int n):
     cdef float64_t result
     cdef float64_t* tmp
 
+    if n == 0:
+        return NaN
+
     # count NAs
     for i in range(n):
         if a[i] != a[i]:

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -4424,12 +4424,13 @@ def _reorder_by_uniques(uniques, labels):
 def _groupby_indices(values):
 
     if is_categorical_dtype(values):
-
         # we have a categorical, so we can do quite a bit
         # bit better than factorizing again
         reverse = dict(enumerate(values.categories))
         codes = values.codes.astype('int64')
-        _, counts = _hash.value_count_int64(codes, False)
+
+        mask = 0 <= codes
+        counts = np.bincount(codes[mask], minlength=values.categories.size)
     else:
         reverse, codes, counts = _algos.group_labels(
             _values_from_object(_ensure_object(values)))

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -799,6 +799,17 @@ def test_get_group(self):
         self.assertRaises(ValueError,
                           lambda: g.get_group(('foo', 'bar', 'baz')))
 
+    def test_get_group_empty_bins(self):
+        d = pd.DataFrame([3, 1, 7, 6])
+        bins = [0, 5, 10, 15]
+        g = d.groupby(pd.cut(d[0], bins))
+        # no value exceeds 10, so the (10, 15] bin receives no rows
+        result = g.get_group('(0, 5]')
+        expected = DataFrame([3, 1], index=[0, 1])
+        assert_frame_equal(result, expected)
+        # asking for the group of the empty bin is expected to raise KeyError
+        self.assertRaises(KeyError, lambda: g.get_group('(10, 15]'))
+
     def test_get_group_grouped_by_tuple(self):
         # GH 8121
         df = DataFrame([[(1, ), (1, 2), (1, ), (1, 2)]], index=['ids']).T
@@ -4415,6 +4426,16 @@ def test_cython_median(self):
         xp = df.groupby(labels).median()
         assert_frame_equal(rs, xp)
 
+    def test_median_empty_bins(self):
+        df = pd.DataFrame(np.random.randint(0, 44, 500))
+        # GH 13629: median over groups that include empty bins
+        grps = range(0, 55, 5)
+        bins = pd.cut(df[0], grps)
+        # values stay below 44 but edges reach 50, so (45, 50] is empty
+        result = df.groupby(bins).median()
+        expected = df.groupby(bins).agg(lambda x: x.median())
+        assert_frame_equal(result, expected)
+
     def test_groupby_categorical_no_compress(self):
         data = Series(np.random.randn(9))
 
@@ -6123,6 +6144,27 @@ def test__cython_agg_general(self):
                 exc.args += ('operation: %s' % op, )
                 raise
 
+    def test_cython_agg_empty_buckets(self):
+        ops = [('mean', np.mean),
+               ('median', np.median),
+               ('var', lambda x: np.var(x, ddof=1)),
+               ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan),
+               ('prod', np.prod),
+               ('min', np.min),
+               ('max', np.max), ]
+        # 11, 12 and 13 all fall between the edges 10 and 15 of `grps`
+        df = pd.DataFrame([11, 12, 13])
+        grps = range(0, 55, 5)
+        # check _cython_agg_general(op) against agg() with the reference func
+        for op, targop in ops:
+            result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op)
+            expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x))
+            try:
+                tm.assert_frame_equal(result, expected)
+            except BaseException as exc:
+                exc.args += ('operation: %s' % op,)  # name the failing op
+                raise
+
     def test_cython_group_transform_algos(self):
         # GH 4095
         dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32,