pandas-dev · jreback · Jan 1, 2021 · Dec 31, 2020 · Dec 31, 2020 · Dec 31, 2020
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -285,6 +285,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`)
+- Bug in :meth:`.GroupBy.indices` where the result would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`)
 -
 
 Reshaping

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -891,9 +891,17 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys,
     if n == 0:
         return result
 
-    start = 0
-    cur = labels[0]
-    for i in range(1, n):
+    # Start at the first non-null entry (null keys are labelled -1)
+    j = 0
+    for j in range(0, n):
+        if labels[j] != -1:
+            break
+    else:
+        return result
+    cur = labels[j]
+    start = j
+
+    for i in range(j+1, n):
         lab = labels[i]
 
         if lab != cur:

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
@@ -542,8 +542,7 @@ def get_indexer_dict(
 
     group_index = get_group_index(label_list, shape, sort=True, xnull=True)
     if np.all(group_index == -1):
-        # When all keys are nan and dropna=True, indices_fast can't handle this
-        # and the return is empty anyway
+        # Short-circuit: lib.indices_fast would return the same empty result
         return {}
     ngroups = (
         ((group_index.size and group_index.max()) + 1)

diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py
@@ -126,3 +126,12 @@ def test_min_count(func, min_count, value):
     result = getattr(df.groupby("a"), func)(min_count=min_count)
     expected = DataFrame({"b": [value], "c": [np.nan]}, index=Index([1], name="a"))
     tm.assert_frame_equal(result, expected)
+
+
+def test_indicies_with_missing():
+    # GH 9304: the row whose "a" key is NaN must not appear in .indices
+    df = DataFrame({"a": [1, 1, np.nan], "b": [2, 3, 4], "c": [5, 6, 7]})
+    g = df.groupby(["a", "b"])
+    result = g.indices
+    expected = {(1.0, 2): np.array([0]), (1.0, 3): np.array([1])}
+    assert result == expected