pandas-dev · ivannz · Jul 27, 2016 · Jul 27, 2016 · Jul 28, 2016 · Jul 28, 2016
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -735,6 +735,7 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 
+- Bug in ``groupby().shift()``, which could cause a segfault or corruption in rare circumstances when grouping by columns with missing values (:issue:`13813`)
 - Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
 - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
 - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)

diff --git a/pandas/src/algos_groupby_helper.pxi b/pandas/src/algos_groupby_helper.pxi
@@ -1356,6 +1356,12 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
                 ## reverse iterator if shifting backwards
                 ii = offset + sign * i
                 lab = labels[ii]
+
+                # Skip null keys
+                if lab == -1:
+                    out[ii] = -1
+                    continue
+
                 label_seen[lab] += 1
 
                 idxer_slot = label_seen[lab] % periods

diff --git a/pandas/src/algos_groupby_helper.pxi.in b/pandas/src/algos_groupby_helper.pxi.in
@@ -700,6 +700,12 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
                 ## reverse iterator if shifting backwards
                 ii = offset + sign * i
                 lab = labels[ii]
+
+                # Skip null keys
+                if lab == -1:
+                    out[ii] = -1
+                    continue
+
                 label_seen[lab] += 1
 
                 idxer_slot = label_seen[lab] % periods

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -6560,6 +6560,31 @@ def test_grouping_string_repr(self):
         expected = "Grouping(('A', 'a'))"
         tm.assert_equal(result, expected)
 
+    def test_group_shift_with_null_key(self):
+        # This test is designed to replicate the segfault in issue #13813.
+        n_rows = 1200
+
+        # Generate a moderately large dataframe with occasional missing
+        # values in column `B`, and then group by [`A`, `B`]. This should
+        # force `-1` in `labels` array of `gr_.grouper.group_info` exactly
+        # at those places, where the group-by key is partially missing.
+        df = pd.DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i)
+                           for i in range(n_rows)], dtype=float,
+                          columns=["A", "B", "Z"], index=None)
+        gr_ = df.groupby(["A", "B"])
+
+        # Generate the expected dataframe
+        expected = pd.DataFrame([(i % 12, i % 3 if i % 3 else np.nan,
+                                  i + 12 if i % 3 and i < n_rows - 12
+                                  else np.nan)
+                                 for i in range(n_rows)], dtype=float,
+                                columns=["A", "B", "Z"], index=None)
+        result = gr_.shift(-1)
+
+        # Check for data grabbed from beyond the acceptable array bounds
+        # in case there was no segfault.
+        tm.assert_frame_equal(result, expected[["Z"]])
+
 
 def assert_fp_equal(a, b):
     assert (np.abs(a - b) < 1e-12).all()