From 966d5c63fcebcc117dfb05b6086d0c3a025b9e88 Mon Sep 17 00:00:00 2001
From: Ivan Nazarov <ivannnnz@gmail.com>
Date: Wed, 27 Jul 2016 16:07:07 +0300
Subject: [PATCH 1/6] BUG: group_shift_indexer checks for null group keys

---
 pandas/src/algos_groupby_helper.pxi | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/src/algos_groupby_helper.pxi b/pandas/src/algos_groupby_helper.pxi
index fb86c4efb7314..8e289544a072e 100644
--- a/pandas/src/algos_groupby_helper.pxi
+++ b/pandas/src/algos_groupby_helper.pxi
@@ -1356,6 +1356,11 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
                 ## reverse iterator if shifting backwards
                 ii = offset + sign * i
                 lab = labels[ii]
+
+                # Skip null keys
+                if lab == -1:
+                    continue
+
                 label_seen[lab] += 1
 
                 idxer_slot = label_seen[lab] % periods

From fe2f0ecf7f510a8ee10b62cde9644687cb97fecc Mon Sep 17 00:00:00 2001
From: Ivan Nazarov <ivannnnz@gmail.com>
Date: Wed, 27 Jul 2016 21:49:25 +0300
Subject: [PATCH 2/6] Treat incomplete group keys as distinct when shifting

---
 pandas/src/algos_groupby_helper.pxi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/src/algos_groupby_helper.pxi b/pandas/src/algos_groupby_helper.pxi
index 8e289544a072e..013a03f719bbd 100644
--- a/pandas/src/algos_groupby_helper.pxi
+++ b/pandas/src/algos_groupby_helper.pxi
@@ -1359,6 +1359,7 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
 
                 # Skip null keys
                 if lab == -1:
+                    out[ii] = -1
                     continue
 
                 label_seen[lab] += 1

From 94bae0bdf069971d40030a4c60a369eb4783a33a Mon Sep 17 00:00:00 2001
From: Ivan Nazarov <ivannnnz@gmail.com>
Date: Thu, 28 Jul 2016 08:37:27 +0300
Subject: [PATCH 3/6] Patched the template, and added a test for '.shift()'

---
 pandas/src/algos_groupby_helper.pxi.in |  6 ++++++
 pandas/tests/test_groupby.py           | 25 +++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/pandas/src/algos_groupby_helper.pxi.in b/pandas/src/algos_groupby_helper.pxi.in
index 6b9d8f07587bc..5c704436ce3a0 100644
--- a/pandas/src/algos_groupby_helper.pxi.in
+++ b/pandas/src/algos_groupby_helper.pxi.in
@@ -700,6 +700,12 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
                 ## reverse iterator if shifting backwards
                 ii = offset + sign * i
                 lab = labels[ii]
+
+                # Skip null keys
+                if lab == -1:
+                    out[ii] = -1
+                    continue
+
                 label_seen[lab] += 1
 
                 idxer_slot = label_seen[lab] % periods
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 3f5b4152afe31..8ac5d88822259 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -6560,6 +6560,31 @@ def test_grouping_string_repr(self):
         expected = "Grouping(('A', 'a'))"
         tm.assert_equal(result, expected)
 
+    def test_group_shift_with_null_key(self):
+        # This test is designed to replicate the segfault in issue #13813.
+        n_rows = 1200
+
+        # Generate a moderately large dataframe with occasional missing
+        # values in column `B`, and then group by [`A`, `B`]. This should
+        # force `-1` in `labels` array of `gr_.grouper.group_info` exactly
+        # at those places, where the group-by key is partilly missing.
+        df = pd.DataFrame([(i%12, i%3 if i%3 else float("nan"), i)
+                           for i in range(n_rows)], dtype=float,
+                          columns=["A", "B", "Z"], index=None)
+        gr_ = df.groupby(["A", "B"])
+
+        # Generate teh expected dataframe
+        expected = pd.DataFrame([(i%12, i%3 if i%3 else float("nan"),
+                                  i + 12 if i%3 and i < n_rows - 12 \
+                                  else float("nan"))
+                                 for i in range(n_rows)], dtype=float,
+                                columns=["A", "B", "Z"], index=None)
+        result = gr_.shift(-1)
+
+        # Check for data grabbed from beyond the acceptable array bounds
+        # in case there was no segfault.
+        tm.assert_frame_equal(result, expected[["Z"]])
+
 
 def assert_fp_equal(a, b):
     assert (np.abs(a - b) < 1e-12).all()

From d92cf3cfbf23c4befd3bdd1b75e2d6bc1ba87e1e Mon Sep 17 00:00:00 2001
From: Ivan Nazarov <ivannnnz@gmail.com>
Date: Thu, 28 Jul 2016 08:51:30 +0300
Subject: [PATCH 4/6] minor flake8 style corrections

---
 pandas/tests/test_groupby.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 8ac5d88822259..c3e882766a48a 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -6568,14 +6568,14 @@ def test_group_shift_with_null_key(self):
         # values in column `B`, and then group by [`A`, `B`]. This should
         # force `-1` in `labels` array of `gr_.grouper.group_info` exactly
         # at those places, where the group-by key is partilly missing.
-        df = pd.DataFrame([(i%12, i%3 if i%3 else float("nan"), i)
+        df = pd.DataFrame([(i % 12, i % 3 if i % 3 else float("nan"), i)
                            for i in range(n_rows)], dtype=float,
                           columns=["A", "B", "Z"], index=None)
         gr_ = df.groupby(["A", "B"])
 
         # Generate teh expected dataframe
-        expected = pd.DataFrame([(i%12, i%3 if i%3 else float("nan"),
-                                  i + 12 if i%3 and i < n_rows - 12 \
+        expected = pd.DataFrame([(i % 12, i % 3 if i % 3 else float("nan"),
+                                  i + 12 if i % 3 and i < n_rows - 12
                                   else float("nan"))
                                  for i in range(n_rows)], dtype=float,
                                 columns=["A", "B", "Z"], index=None)

From eab80388ef4031db84c5640fdb7486fe3c48d875 Mon Sep 17 00:00:00 2001
From: Ivan Nazarov <ivannnnz@gmail.com>
Date: Thu, 28 Jul 2016 09:46:39 +0300
Subject: [PATCH 5/6] Added bugfix description [ci skip]

---
 doc/source/whatsnew/v0.19.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 03f8dbc20b52e..be8ba92496d49 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -735,6 +735,7 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 
+- Bug in ``groupby().shift()``, which could cause a segfault or corruption in rare circumstances when grouping by columns with missing values (:issue:`13813`)
 - Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
 - Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
 - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)

From bddf799812202357ca94009206654f44f50cd1a3 Mon Sep 17 00:00:00 2001
From: Ivan Nazarov <ivannnnz@gmail.com>
Date: Fri, 29 Jul 2016 10:26:26 +0300
Subject: [PATCH 6/6] Switched from float('nan') to np.nan

---
 pandas/tests/test_groupby.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index c3e882766a48a..448d0c875b5c8 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -6568,15 +6568,15 @@ def test_group_shift_with_null_key(self):
         # values in column `B`, and then group by [`A`, `B`]. This should
         # force `-1` in `labels` array of `gr_.grouper.group_info` exactly
         # at those places, where the group-by key is partilly missing.
-        df = pd.DataFrame([(i % 12, i % 3 if i % 3 else float("nan"), i)
+        df = pd.DataFrame([(i % 12, i % 3 if i % 3 else np.nan, i)
                            for i in range(n_rows)], dtype=float,
                           columns=["A", "B", "Z"], index=None)
         gr_ = df.groupby(["A", "B"])
 
         # Generate teh expected dataframe
-        expected = pd.DataFrame([(i % 12, i % 3 if i % 3 else float("nan"),
+        expected = pd.DataFrame([(i % 12, i % 3 if i % 3 else np.nan,
                                   i + 12 if i % 3 and i < n_rows - 12
-                                  else float("nan"))
+                                  else np.nan)
                                  for i in range(n_rows)], dtype=float,
                                 columns=["A", "B", "Z"], index=None)
         result = gr_.shift(-1)