From da25b19189f6b43c3d2bac73f3ebded9c85ac4b2 Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Mon, 16 Apr 2018 01:14:23 -0400
Subject: [PATCH 01/13] ENH GH20601 raise an error when the number of levels in
 a pivot table is larger than int32

---
 pandas/core/reshape/reshape.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 03b77f0e787f0..31aa91fec9622 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -162,6 +162,8 @@ def _make_selectors(self):
         self.full_shape = ngroups, stride
 
         selector = self.sorted_labels[-1] + stride * comp_index + self.lift
+        if np.prod(self.full_shape) > (2 ** 31 - 1):
+            raise ValueError('Pivot table is too big, causing int32 overflow')
         mask = np.zeros(np.prod(self.full_shape), dtype=bool)
         mask.put(selector, True)
 

From e6c88c1e18becc79f02c61dbf3846659d61e135f Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Mon, 16 Apr 2018 01:53:06 -0400
Subject: [PATCH 02/13] TST add a test for pivot table large number of levels
 causing int32 overflow

---
 pandas/tests/reshape/test_pivot.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 1cb036dccf23c..f2fb8625f6d3f 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1276,6 +1276,14 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
                                aggfunc=f_numpy)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.slow
+    def test_pivot_number_of_levels_larger_than_int32(self):
+        # GH 20601
+        data = DataFrame({'ind1': list(range(1337600)) * 2,
+                          'ind2': list(range(3040)) * 2 * 440, 'count': [1] * 2 * 1337600})
+        with tm.assert_raises_regex(ValueError, 'int32 overflow'):
+            data.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count')
+
 
 class TestCrosstab(object):
 

From db2319eee68650ad8089eaf6dc3680badf32bb1a Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Mon, 16 Apr 2018 01:55:56 -0400
Subject: [PATCH 03/13] CLN PEP8 compliance

---
 pandas/tests/reshape/test_pivot.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index f2fb8625f6d3f..5ccedf92391af 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1280,9 +1280,11 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
         data = DataFrame({'ind1': list(range(1337600)) * 2,
-                          'ind2': list(range(3040)) * 2 * 440, 'count': [1] * 2 * 1337600})
+                          'ind2': list(range(3040)) * 2 * 440,
+                          'count': [1] * 2 * 1337600})
         with tm.assert_raises_regex(ValueError, 'int32 overflow'):
-            data.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count')
+            data.pivot_table(index='ind1', columns='ind2',
+                             values='count', aggfunc='count')
 
 
 class TestCrosstab(object):

From 6b7b03065fa859dd367589a2d3c65835669d7cd1 Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Sun, 22 Apr 2018 02:20:18 -0400
Subject: [PATCH 04/13] ENH catch the int32 overflow error earlier and in two
 separate places: in pivot_table and unstack

---
 pandas/core/reshape/pivot.py       | 5 +++++
 pandas/core/reshape/reshape.py     | 7 +++++--
 pandas/tests/reshape/test_pivot.py | 8 ++++----
 pandas/tests/test_multilevel.py    | 7 +++++++
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 22e591e776a22..bb6e849cc94dd 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -31,6 +31,11 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
     index = _convert_by(index)
     columns = _convert_by(columns)
 
+    num_rows = data.reindex(index, axis='columns').shape[0]
+    num_columns = data.reindex(columns, axis='columns').shape[0]
+    if num_rows * num_columns > (2 ** 31 - 1):
+        raise ValueError('Pivot table is too big, causing int32 overflow')
+
     if isinstance(aggfunc, list):
         pieces = []
         keys = []
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 31aa91fec9622..226a8391a5ca5 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -127,6 +127,11 @@ def __init__(self, values, index, level=-1, value_columns=None,
         self.removed_level = self.new_index_levels.pop(self.level)
         self.removed_level_full = index.levels[self.level]
 
+        num_rows = np.max([index_level.size for index_level in self.new_index_levels])
+        num_columns = self.removed_level.size
+        if num_rows * num_columns > (2 ** 31 - 1):
+            raise ValueError('Unstacked data frame is too big, causing int32 overflow')
+
         self._make_sorted_values_labels()
         self._make_selectors()
 
@@ -162,8 +167,6 @@ def _make_selectors(self):
         self.full_shape = ngroups, stride
 
         selector = self.sorted_labels[-1] + stride * comp_index + self.lift
-        if np.prod(self.full_shape) > (2 ** 31 - 1):
-            raise ValueError('Pivot table is too big, causing int32 overflow')
         mask = np.zeros(np.prod(self.full_shape), dtype=bool)
         mask.put(selector, True)
 
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 5ccedf92391af..8935cb6274733 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1279,11 +1279,11 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     @pytest.mark.slow
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
-        data = DataFrame({'ind1': list(range(1337600)) * 2,
-                          'ind2': list(range(3040)) * 2 * 440,
-                          'count': [1] * 2 * 1337600})
+        df = DataFrame({'ind1': np.arange(2 ** 16),
+                          'ind2': np.arange(2 ** 16),
+                          'count': np.arange(2 ** 16)})
         with tm.assert_raises_regex(ValueError, 'int32 overflow'):
-            data.pivot_table(index='ind1', columns='ind2',
+            df.pivot_table(index='ind1', columns='ind2',
                              values='count', aggfunc='count')
 
 
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 2022340926cca..94b7e31744836 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1212,6 +1212,13 @@ def test_unstack_unobserved_keys(self):
         recons = result.stack()
         tm.assert_frame_equal(recons, df)
 
+    @pytest.mark.slow
+    def test_unstack_number_of_levels_larger_than_int32(self):
+        # GH 20601
+        df = DataFrame(np.random.randn(2 ** 16, 2), index=[np.arange(2 ** 16), np.arange(2 ** 16)])
+        with tm.assert_raises_regex(ValueError, 'int32 overflow'):
+            df.unstack()
+
     def test_stack_order_with_unsorted_levels(self):
         # GH 16323
 

From 23dae9344061fe6b9cd4d4cc4994b6369140dce8 Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Sun, 22 Apr 2018 02:48:08 -0400
Subject: [PATCH 05/13] CLN PEP8 compliance

---
 pandas/core/reshape/reshape.py  | 6 ++++--
 pandas/tests/test_multilevel.py | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 226a8391a5ca5..c649e2d751733 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -127,10 +127,12 @@ def __init__(self, values, index, level=-1, value_columns=None,
         self.removed_level = self.new_index_levels.pop(self.level)
         self.removed_level_full = index.levels[self.level]
 
-        num_rows = np.max([index_level.size for index_level in self.new_index_levels])
+        num_rows = np.max([index_level.size for index_level
+                           in self.new_index_levels])
         num_columns = self.removed_level.size
         if num_rows * num_columns > (2 ** 31 - 1):
-            raise ValueError('Unstacked data frame is too big, causing int32 overflow')
+            raise ValueError('Unstacked DataFrame is too big, '
+                             'causing int32 overflow')
 
         self._make_sorted_values_labels()
         self._make_selectors()
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 94b7e31744836..9f14ee3cb7f0d 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1215,7 +1215,8 @@ def test_unstack_unobserved_keys(self):
     @pytest.mark.slow
     def test_unstack_number_of_levels_larger_than_int32(self):
         # GH 20601
-        df = DataFrame(np.random.randn(2 ** 16, 2), index=[np.arange(2 ** 16), np.arange(2 ** 16)])
+        df = DataFrame(np.random.randn(2 ** 16, 2),
+                       index=[np.arange(2 ** 16), np.arange(2 ** 16)])
         with tm.assert_raises_regex(ValueError, 'int32 overflow'):
             df.unstack()
 

From a69438fd261cd7cbc828bba8b1fa6aa51f6d40fb Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Sun, 22 Apr 2018 13:15:48 -0400
Subject: [PATCH 06/13] ENH calculate size of the resulting pivot table and
 raise error if it's too big

---
 pandas/core/reshape/pivot.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index bb6e849cc94dd..16aa4df74b2d8 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -31,11 +31,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
     index = _convert_by(index)
     columns = _convert_by(columns)
 
-    num_rows = data.reindex(index, axis='columns').shape[0]
-    num_columns = data.reindex(columns, axis='columns').shape[0]
-    if num_rows * num_columns > (2 ** 31 - 1):
-        raise ValueError('Pivot table is too big, causing int32 overflow')
-
     if isinstance(aggfunc, list):
         pieces = []
         keys = []
@@ -86,9 +81,14 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
                 pass
         values = list(values)
 
-    # group by the cartesian product of the grouper
-    # if we have a categorical
-    grouped = data.groupby(keys, observed=False)
+    num_rows = (data.reindex(columns=index).drop_duplicates().shape[0]
+                if index else 1)
+    num_cols = (data.reindex(columns=columns).drop_duplicates().shape[0]
+                if columns else 1)
+    if num_rows * num_cols * len(values) > (2 ** 31 - 1):
+        raise ValueError('Pivot table is too big, causing int32 overflow')
+
+    grouped = data.groupby(keys)
     agged = grouped.agg(aggfunc)
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
         agged = agged.dropna(how='all')

From b44ca163ab993dc800f0f8f91807d10d096b9382 Mon Sep 17 00:00:00 2001
From: Anh Le <anh.le91@gmail.com>
Date: Mon, 30 Jul 2018 15:40:26 -0500
Subject: [PATCH 07/13] rebase onto upstream master

---
 pandas/core/reshape/pivot.py       | 7 -------
 pandas/tests/reshape/test_pivot.py | 6 +++---
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 16aa4df74b2d8..611cd350a3e53 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -81,13 +81,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
                 pass
         values = list(values)
 
-    num_rows = (data.reindex(columns=index).drop_duplicates().shape[0]
-                if index else 1)
-    num_cols = (data.reindex(columns=columns).drop_duplicates().shape[0]
-                if columns else 1)
-    if num_rows * num_cols * len(values) > (2 ** 31 - 1):
-        raise ValueError('Pivot table is too big, causing int32 overflow')
-
     grouped = data.groupby(keys)
     agged = grouped.agg(aggfunc)
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 8935cb6274733..eb76ff71bf152 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1280,11 +1280,11 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
         df = DataFrame({'ind1': np.arange(2 ** 16),
-                          'ind2': np.arange(2 ** 16),
-                          'count': np.arange(2 ** 16)})
+                        'ind2': np.arange(2 ** 16),
+                        'count': np.arange(2 ** 16)})
         with tm.assert_raises_regex(ValueError, 'int32 overflow'):
             df.pivot_table(index='ind1', columns='ind2',
-                             values='count', aggfunc='count')
+                           values='count', aggfunc='count')
 
 
 class TestCrosstab(object):

From 59678a65fd0c811920acd6a677fb37c27887f1a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20M=C3=BCller?= <florian@tomueller.de>
Date: Thu, 8 Nov 2018 12:03:13 +0100
Subject: [PATCH 08/13] ENH: Raise and catch FloatingPointError due to
 overflow

* Modify tests to only cover windows platforms
---
 pandas/core/reshape/reshape.py     |  9 ++++++---
 pandas/tests/reshape/test_pivot.py | 15 +++++++++------
 pandas/tests/test_multilevel.py    | 13 ++++++++-----
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index c649e2d751733..700440ec4aeef 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -130,9 +130,12 @@ def __init__(self, values, index, level=-1, value_columns=None,
         num_rows = np.max([index_level.size for index_level
                            in self.new_index_levels])
         num_columns = self.removed_level.size
-        if num_rows * num_columns > (2 ** 31 - 1):
-            raise ValueError('Unstacked DataFrame is too big, '
-                             'causing int32 overflow')
+        with np.errstate(all='raise'):
+            try:
+                num_columns * num_rows
+            except FloatingPointError:
+                raise ValueError('Unstacked DataFrame is too big, '
+                                 'causing int32 overflow')
 
         self._make_sorted_values_labels()
         self._make_selectors()
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index eb76ff71bf152..9aa4800597ca7 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from datetime import datetime, date, timedelta
+import sys
 
 import pytest
 
@@ -1279,12 +1280,14 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     @pytest.mark.slow
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
-        df = DataFrame({'ind1': np.arange(2 ** 16),
-                        'ind2': np.arange(2 ** 16),
-                        'count': np.arange(2 ** 16)})
-        with tm.assert_raises_regex(ValueError, 'int32 overflow'):
-            df.pivot_table(index='ind1', columns='ind2',
-                           values='count', aggfunc='count')
+        if sys.platform == 'win32':
+            df = DataFrame({'ind1': np.arange(2 ** 16),
+                            'ind2': np.arange(2 ** 16),
+                            'count': 0})
+
+            with tm.assert_raises_regex(ValueError, 'int32 overflow'):
+                df.pivot_table(index='ind1', columns='ind2',
+                               values='count', aggfunc='count')
 
 
 class TestCrosstab(object):
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 9f14ee3cb7f0d..68fa3643e4f18 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -3,6 +3,8 @@
 from warnings import catch_warnings, simplefilter
 import datetime
 import itertools
+import sys
+
 import pytest
 import pytz
 
@@ -1214,11 +1216,12 @@ def test_unstack_unobserved_keys(self):
 
     @pytest.mark.slow
     def test_unstack_number_of_levels_larger_than_int32(self):
-        # GH 20601
-        df = DataFrame(np.random.randn(2 ** 16, 2),
-                       index=[np.arange(2 ** 16), np.arange(2 ** 16)])
-        with tm.assert_raises_regex(ValueError, 'int32 overflow'):
-            df.unstack()
+        if sys.platform == 'win32':
+            # GH 20601
+            df = DataFrame(np.random.randn(2 ** 16, 2),
+                           index=[np.arange(2 ** 16), np.arange(2 ** 16)])
+            with tm.assert_raises_regex(ValueError, 'int32 overflow'):
+                df.unstack()
 
     def test_stack_order_with_unsorted_levels(self):
         # GH 16323

From 4dbbad750fb330663646160dd1190a6bcaf4231e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20M=C3=BCller?= <florian@tomueller.de>
Date: Mon, 12 Nov 2018 15:16:58 +0100
Subject: [PATCH 09/13] ENH: use pd.compat for windows check, add comment

---
 pandas/core/reshape/pivot.py       | 2 +-
 pandas/core/reshape/reshape.py     | 4 ++++
 pandas/tests/reshape/test_pivot.py | 4 ++--
 pandas/tests/test_multilevel.py    | 6 +++---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 611cd350a3e53..537eb290f8e83 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -81,7 +81,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
                 pass
         values = list(values)
 
-    grouped = data.groupby(keys)
+    grouped = data.groupby(keys, observed=False)
     agged = grouped.agg(aggfunc)
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
         agged = agged.dropna(how='all')
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 700440ec4aeef..2292384e681a2 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -127,6 +127,10 @@ def __init__(self, values, index, level=-1, value_columns=None,
         self.removed_level = self.new_index_levels.pop(self.level)
         self.removed_level_full = index.levels[self.level]
 
+        # Bug fix GH 20601
+        # If the data frame is too big, the number of unique index combinations
+        # will cause int32 overflow on Windows environments.
+        # We want to check and raise an error before this happens
         num_rows = np.max([index_level.size for index_level
                            in self.new_index_levels])
         num_columns = self.removed_level.size
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 9aa4800597ca7..d2663ba9c6e05 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -13,7 +13,7 @@
 from pandas import (DataFrame, Series, Index, MultiIndex,
                     Grouper, date_range, concat, Categorical)
 from pandas.core.reshape.pivot import pivot_table, crosstab
-from pandas.compat import range, product
+from pandas.compat import range, product, is_platform_windows
 import pandas.util.testing as tm
 from pandas.api.types import CategoricalDtype as CDT
 
@@ -1280,7 +1280,7 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     @pytest.mark.slow
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
-        if sys.platform == 'win32':
+        if is_platform_windows():
             df = DataFrame({'ind1': np.arange(2 ** 16),
                             'ind2': np.arange(2 ** 16),
                             'count': 0})
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 68fa3643e4f18..b8cbfe65c49df 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -19,7 +19,7 @@
 import pandas.core.common as com
 import pandas.util.testing as tm
 from pandas.compat import (range, lrange, StringIO, lzip, u, product as
-                           cart_product, zip)
+                           cart_product, zip, is_platform_windows)
 import pandas as pd
 import pandas._libs.index as _index
 
@@ -1216,8 +1216,8 @@ def test_unstack_unobserved_keys(self):
 
     @pytest.mark.slow
     def test_unstack_number_of_levels_larger_than_int32(self):
-        if sys.platform == 'win32':
-            # GH 20601
+        # GH 20601
+        if is_platform_windows():
             df = DataFrame(np.random.randn(2 ** 16, 2),
                            index=[np.arange(2 ** 16), np.arange(2 ** 16)])
             with tm.assert_raises_regex(ValueError, 'int32 overflow'):

From 263f598aa6b26330899fd437506b2ef0c0b3de4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20M=C3=BCller?= <florian@tomueller.de>
Date: Sun, 30 Dec 2018 16:58:57 +0100
Subject: [PATCH 10/13] ENH: ValueError on all platforms when max int32 is
 reached

---
 pandas/core/reshape/reshape.py     | 12 ++++++------
 pandas/tests/reshape/test_pivot.py | 18 ++++++++----------
 pandas/tests/test_multilevel.py    | 12 +++++-------
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 2d8ad48f46bc4..b7ee68b8feec8 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -116,12 +116,12 @@ def __init__(self, values, index, level=-1, value_columns=None,
         num_rows = np.max([index_level.size for index_level
                            in self.new_index_levels])
         num_columns = self.removed_level.size
-        with np.errstate(all='raise'):
-            try:
-                num_columns * num_rows
-            except FloatingPointError:
-                raise ValueError('Unstacked DataFrame is too big, '
-                                 'causing int32 overflow')
+
+        num_cells = np.multiply(num_rows, num_columns, dtype=np.int32)
+
+        if num_cells <= 0:
+            raise ValueError('Unstacked DataFrame is too big, '
+                             'causing int32 overflow')
 
         self._make_sorted_values_labels()
         self._make_selectors()
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index b031c80178066..0610db2e9a2b4 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 
 from datetime import datetime, date, timedelta
-import sys
 
 import pytest
 
@@ -13,7 +12,7 @@
 from pandas import (DataFrame, Series, Index, MultiIndex,
                     Grouper, date_range, concat, Categorical)
 from pandas.core.reshape.pivot import pivot_table, crosstab
-from pandas.compat import range, product, is_platform_windows
+from pandas.compat import range, product
 import pandas.util.testing as tm
 from pandas.api.types import CategoricalDtype as CDT
 
@@ -1276,14 +1275,13 @@ def test_pivot_string_func_vs_func(self, f, f_numpy):
     @pytest.mark.slow
     def test_pivot_number_of_levels_larger_than_int32(self):
         # GH 20601
-        if is_platform_windows():
-            df = DataFrame({'ind1': np.arange(2 ** 16),
-                            'ind2': np.arange(2 ** 16),
-                            'count': 0})
-
-            with tm.assert_raises_regex(ValueError, 'int32 overflow'):
-                df.pivot_table(index='ind1', columns='ind2',
-                               values='count', aggfunc='count')
+        df = DataFrame({'ind1': np.arange(2 ** 16),
+                        'ind2': np.arange(2 ** 16),
+                        'count': 0})
+
+        with pytest.raises(ValueError, match='int32 overflow'):
+            df.pivot_table(index='ind1', columns='ind2',
+                           values='count', aggfunc='count')
 
 
 class TestCrosstab(object):
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index d51126ad8a4cc..85bcb5c530c08 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -3,7 +3,6 @@
 from warnings import catch_warnings, simplefilter
 import datetime
 import itertools
-import sys
 
 import pytest
 import pytz
@@ -17,7 +16,7 @@
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
 import pandas.util.testing as tm
 from pandas.compat import (range, lrange, StringIO, lzip, u, product as
-                           cart_product, zip, is_platform_windows)
+                           cart_product, zip)
 import pandas as pd
 
 AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad',
@@ -725,11 +724,10 @@ def test_unstack_unobserved_keys(self):
     @pytest.mark.slow
     def test_unstack_number_of_levels_larger_than_int32(self):
         # GH 20601
-        if is_platform_windows():
-            df = DataFrame(np.random.randn(2 ** 16, 2),
-                           index=[np.arange(2 ** 16), np.arange(2 ** 16)])
-            with tm.assert_raises_regex(ValueError, 'int32 overflow'):
-                df.unstack()
+        df = DataFrame(np.random.randn(2 ** 16, 2),
+                       index=[np.arange(2 ** 16), np.arange(2 ** 16)])
+        with pytest.raises(ValueError, match='int32 overflow'):
+            df.unstack()
 
     def test_stack_order_with_unsorted_levels(self):
         # GH 16323

From b96689d8ec66ee90384ad34c2a87a36cdd0abc7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20M=C3=BCller?= <florian@tomueller.de>
Date: Sun, 30 Dec 2018 17:13:21 +0100
Subject: [PATCH 11/13] CLN: Added comment for overflow

---
 pandas/core/reshape/reshape.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index ea649f267a21a..1d5bac415f648 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -117,6 +117,7 @@ def __init__(self, values, index, level=-1, value_columns=None,
                            in self.new_index_levels])
         num_columns = self.removed_level.size
 
+        # GH20601: This forces an overflow if the number of cells is too high.
         num_cells = np.multiply(num_rows, num_columns, dtype=np.int32)
 
         if num_cells <= 0:

From 241729ff7d0a7687decba63dca36ad2876557b9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20M=C3=BCller?= <florian@tomueller.de>
Date: Sun, 30 Dec 2018 18:01:26 +0100
Subject: [PATCH 12/13] BUG: zero cells should be allowed

---
 pandas/core/reshape/reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 1d5bac415f648..f436b3b92a359 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -120,7 +120,7 @@ def __init__(self, values, index, level=-1, value_columns=None,
         # GH20601: This forces an overflow if the number of cells is too high.
         num_cells = np.multiply(num_rows, num_columns, dtype=np.int32)
 
-        if num_cells <= 0:
+        if num_rows > 0 and num_columns > 0 and num_cells <= 0:
             raise ValueError('Unstacked DataFrame is too big, '
                              'causing int32 overflow')
 

From a3cdbca33363c520afa5b9f9dc564e09096251bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20M=C3=BCller?= <florian@tomueller.de>
Date: Mon, 31 Dec 2018 12:43:46 +0100
Subject: [PATCH 13/13] DOC: Added whatsnew entry (#23512)

---
 doc/source/whatsnew/v0.24.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index a84fd118061bc..5f40ca2ad3b36 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1646,6 +1646,7 @@ Reshaping
 - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`)
 - Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken (:issue:`22227`).
 - Bug in :class:`DataFrame` prevented list subclasses to be used to construction (:issue:`21226`)
+- Bug in :func:`DataFrame.unstack` and :func:`DataFrame.pivot_table` returning a misleading error message when the resulting DataFrame has more elements than int32 can handle. Now, the error message is improved, pointing towards the actual problem (:issue:`20601`)
 
 .. _whatsnew_0240.bug_fixes.sparse: