diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 276812a564e03..1a0df4789f4c7 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -466,6 +466,7 @@ I/O
 - Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`)
 - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`)
 - Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`)
+- :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index fd6e3304ec4ef..4db00e34b39e2 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -341,6 +341,9 @@ class ExcelFormatter:
         This is only called for body cells.
     """
 
+    max_rows = 2**20
+    max_cols = 2**14
+
     def __init__(self, df, na_rep='', float_format=None, cols=None,
                  header=True, index=True, index_label=None, merge_cells=False,
                  inf_rep='inf', style_converter=None):
@@ -648,6 +651,13 @@ def write(self, writer, sheet_name='Sheet1', startrow=0,
         from pandas.io.excel import ExcelWriter
         from pandas.io.common import _stringify_path
 
+        num_rows, num_cols = self.df.shape
+        if num_rows > self.max_rows or num_cols > self.max_cols:
+            raise ValueError("This sheet is too large! Your sheet size is: " +
+                             "{}, {} ".format(num_rows, num_cols) +
+                             "Max sheet size is: {}, {}".
+                             format(self.max_rows, self.max_cols))
+
         if isinstance(writer, ExcelWriter):
             need_save = False
         else:
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 100de227aa97c..7fe8e1d18838f 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -1185,6 +1185,24 @@ class and any subclasses, on account of the `autouse=True`
 class TestExcelWriter(_WriterBase):
     # Base class for test cases to run with different Excel writers.
 
+    def test_excel_sheet_size(self):
+
+        # GH 26080
+        breaking_row_count = 2**20 + 1
+        breaking_col_count = 2**14 + 1
+        # purposely using two arrays to prevent memory issues while testing
+        row_arr = np.zeros(shape=(breaking_row_count, 1))
+        col_arr = np.zeros(shape=(1, breaking_col_count))
+        row_df = pd.DataFrame(row_arr)
+        col_df = pd.DataFrame(col_arr)
+
+        msg = "sheet is too large"
+        with pytest.raises(ValueError, match=msg):
+            row_df.to_excel(self.path)
+
+        with pytest.raises(ValueError, match=msg):
+            col_df.to_excel(self.path)
+
     def test_excel_sheet_by_name_raise(self, *_):
         import xlrd
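
For reference, here is a minimal sketch (not part of the diff) of how the new guard surfaces to user code, assuming a pandas build that includes the change above; the output path "too_big.xlsx" is purely illustrative. Because the dimension check runs at the top of ExcelFormatter.write, the error is raised before any cells are written.

    import numpy as np
    import pandas as pd
    import pytest

    # Excel caps a worksheet at 2**20 (1,048,576) rows and 2**14 (16,384)
    # columns; a single extra row is enough to trip the new check.
    too_many_rows = pd.DataFrame(np.zeros(shape=(2**20 + 1, 1)))

    # With the change above, to_excel now fails fast with a descriptive
    # ValueError instead of handing the oversized frame to the writer engine.
    with pytest.raises(ValueError, match="sheet is too large"):
        too_many_rows.to_excel("too_big.xlsx")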