From 20e357aff8dcc14d35b2efd1b151c26cfa338c36 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sat, 27 Jan 2024 16:06:29 +0800 Subject: [PATCH 01/10] Add check for character limit --- pandas/core/generic.py | 4 ++++ pandas/io/excel/_base.py | 6 ++++++ pandas/tests/io/excel/test_writers.py | 9 +++++++++ 3 files changed, 19 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 42ebfea1f370e..6b93727587d38 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2363,6 +2363,10 @@ def to_excel( Once a workbook has been saved it is not possible to write further data without rewriting the whole workbook. + Pandas will check the number of rows, columns, + and cell character count does not exceed Excel's limitations. + All other limitations must be checked by the user. + Examples -------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2189f54263dec..a51d239908d22 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1326,6 +1326,12 @@ def _value_with_fmt( fmt = "0" else: val = str(val) + # GH#56954 + if len(val) > 32767: + warnings.warn( + "String value too long, truncated to 32767 characters", + UserWarning, + stacklevel=find_stack_level()) return val, fmt diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 76a138a295bda..c10dcaa814556 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1505,3 +1505,12 @@ def test_subclass_attr(klass): attrs_base = {name for name in dir(ExcelWriter) if not name.startswith("_")} attrs_klass = {name for name in dir(klass) if not name.startswith("_")} assert not attrs_base.symmetric_difference(attrs_klass) + + +def test_to_excel_raising_warning_when_cell_character_exceed_limit(): + # GH#56954 + df = DataFrame({"A": ["a" * 32768]}) + msg = "String value too long, truncated to 32767 characters" + with 
tm.assert_produces_warning(UserWarning, match=msg): + buf = BytesIO() + df.to_excel(buf) From 74e302f80abe307b9277c1cd59d0a5b43ed4bd3f Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sat, 27 Jan 2024 16:18:02 +0800 Subject: [PATCH 02/10] Run pre-commit --- pandas/io/excel/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a51d239908d22..b121ea9ab43b7 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1331,7 +1331,8 @@ def _value_with_fmt( warnings.warn( "String value too long, truncated to 32767 characters", UserWarning, - stacklevel=find_stack_level()) + stacklevel=find_stack_level(), + ) return val, fmt From 74f4f815ae29e08b502e8c17bd00a089788aa690 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sat, 27 Jan 2024 20:19:11 +0800 Subject: [PATCH 03/10] Fix test --- pandas/tests/io/excel/test_writers.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index c10dcaa814556..79e116efbd312 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1397,6 +1397,13 @@ def test_to_excel_empty_frame(self, engine, ext): expected = DataFrame() tm.assert_frame_equal(result, expected) + def test_to_excel_raising_warning_when_cell_character_exceed_limit(self, path): + # GH#56954 + df = DataFrame({"A": ["a" * 32768]}) + msg = "String value too long, truncated to 32767 characters" + with tm.assert_produces_warning(UserWarning, match=msg): + df.to_excel(path) + class TestExcelWriterEngineTests: @pytest.mark.parametrize( @@ -1505,12 +1512,3 @@ def test_subclass_attr(klass): attrs_base = {name for name in dir(ExcelWriter) if not name.startswith("_")} attrs_klass = {name for name in dir(klass) if not name.startswith("_")} assert not 
attrs_base.symmetric_difference(attrs_klass) - - -def test_to_excel_raising_warning_when_cell_character_exceed_limit(): - # GH#56954 - df = DataFrame({"A": ["a" * 32768]}) - msg = "String value too long, truncated to 32767 characters" - with tm.assert_produces_warning(UserWarning, match=msg): - buf = BytesIO() - df.to_excel(buf) From babdf29c0420a4093adc96b7c174155209b4ae85 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sat, 27 Jan 2024 21:11:51 +0800 Subject: [PATCH 04/10] Fix test --- pandas/tests/io/excel/test_writers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index cd46c74ba93c3..2be4aed767545 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1385,12 +1385,14 @@ def test_to_excel_empty_frame(self, engine, ext): expected = DataFrame() tm.assert_frame_equal(result, expected) - def test_to_excel_raising_warning_when_cell_character_exceed_limit(self, path): + def test_to_excel_raising_warning_when_cell_character_exceed_limit( + self, path, engine + ): # GH#56954 df = DataFrame({"A": ["a" * 32768]}) msg = "String value too long, truncated to 32767 characters" with tm.assert_produces_warning(UserWarning, match=msg): - df.to_excel(path) + df.to_excel(path, engine=engine) class TestExcelWriterEngineTests: From 476808915180fbaada37af2d3941171635a152d0 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sun, 28 Jan 2024 13:09:22 +0800 Subject: [PATCH 05/10] Fix test: ignore extra warnings --- pandas/tests/io/excel/test_writers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 2be4aed767545..4609f3946eb73 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1391,7 +1391,9 @@ def 
test_to_excel_raising_warning_when_cell_character_exceed_limit( # GH#56954 df = DataFrame({"A": ["a" * 32768]}) msg = "String value too long, truncated to 32767 characters" - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning( + UserWarning, match=msg, raise_on_extra_warnings=False + ): df.to_excel(path, engine=engine) From 65284bbab9ac02a7acbfe4c837a3459933933825 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sun, 28 Jan 2024 23:15:45 +0800 Subject: [PATCH 06/10] Improve by review --- pandas/core/generic.py | 2 +- pandas/io/excel/_base.py | 6 ++---- pandas/tests/io/excel/test_writers.py | 8 +++----- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a49597d1a0a3c..b5e2720b430d8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2363,7 +2363,7 @@ def to_excel( Once a workbook has been saved it is not possible to write further data without rewriting the whole workbook. - Pandas will check the number of rows, columns, + pandas will check the number of rows, columns, and cell character count does not exceed Excel's limitations. All other limitations must be checked by the user. 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d3b228855d8f8..074a77da8d462 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1328,10 +1328,8 @@ def _value_with_fmt( val = str(val) # GH#56954 if len(val) > 32767: - warnings.warn( - "String value too long, truncated to 32767 characters", - UserWarning, - stacklevel=find_stack_level(), + raise ValueError( + "Cell contents too long, truncated to 32767 characters" ) return val, fmt diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 4609f3946eb73..b7c8d7b73b3da 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1385,15 +1385,13 @@ def test_to_excel_empty_frame(self, engine, ext): expected = DataFrame() tm.assert_frame_equal(result, expected) - def test_to_excel_raising_warning_when_cell_character_exceed_limit( + def test_to_excel_raising_error_when_cell_character_exceed_limit( self, path, engine ): # GH#56954 df = DataFrame({"A": ["a" * 32768]}) - msg = "String value too long, truncated to 32767 characters" - with tm.assert_produces_warning( - UserWarning, match=msg, raise_on_extra_warnings=False - ): + msg = "Cell contents too long, truncated to 32767 characters" + with pytest.raises(ValueError, match=msg): df.to_excel(path, engine=engine) From 41cad2bcea14f41b3848f24879a62d0cd18d0da7 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Sun, 28 Jan 2024 23:28:54 +0800 Subject: [PATCH 07/10] Add what's new --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 73201fa93a8aa..a20d87a39a19f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :func:`to_excel` now raises a ``ValueError`` when the character count in a cell exceeds 
Excel's limitation of 32767 characters (:issue:`56954`) - .. --------------------------------------------------------------------------- From 2e79c610ce65e57134037aa006c8f25312f4b68b Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Tue, 30 Jan 2024 16:37:11 +0800 Subject: [PATCH 08/10] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5e0355cba18a0..4504b8eb7f3a4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- :func:`to_excel` now raises a ``ValueError`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- :func:`DataFrame.to_excel` now raises a ``ValueError`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - .. 
--------------------------------------------------------------------------- From 62b8b540082a1ea6d4bff55bc61b2abccf9e1513 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:29:00 +0800 Subject: [PATCH 09/10] Improve --- pandas/io/excel/_base.py | 9 ++++++--- pandas/tests/io/excel/test_writers.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 074a77da8d462..4109b6d0965bb 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1327,11 +1327,14 @@ def _value_with_fmt( else: val = str(val) # GH#56954 + # Excel's limitation on cell contents is 32767 characters + # xref https://support.microsoft.com/en-au/office/excel-specifications-and-limits-1672b34d-7043-467e-8e27-269d656771c3 if len(val) > 32767: - raise ValueError( - "Cell contents too long, truncated to 32767 characters" + warnings.warn( + "Cell contents too long, truncated to 32767 characters", + UserWarning, + stacklevel=find_stack_level(), ) - return val, fmt @classmethod diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index b7c8d7b73b3da..6ea48cd759fbc 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1385,14 +1385,17 @@ def test_to_excel_empty_frame(self, engine, ext): expected = DataFrame() tm.assert_frame_equal(result, expected) - def test_to_excel_raising_error_when_cell_character_exceed_limit( + def test_to_excel_raising_warning_when_cell_character_exceed_limit( self, path, engine ): # GH#56954 df = DataFrame({"A": ["a" * 32768]}) msg = "Cell contents too long, truncated to 32767 characters" - with pytest.raises(ValueError, match=msg): - df.to_excel(path, engine=engine) + with tm.assert_produces_warning( + UserWarning, match=msg, raise_on_extra_warnings=False + ): + buf = BytesIO() + df.to_excel(buf) class TestExcelWriterEngineTests: From 
bd6a97579a9375f35d8f51bc9b64da4d4633db6c Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:32:31 +0800 Subject: [PATCH 10/10] Change what's new --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4504b8eb7f3a4..ec6172b4a7ba6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- :func:`DataFrame.to_excel` now raises a ``ValueError`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- :func:`DataFrame.to_excel` now emits a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - .. ---------------------------------------------------------------------------