From e107c1e675caabf7339e6b3c195f30768361d5c1 Mon Sep 17 00:00:00 2001 From: Chris Withers Date: Sun, 13 Dec 2020 18:17:16 +0000 Subject: [PATCH 1/8] adjust docs about xlrd changes to match my intentions --- doc/source/whatsnew/v1.2.0.rst | 15 ++++++--------- pandas/io/excel/_base.py | 26 ++++++++++++-------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bc7f5b8174573..16819e213226b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -10,21 +10,18 @@ including other versions of pandas. .. warning:: - The packages `xlrd `_ for reading excel - files and `xlwt `_ for - writing excel files are no longer maintained. These are the only engines in pandas - that support the xls format. + The `xlwt `_ package for writing old-style ``.xls`` + excel files is no longer maintained. + The `xlrd `_ package is now only for reading + old-style ``.xls`` files. Previously, the default argument ``engine=None`` to ``pd.read_excel`` would result in using the ``xlrd`` engine in many cases. If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. See the :func:`read_excel` documentation for more details. Attempting to read - ``.xls`` files or specifying ``engine="xlrd"`` to ``pd.read_excel`` will not - raise a warning. However users should be aware that ``xlrd`` is already - broken with certain package configurations, for example with Python 3.9 - when `defusedxml `_ is installed, and - is anticipated to be unusable in the future. + anything but ``.xls`` files when specifying ``engine="xlrd"`` to ``pd.read_excel`` will raise + an exception. Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index bf1011176693f..c00074e8597e6 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -105,25 +105,24 @@ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb". Engine compatibility : - - "xlrd" supports most old/new Excel file formats. + - "xlrd" supports old-style Excel files (.xls). - "openpyxl" supports newer Excel file formats. - "odf" supports OpenDocument file formats (.odf, .ods, .odt). - "pyxlsb" supports Binary Excel files. .. versionchanged:: 1.2.0 The engine `xlrd `_ - is no longer maintained, and is not supported with - python >= 3.9. When ``engine=None``, the following logic will be - used to determine the engine. + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), then `odf `_ will be used. - - Otherwise if ``path_or_buffer`` is a bytes stream, the file has the + - Otherwise if the file has the extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will be used. - Otherwise if `openpyxl `_ is installed, then ``openpyxl`` will be used. - - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. Specifying ``engine="xlrd"`` will continue to be allowed for the indefinite future. @@ -920,7 +919,7 @@ class ExcelFile: """ Class for parsing tabular excel sheets into DataFrame objects. - Uses xlrd engine by default. See read_excel for more documentation + See read_excel for more documentation Parameters ---------- @@ -933,26 +932,25 @@ class ExcelFile: Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` Engine compatibility : - - ``xlrd`` supports most old/new Excel file formats. + - ``xlrd`` old-style Excel files (.xls). - ``openpyxl`` supports newer Excel file formats. - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). - ``pyxlsb`` supports Binary Excel files. .. 
versionchanged:: 1.2.0 - The engine `xlrd `_ - is no longer maintained, and is not supported with - python >= 3.9. When ``engine=None``, the following logic will be - used to determine the engine. + The engine `xlrd `_ + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), then `odf `_ will be used. - - Otherwise if ``path_or_buffer`` is a bytes stream, the file has the + - Otherwise if the file has the extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will be used. - Otherwise if `openpyxl `_ is installed, then ``openpyxl`` will be used. - - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. Specifying ``engine="xlrd"`` will continue to be allowed for the indefinite future. From a70795d1178007afecba82b5f4af50739a6f3c70 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 22 Dec 2020 23:17:37 +0100 Subject: [PATCH 2/8] undo edits under discussion --- doc/source/whatsnew/v1.2.0.rst | 4 +--- pandas/io/excel/_base.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 16819e213226b..b6cacfc391d66 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -19,9 +19,7 @@ including other versions of pandas. would result in using the ``xlrd`` engine in many cases. If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. - See the :func:`read_excel` documentation for more details. Attempting to read - anything but ``.xls`` files when specifying ``engine="xlrd"`` to ``pd.read_excel`` will raise - an exception. + See the :func:`read_excel` documentation for more details. Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c00074e8597e6..e9a9e5e88c8d6 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -118,11 +118,12 @@ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), then `odf `_ will be used. - - Otherwise if the file has the + - Otherwise if ``path_or_buffer`` is a bytes stream, the file has the extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will be used. - Otherwise if `openpyxl `_ is installed, then ``openpyxl`` will be used. + - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. Specifying ``engine="xlrd"`` will continue to be allowed for the indefinite future. @@ -939,18 +940,19 @@ class ExcelFile: .. versionchanged:: 1.2.0 - The engine `xlrd `_ - now only supports old-style ``.xls`` files. - When ``engine=None``, the following logic will be - used to determine the engine: + The engine `xlrd `_ + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), then `odf `_ will be used. - - Otherwise if the file has the + - Otherwise if ``path_or_buffer`` is a bytes stream, the file has the extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will be used. - Otherwise if `openpyxl `_ is installed, then ``openpyxl`` will be used. + - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. Specifying ``engine="xlrd"`` will continue to be allowed for the indefinite future. 
From d7d8fd3921da492bbe8a8a14df2c253a937eaf41 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 22 Dec 2020 23:18:57 +0100 Subject: [PATCH 3/8] fixup --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index e9a9e5e88c8d6..6e0fda5f0f955 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -952,7 +952,7 @@ class ExcelFile: will be used. - Otherwise if `openpyxl `_ is installed, then ``openpyxl`` will be used. - - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. + - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. Specifying ``engine="xlrd"`` will continue to be allowed for the indefinite future. From 51706b2d5c0ec67caf19bf04ddbe6c25e16161a2 Mon Sep 17 00:00:00 2001 From: Chris Withers Date: Tue, 22 Dec 2020 23:17:51 +0000 Subject: [PATCH 4/8] Add warnings that using xlrd to open xlsx files is no longer supported. --- doc/source/user_guide/io.rst | 7 ++++++- doc/source/whatsnew/v1.2.0.rst | 5 +++++ pandas/io/excel/_base.py | 6 ++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 965833c013c03..86ddd6cb1a01d 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2822,13 +2822,18 @@ Excel files The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``) files using the ``xlrd`` Python module. Excel 2007+ (``.xlsx``) files -can be read using either ``xlrd`` or ``openpyxl``. Binary Excel (``.xlsb``) +can be read using ``openpyxl``. Binary Excel (``.xlsb``) files can be read using ``pyxlsb``. The :meth:`~DataFrame.to_excel` instance method is used for saving a ``DataFrame`` to Excel. Generally the semantics are similar to working with :ref:`csv` data. See the :ref:`cookbook` for some advanced strategies. +.. 
warning:: + + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is not supported, switch to using ``openpyxl`` instead. + .. _io.excel_reader: Reading Excel files diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b6cacfc391d66..9dd5d7f1ef036 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -21,6 +21,11 @@ including other versions of pandas. many of these cases will now default to using the ``openpyxl`` engine. See the :func:`read_excel` documentation for more details. + .. warning:: + + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is not supported, switch to using ``openpyxl`` instead. + Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. While this option is now deprecated and will also raise a ``FutureWarning``, diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6e0fda5f0f955..df3baa0a8c381 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -954,8 +954,10 @@ class ExcelFile: then ``openpyxl`` will be used. - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. - Specifying ``engine="xlrd"`` will continue to be allowed for the - indefinite future. + .. warning:: + + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is not supported; switch to using ``openpyxl`` instead. 
""" from pandas.io.excel._odfreader import ODFReader From f77ca98b3d8e7514a59ed92c816f380d2bbf9845 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Dec 2020 15:05:01 +0100 Subject: [PATCH 5/8] add warning to io.rst as well --- doc/source/user_guide/io.rst | 30 +++++++++++++++++++++++++----- doc/source/whatsnew/v1.2.0.rst | 15 ++++++++------- pandas/io/excel/_base.py | 2 +- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 86ddd6cb1a01d..7e9ab75b64d4b 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2820,9 +2820,9 @@ parse HTML tables in the top-level pandas io function ``read_html``. Excel files ----------- -The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``) -files using the ``xlrd`` Python module. Excel 2007+ (``.xlsx``) files -can be read using ``openpyxl``. Binary Excel (``.xlsb``) +The :func:`~pandas.read_excel` method can read Excel 2007+ (``.xlsx``) files +using the ``openpyxl`` Python module. Excel 2003 (``.xls``) files +can be read using ``xlrd``. Binary Excel (``.xlsb``) files can be read using ``pyxlsb``. The :meth:`~DataFrame.to_excel` instance method is used for saving a ``DataFrame`` to Excel. Generally the semantics are @@ -2831,8 +2831,28 @@ See the :ref:`cookbook` for some advanced strategies. .. warning:: - Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. - This is not supported, switch to using ``openpyxl`` instead. + The `xlwt `_ package for writing old-style ``.xls`` + excel files is no longer maintained. + The `xlrd `_ package is now only for reading + old-style ``.xls`` files. + + Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` + would result in using the ``xlrd`` engine in many cases, also for new + Excel 2007+ (``.xlsx``) files. + If `openpyxl `_ is installed, + many of these cases will now default to using the ``openpyxl`` engine. 
+ See the :func:`read_excel` documentation for more details. + + Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + (``.xlsx``) files. + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is no longer supported, switch to using ``openpyxl`` instead. + + Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` + unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. + While this option is now deprecated and will also raise a ``FutureWarning``, + it can be globally set and the warning suppressed. Users are recommended to + write ``.xlsx`` files using the ``openpyxl`` engine instead. .. _io.excel_reader: diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9dd5d7f1ef036..706dab6cb148d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -15,16 +15,17 @@ including other versions of pandas. The `xlrd `_ package is now only for reading old-style ``.xls`` files. - Previously, the default argument ``engine=None`` to ``pd.read_excel`` - would result in using the ``xlrd`` engine in many cases. If - `openpyxl `_ is installed, + Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` + would result in using the ``xlrd`` engine in many cases, also for new + Excel 2007+ (``.xlsx``) files. + If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. See the :func:`read_excel` documentation for more details. - .. warning:: - - Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. - This is not supported, switch to using ``openpyxl`` instead. + Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + (``.xlsx``) files. + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is no longer supported, switch to using ``openpyxl`` instead. 
Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index df3baa0a8c381..c72f294bf6ac8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -933,7 +933,7 @@ class ExcelFile: Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` Engine compatibility : - - ``xlrd`` old-style Excel files (.xls). + - ``xlrd`` supports old-style Excel files (.xls). - ``openpyxl`` supports newer Excel file formats. - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). - ``pyxlsb`` supports Binary Excel files. From 41c14cf53e5b38a9f14e917765af36cf14620b35 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Dec 2020 15:37:58 +0100 Subject: [PATCH 6/8] edits --- doc/source/user_guide/io.rst | 4 ++-- doc/source/whatsnew/v1.2.0.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7e9ab75b64d4b..80e09e4567faa 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2837,7 +2837,7 @@ See the :ref:`cookbook` for some advanced strategies. old-style ``.xls`` files. Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` - would result in using the ``xlrd`` engine in many cases, also for new + would result in using the ``xlrd`` engine in many cases, including for new Excel 2007+ (``.xlsx``) files. If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. @@ -2845,7 +2845,7 @@ See the :ref:`cookbook` for some advanced strategies. Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ (``.xlsx``) files. - Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. 
+ **Please do not report issues when using xlrd to read .xlsx files.** This is no longer supported, switch to using ``openpyxl`` instead. Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 706dab6cb148d..9060dd5a75f5c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -16,7 +16,7 @@ including other versions of pandas. old-style ``.xls`` files. Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` - would result in using the ``xlrd`` engine in many cases, also for new + would result in using the ``xlrd`` engine in many cases, including for new Excel 2007+ (``.xlsx``) files. If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. @@ -24,7 +24,7 @@ including other versions of pandas. Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ (``.xlsx``) files. - Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + **Please do not report issues when using xlrd to read .xlsx files.** This is no longer supported, switch to using ``openpyxl`` instead. Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` From 4bc6a0842335abdf4108e3827fde48b2cca85386 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Dec 2020 15:42:29 +0100 Subject: [PATCH 7/8] edit --- doc/source/user_guide/io.rst | 2 +- doc/source/whatsnew/v1.2.0.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 80e09e4567faa..fb77565889f3e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2837,7 +2837,7 @@ See the :ref:`cookbook` for some advanced strategies. old-style ``.xls`` files. 
Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` - would result in using the ``xlrd`` engine in many cases, including for new + would result in using the ``xlrd`` engine in many cases, including new Excel 2007+ (``.xlsx``) files. If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9060dd5a75f5c..a81443c1be93b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -16,7 +16,7 @@ including other versions of pandas. old-style ``.xls`` files. Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` - would result in using the ``xlrd`` engine in many cases, including for new + would result in using the ``xlrd`` engine in many cases, including new Excel 2007+ (``.xlsx``) files. If `openpyxl `_ is installed, many of these cases will now default to using the ``openpyxl`` engine. From 47e26b5d86c78601f0d2614b0a1019efccba9a10 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Dec 2020 16:25:46 +0100 Subject: [PATCH 8/8] fix sphinx warning --- doc/source/user_guide/io.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index fb77565889f3e..eba097cd8c345 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2831,15 +2831,15 @@ See the :ref:`cookbook` for some advanced strategies. .. warning:: - The `xlwt `_ package for writing old-style ``.xls`` + The `xlwt `__ package for writing old-style ``.xls`` excel files is no longer maintained. - The `xlrd `_ package is now only for reading + The `xlrd `__ package is now only for reading old-style ``.xls`` files. Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` would result in using the ``xlrd`` engine in many cases, including new Excel 2007+ (``.xlsx``) files. 
- If `openpyxl `_ is installed, + If `openpyxl `__ is installed, many of these cases will now default to using the ``openpyxl`` engine. See the :func:`read_excel` documentation for more details.