From 69bad56b4be1f4532edfc5629b6caa753356600c Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 9 Feb 2021 11:22:08 -0600 Subject: [PATCH 1/5] DEP: bump min version of openpyxl to 3.0.0 #39603 --- ci/deps/azure-37-locale_slow.yaml | 2 +- ci/deps/azure-37-minimum_versions.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/compat/_optional.py | 2 +- pandas/io/excel/_openpyxl.py | 9 +++------ 6 files changed, 8 insertions(+), 11 deletions(-) diff --git a/ci/deps/azure-37-locale_slow.yaml b/ci/deps/azure-37-locale_slow.yaml index 7f658fe62d268..0c47b1a72774f 100644 --- a/ci/deps/azure-37-locale_slow.yaml +++ b/ci/deps/azure-37-locale_slow.yaml @@ -18,7 +18,7 @@ dependencies: - lxml - matplotlib=3.0.0 - numpy=1.16.* - - openpyxl=2.6.0 + - openpyxl=3.0.0 - python-dateutil - python-blosc - pytz=2017.3 diff --git a/ci/deps/azure-37-minimum_versions.yaml b/ci/deps/azure-37-minimum_versions.yaml index f184ea87c89fe..9cc158b76cd41 100644 --- a/ci/deps/azure-37-minimum_versions.yaml +++ b/ci/deps/azure-37-minimum_versions.yaml @@ -19,7 +19,7 @@ dependencies: - numba=0.46.0 - numexpr=2.6.8 - numpy=1.16.5 - - openpyxl=2.6.0 + - openpyxl=3.0.0 - pytables=3.5.1 - python-dateutil=2.7.3 - pytz=2017.3 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 49039f05b889a..06e1af75053d3 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -274,7 +274,7 @@ html5lib 1.0.1 HTML parser for read_html (see :ref lxml 4.3.0 HTML parser for read_html (see :ref:`note `) matplotlib 2.2.3 Visualization numba 0.46.0 Alternative execution engine for rolling operations -openpyxl 2.6.0 Reading / writing for xlsx files +openpyxl 3.0.0 Reading / writing for xlsx files pandas-gbq 0.12.0 Google Big Query access psycopg2 2.7 PostgreSQL engine for sqlalchemy pyarrow 0.15.0 Parquet, ORC, and feather reading / writing diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bed3484793bcc..7e4f9b24b460e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -186,7 +186,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | numba | 0.46.0 | | +-----------------+-----------------+---------+ -| openpyxl | 2.6.0 | | +| openpyxl | 3.0.0 | X | +-----------------+-----------------+---------+ | pyarrow | 0.15.0 | | +-----------------+-----------------+---------+ diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index bcad9f1ddab09..eb2b4caddb7a6 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -17,7 +17,7 @@ "matplotlib": "2.2.3", "numexpr": "2.6.8", "odfpy": "1.3.0", - "openpyxl": "2.6.0", + "openpyxl": "3.0.0", "pandas_gbq": "0.12.0", "pyarrow": "0.15.0", "pytest": "5.0.1", diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 3a753a707166e..2d5e2c7d0c8e4 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -1,13 +1,12 @@ from __future__ import annotations -from distutils.version import LooseVersion import mmap from typing import TYPE_CHECKING, Dict, List, Optional import numpy as np from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions -from pandas.compat._optional import get_version, import_optional_dependency +from pandas.compat._optional import import_optional_dependency from pandas.io.excel._base import BaseExcelReader, ExcelWriter from pandas.io.excel._util import validate_freeze_panes @@ -536,13 +535,11 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: # writers sometimes omit or get it wrong import openpyxl - version = LooseVersion(get_version(openpyxl)) - # There is no good way of determining if a sheet is read-only # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1605 is_readonly = hasattr(sheet, "reset_dimensions") - if version >= "3.0.0" and is_readonly: + if is_readonly: sheet.reset_dimensions() data: List[List[Scalar]] = [] @@ -556,7 +553,7 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: # Trim trailing empty rows data = data[: last_row_with_data + 1] - if version >= "3.0.0" and is_readonly and len(data) > 0: + if is_readonly and len(data) > 0: # With dimension reset, openpyxl no longer pads rows max_width = max(len(data_row) for data_row in data) if min(len(data_row) for data_row in data) < max_width: From 035b454d6957b6973c2f2b4ac4b6e176afc889c7 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 9 Feb 2021 11:33:40 -0600 Subject: [PATCH 2/5] fix import --- pandas/io/excel/_openpyxl.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 2d5e2c7d0c8e4..6722e0b511a5c 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -530,10 +530,6 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return cell.value def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - # GH 39001 - # Reading of excel file depends on dimension data being correct but - # writers sometimes omit or get it wrong - import openpyxl # There is no good way of determining if a sheet is read-only # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1605 From 7ef8ab5ef7611f318953452608aae2a500b07c6e Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Wed, 10 Feb 2021 20:10:35 -0600 Subject: [PATCH 3/5] remove version compat in openpyxl test --- pandas/tests/io/excel/test_openpyxl.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 0962b719efd4d..8128e958141e2 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,11 +1,8 @@ -from distutils.version import LooseVersion from pathlib import Path import numpy as np import pytest -from pandas.compat._optional import get_version - import pandas as pd from pandas import DataFrame import pandas._testing as tm @@ -157,10 +154,6 @@ def test_read_with_bad_dimension( datapath, ext, header, expected_data, filename, read_only, request ): # GH 38956, 39001 - no/incorrect dimension information - version = LooseVersion(get_version(openpyxl)) - if (read_only or read_only is None) and version < "3.0.0": - msg = "openpyxl read-only sheet is incorrect when dimension data is wrong" - request.node.add_marker(pytest.mark.xfail(reason=msg)) path = datapath("io", "data", "excel", f"{filename}{ext}") if read_only is None: result = pd.read_excel(path, header=header) @@ -195,10 +188,6 @@ def test_append_mode_file(ext): @pytest.mark.parametrize("read_only", [True, False, None]) def test_read_with_empty_trailing_rows(datapath, ext, read_only, request): # GH 39181 - version = LooseVersion(get_version(openpyxl)) - if (read_only or read_only is None) and version < "3.0.0": - msg = "openpyxl read-only sheet is incorrect when dimension data is wrong" - request.node.add_marker(pytest.mark.xfail(reason=msg)) path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}") if read_only is None: result = pd.read_excel(path) From 9d1d82f7095c50c03f706de851259d1dd98c9a56 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 13 Feb 2021 15:32:52 -0600 Subject: [PATCH 4/5] add comments back --- pandas/io/excel/_openpyxl.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 6722e0b511a5c..ba5097041376a 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -536,6 +536,9 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: is_readonly = hasattr(sheet, "reset_dimensions") if is_readonly: + # GH 39001 + # Reading of excel file depends on dimension data being correct but + # writers sometimes omit or get it wrong sheet.reset_dimensions() data: List[List[Scalar]] = [] From 006e4ae8b15da3f0fb4cac9ae24ba44e8fcc0ad0 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Mon, 15 Feb 2021 10:03:11 -0600 Subject: [PATCH 5/5] fix typo --- pandas/io/excel/_openpyxl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index ef0c030f49260..ef70706920dc4 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -531,7 +531,7 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - ifself.book.read_only: + if self.book.read_only: sheet.reset_dimensions() data: List[List[Scalar]] = []