From 817199fe29dc5f825737435f2a49992d5feab409 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 29 Mar 2023 23:24:43 -0400 Subject: [PATCH 01/29] Fixing merge conflicts --- doc/source/whatsnew/v2.1.0.rst | 3 ++- pandas/io/excel/_base.py | 31 +++++++++++++++++++++------ pandas/io/excel/_odfreader.py | 12 ++++++++--- pandas/io/excel/_openpyxl.py | 16 +++++++++++--- pandas/io/excel/_pyxlsb.py | 12 ++++++++--- pandas/io/excel/_xlrd.py | 16 +++++++++----- pandas/tests/io/excel/test_readers.py | 21 ++++++++++++++++++ 7 files changed, 90 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 38faf90f1de74..d10676c990d27 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -38,7 +38,8 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). -- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) +- :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`). +- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`). .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 8c3bbb7798f68..5d5729cf06bbe 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -289,6 +289,9 @@ .. versionadded:: 2.0 +engine_kwargs : dict + Arbitrary keyword arguments passed to excel engine. 
+ Returns ------- DataFrame or dict of DataFrames @@ -302,6 +305,11 @@ read_csv : Read a comma-separated values (csv) file into DataFrame. read_fwf : Read a table of fixed-width formatted lines into DataFrame. +Notes +----- +For specific information on the methods used for each Excel engine, refer to the pandas +user guide: :doc: `/source/user_guide/io` + Examples -------- The file can be read using the file name as string or an open file object: @@ -472,13 +480,18 @@ def read_excel( skipfooter: int = 0, storage_options: StorageOptions = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, + engine_kwargs: dict = dict(), ) -> DataFrame | dict[IntStrT, DataFrame]: check_dtype_backend(dtype_backend) - should_close = False if not isinstance(io, ExcelFile): should_close = True - io = ExcelFile(io, storage_options=storage_options, engine=engine) + io = ExcelFile( + io, + storage_options=storage_options, + engine=engine, + engine_kwargs=engine_kwargs, + ) elif engine and engine != io.engine: raise ValueError( "Engine should not be specified when passing " @@ -520,7 +533,10 @@ def read_excel( class BaseExcelReader(metaclass=abc.ABCMeta): def __init__( - self, filepath_or_buffer, storage_options: StorageOptions = None + self, + filepath_or_buffer, + storage_options: StorageOptions = None, + **engine_kwargs, ) -> None: # First argument can also be bytes, so create a buffer if isinstance(filepath_or_buffer, bytes): @@ -540,7 +556,7 @@ def __init__( # N.B. 
xlrd.Book has a read attribute too self.handles.handle.seek(0) try: - self.book = self.load_workbook(self.handles.handle) + self.book = self.load_workbook(self.handles.handle, **engine_kwargs) except Exception: self.close() raise @@ -555,7 +571,7 @@ def _workbook_class(self): pass @abc.abstractmethod - def load_workbook(self, filepath_or_buffer): + def load_workbook(self, filepath_or_buffer, **engine_kwargs): pass def close(self) -> None: @@ -1469,6 +1485,7 @@ def __init__( path_or_buffer, engine: str | None = None, storage_options: StorageOptions = None, + engine_kwargs: dict = dict(), ) -> None: if engine is not None and engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") @@ -1513,7 +1530,9 @@ def __init__( self.engine = engine self.storage_options = storage_options - self._reader = self._engines[engine](self._io, storage_options=storage_options) + self._reader = self._engines[engine]( + self._io, storage_options=storage_options, **engine_kwargs + ) def __fspath__(self): return self._io diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index c3d7cb5df717f..1d2f57b09b2cd 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -31,6 +31,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], storage_options: StorageOptions = None, + **engine_kwargs, ) -> None: """ Read tables out of OpenDocument formatted files. @@ -40,9 +41,12 @@ def __init__( filepath_or_buffer : str, path to be parsed or an open readable stream. 
{storage_options} + engine_kwargs : Arbitrary keyword arguments passed to excel engine """ import_optional_dependency("odf") - super().__init__(filepath_or_buffer, storage_options=storage_options) + super().__init__( + filepath_or_buffer, storage_options=storage_options, **engine_kwargs + ) @property def _workbook_class(self): @@ -50,10 +54,12 @@ def _workbook_class(self): return OpenDocument - def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + def load_workbook( + self, filepath_or_buffer: FilePath | ReadBuffer[bytes], **engine_kwargs + ): from odf.opendocument import load - return load(filepath_or_buffer) + return load(filepath_or_buffer, **engine_kwargs) @property def empty_value(self) -> str: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index e751c919ee8dc..3e1027c8e964c 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -536,6 +536,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], storage_options: StorageOptions = None, + **engine_kwargs, ) -> None: """ Reader using openpyxl engine. @@ -545,9 +546,12 @@ def __init__( filepath_or_buffer : str, path object or Workbook Object to be parsed. 
{storage_options} + engine_kwargs : Arbitrary keyword arguments passed to excel engine """ import_optional_dependency("openpyxl") - super().__init__(filepath_or_buffer, storage_options=storage_options) + super().__init__( + filepath_or_buffer, storage_options=storage_options, **engine_kwargs + ) @property def _workbook_class(self): @@ -555,11 +559,17 @@ def _workbook_class(self): return Workbook - def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + def load_workbook( + self, filepath_or_buffer: FilePath | ReadBuffer[bytes], **engine_kwargs + ): from openpyxl import load_workbook return load_workbook( - filepath_or_buffer, read_only=True, data_only=True, keep_links=False + filepath_or_buffer, + read_only=True, + data_only=True, + keep_links=False, + **engine_kwargs, ) @property diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index bfe21082cc4d0..a3f83ac80a740 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -25,6 +25,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], storage_options: StorageOptions = None, + **engine_kwargs, ) -> None: """ Reader using pyxlsb engine. @@ -34,11 +35,14 @@ def __init__( filepath_or_buffer : str, path object, or Workbook Object to be parsed. 
{storage_options} + engine_kwargs : Arbitrary keyword arguments passed to excel engine """ import_optional_dependency("pyxlsb") # This will call load_workbook on the filepath or buffer # And set the result to the book-attribute - super().__init__(filepath_or_buffer, storage_options=storage_options) + super().__init__( + filepath_or_buffer, storage_options=storage_options, **engine_kwargs + ) @property def _workbook_class(self): @@ -46,14 +50,16 @@ def _workbook_class(self): return Workbook - def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): + def load_workbook( + self, filepath_or_buffer: FilePath | ReadBuffer[bytes], **engine_kwargs + ): from pyxlsb import open_workbook # TODO: hack in buffer capability # This might need some modifications to the Pyxlsb library # Actual work for opening it is in xlsbpackage.py, line 20-ish - return open_workbook(filepath_or_buffer) + return open_workbook(filepath_or_buffer, **engine_kwargs) @property def sheet_names(self) -> list[str]: diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 702d00e7fdea7..57b6371af6601 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -22,7 +22,10 @@ class XlrdReader(BaseExcelReader): @doc(storage_options=_shared_docs["storage_options"]) def __init__( - self, filepath_or_buffer, storage_options: StorageOptions = None + self, + filepath_or_buffer, + storage_options: StorageOptions = None, + **engine_kwargs, ) -> None: """ Reader using xlrd engine. @@ -32,10 +35,13 @@ def __init__( filepath_or_buffer : str, path object or Workbook Object to be parsed. 
{storage_options} + engine_kwargs : Arbitrary keyword arguments passed to excel engine """ err_msg = "Install xlrd >= 2.0.1 for xls Excel support" import_optional_dependency("xlrd", extra=err_msg) - super().__init__(filepath_or_buffer, storage_options=storage_options) + super().__init__( + filepath_or_buffer, storage_options=storage_options, **engine_kwargs + ) @property def _workbook_class(self): @@ -43,14 +49,14 @@ def _workbook_class(self): return Book - def load_workbook(self, filepath_or_buffer): + def load_workbook(self, filepath_or_buffer, **engine_kwargs): from xlrd import open_workbook if hasattr(filepath_or_buffer, "read"): data = filepath_or_buffer.read() - return open_workbook(file_contents=data) + return open_workbook(file_contents=data, **engine_kwargs) else: - return open_workbook(filepath_or_buffer) + return open_workbook(filepath_or_buffer, **engine_kwargs) @property def sheet_names(self): diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index c22051912d293..d99d6ea9b2a78 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -6,6 +6,7 @@ import os from pathlib import Path import platform +import re from urllib.error import URLError from zipfile import BadZipFile @@ -148,6 +149,26 @@ def parser(self, *args, **kwargs): expected = expected_defaults[read_ext[1:]] assert result == expected + def test_engine_kwargs(self, read_ext, engine): + # GH#52214 + expected_defaults = { + "xlsx": {"foo": "abcd"}, + "xlsm": {"foo": 123}, + "xlsb": {"foo": "True"}, + "xls": {"foo": True}, + "ods": {}, + } + + msg = re.escape(r"read_excel() got an unexpected keyword argument 'foo'") + if engine is not None and expected_defaults[read_ext[1:]]: + with pytest.raises(TypeError, match=msg): + pd.read_excel( + "test1" + read_ext, + sheet_name="Sheet1", + index_col=0, + **expected_defaults[read_ext[1:]], + ) + def test_usecols_int(self, read_ext): # usecols as int msg = "Passing an integer 
for `usecols`" From 1333165b8979eda83f3c7227613e37c3e1312e1e Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 29 Mar 2023 23:35:44 -0400 Subject: [PATCH 02/29] Fixing merge conflict --- doc/source/user_guide/io.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c33d4ab92d4c6..eb41a65859a25 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3449,6 +3449,18 @@ Reading Excel files In the most basic use-case, ``read_excel`` takes a path to an Excel file, and the ``sheet_name`` indicating which sheet to parse. +When using the engine_kwargs parameter, pandas will pass these arguments to the +appropriate engine. For this, it is important to know which function pandas is +using internally. + +* For the engine openpyxl, pandas is using :func:`~openpyxl.load_workbook` to read in (``.xlsx``) and (``.xlsm``) files. + +* For the engine xlrd, pandas is using :func:`~xlrd.open_workbook` to read in (``.xls``) files. + +* For the engine xlrd, pandas is using :func:`~pyxlsb.open_workbook` to read in (``.xlsb``) files. + +* For the engine odf, pandas is using :func:`~odf.opendocument.load` to read in (``.ods``) files. + .. code-block:: python # Returns a DataFrame From 2267d3098e5c99b57a5e73757ecacdfe7de7893f Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Thu, 30 Mar 2023 23:50:41 -0400 Subject: [PATCH 03/29] Fixing documentation issues --- doc/source/user_guide/io.rst | 12 ++++++------ doc/source/whatsnew/v2.1.0.rst | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index eb41a65859a25..69d1928ae5b33 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3449,17 +3449,17 @@ Reading Excel files In the most basic use-case, ``read_excel`` takes a path to an Excel file, and the ``sheet_name`` indicating which sheet to parse. 
-When using the engine_kwargs parameter, pandas will pass these arguments to the -appropriate engine. For this, it is important to know which function pandas is +When using the ``engine_kwargs`` parameter, pandas will pass these arguments to the +engine. For this, it is important to know which function pandas is using internally. -* For the engine openpyxl, pandas is using :func:`~openpyxl.load_workbook` to read in (``.xlsx``) and (``.xlsm``) files. +* For the engine openpyxl, pandas is using :func:`openpyxl.load_workbook` to read in (``.xlsx``) and (``.xlsm``) files. -* For the engine xlrd, pandas is using :func:`~xlrd.open_workbook` to read in (``.xls``) files. +* For the engine xlrd, pandas is using :func:`xlrd.open_workbook` to read in (``.xls``) files. -* For the engine xlrd, pandas is using :func:`~pyxlsb.open_workbook` to read in (``.xlsb``) files. +* For the engine xlrd, pandas is using :func:`pyxlsb.open_workbook` to read in (``.xlsb``) files. -* For the engine odf, pandas is using :func:`~odf.opendocument.load` to read in (``.ods``) files. +* For the engine odf, pandas is using :func:`odf.opendocument.load` to read in (``.ods``) files. .. code-block:: python diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d2a28704e5709..019cfcce305a3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -41,7 +41,6 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). - - :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`). 
- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`). - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) From 14b4be039b826f577d20601d12937ba64310e173 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 12:55:50 -0400 Subject: [PATCH 04/29] standardized usage of engine_kwargs, fixed unit tests & doc strings --- doc/source/user_guide/io.rst | 2 +- pandas/io/excel/_base.py | 15 +++++++++------ pandas/io/excel/_odfreader.py | 9 +++++---- pandas/io/excel/_openpyxl.py | 9 +++++---- pandas/io/excel/_pyxlsb.py | 11 ++++++----- pandas/io/excel/_xlrd.py | 9 +++++---- pandas/tests/io/excel/test_readers.py | 8 ++++++-- 7 files changed, 37 insertions(+), 26 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 69d1928ae5b33..4ef445d56457f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3457,7 +3457,7 @@ using internally. * For the engine xlrd, pandas is using :func:`xlrd.open_workbook` to read in (``.xls``) files. -* For the engine xlrd, pandas is using :func:`pyxlsb.open_workbook` to read in (``.xlsb``) files. +* For the engine pyxlsb, pandas is using :func:`pyxlsb.open_workbook` to read in (``.xlsb``) files. * For the engine odf, pandas is using :func:`odf.opendocument.load` to read in (``.ods``) files. 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 5d5729cf06bbe..3a76ead4110da 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -480,17 +480,20 @@ def read_excel( skipfooter: int = 0, storage_options: StorageOptions = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, - engine_kwargs: dict = dict(), + engine_kwargs: dict | None = None, ) -> DataFrame | dict[IntStrT, DataFrame]: check_dtype_backend(dtype_backend) should_close = False + if engine_kwargs is None: + engine_kwargs = {} + if not isinstance(io, ExcelFile): should_close = True io = ExcelFile( io, + engine_kwargs, storage_options=storage_options, engine=engine, - engine_kwargs=engine_kwargs, ) elif engine and engine != io.engine: raise ValueError( @@ -535,8 +538,8 @@ class BaseExcelReader(metaclass=abc.ABCMeta): def __init__( self, filepath_or_buffer, + engine_kwargs, storage_options: StorageOptions = None, - **engine_kwargs, ) -> None: # First argument can also be bytes, so create a buffer if isinstance(filepath_or_buffer, bytes): @@ -556,7 +559,7 @@ def __init__( # N.B. 
xlrd.Book has a read attribute too self.handles.handle.seek(0) try: - self.book = self.load_workbook(self.handles.handle, **engine_kwargs) + self.book = self.load_workbook(self.handles.handle, engine_kwargs) except Exception: self.close() raise @@ -1483,9 +1486,9 @@ class ExcelFile: def __init__( self, path_or_buffer, + engine_kwargs, engine: str | None = None, storage_options: StorageOptions = None, - engine_kwargs: dict = dict(), ) -> None: if engine is not None and engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") @@ -1531,7 +1534,7 @@ def __init__( self.storage_options = storage_options self._reader = self._engines[engine]( - self._io, storage_options=storage_options, **engine_kwargs + self._io, engine_kwargs, storage_options=storage_options, ) def __fspath__(self): diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 1d2f57b09b2cd..92b8897763fd0 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -30,8 +30,8 @@ class ODFReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], + engine_kwargs, storage_options: StorageOptions = None, - **engine_kwargs, ) -> None: """ Read tables out of OpenDocument formatted files. @@ -40,12 +40,13 @@ def __init__( ---------- filepath_or_buffer : str, path to be parsed or an open readable stream. 
+ engine_kwargs : dict + Arbitrary keyword arguments passed to excel engine {storage_options} - engine_kwargs : Arbitrary keyword arguments passed to excel engine """ import_optional_dependency("odf") super().__init__( - filepath_or_buffer, storage_options=storage_options, **engine_kwargs + filepath_or_buffer, engine_kwargs, storage_options=storage_options, ) @property @@ -55,7 +56,7 @@ def _workbook_class(self): return OpenDocument def load_workbook( - self, filepath_or_buffer: FilePath | ReadBuffer[bytes], **engine_kwargs + self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs ): from odf.opendocument import load diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 3e1027c8e964c..a433cfa3b2152 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -535,8 +535,8 @@ class OpenpyxlReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], + engine_kwargs, storage_options: StorageOptions = None, - **engine_kwargs, ) -> None: """ Reader using openpyxl engine. @@ -545,12 +545,13 @@ def __init__( ---------- filepath_or_buffer : str, path object or Workbook Object to be parsed. 
+ engine_kwargs : dict + Arbitrary keyword arguments passed to excel engine {storage_options} - engine_kwargs : Arbitrary keyword arguments passed to excel engine """ import_optional_dependency("openpyxl") super().__init__( - filepath_or_buffer, storage_options=storage_options, **engine_kwargs + filepath_or_buffer, engine_kwargs, storage_options=storage_options, ) @property @@ -560,7 +561,7 @@ def _workbook_class(self): return Workbook def load_workbook( - self, filepath_or_buffer: FilePath | ReadBuffer[bytes], **engine_kwargs + self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs ): from openpyxl import load_workbook diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index a3f83ac80a740..35905c1aa5158 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -24,8 +24,8 @@ class PyxlsbReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], - storage_options: StorageOptions = None, - **engine_kwargs, + engine_kwargs, + storage_options: StorageOptions = None ) -> None: """ Reader using pyxlsb engine. @@ -34,14 +34,15 @@ def __init__( ---------- filepath_or_buffer : str, path object, or Workbook Object to be parsed. 
+ engine_kwargs : dict + Arbitrary keyword arguments passed to excel engine {storage_options} - engine_kwargs : Arbitrary keyword arguments passed to excel engine """ import_optional_dependency("pyxlsb") # This will call load_workbook on the filepath or buffer # And set the result to the book-attribute super().__init__( - filepath_or_buffer, storage_options=storage_options, **engine_kwargs + filepath_or_buffer, engine_kwargs, storage_options=storage_options, ) @property @@ -51,7 +52,7 @@ def _workbook_class(self): return Workbook def load_workbook( - self, filepath_or_buffer: FilePath | ReadBuffer[bytes], **engine_kwargs + self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs ): from pyxlsb import open_workbook diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 57b6371af6601..eaa8348b04f06 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -24,8 +24,8 @@ class XlrdReader(BaseExcelReader): def __init__( self, filepath_or_buffer, + engine_kwargs, storage_options: StorageOptions = None, - **engine_kwargs, ) -> None: """ Reader using xlrd engine. @@ -34,13 +34,14 @@ def __init__( ---------- filepath_or_buffer : str, path object or Workbook Object to be parsed. 
+ engine_kwargs : dict + Arbitrary keyword arguments passed to excel engine {storage_options} - engine_kwargs : Arbitrary keyword arguments passed to excel engine """ err_msg = "Install xlrd >= 2.0.1 for xls Excel support" import_optional_dependency("xlrd", extra=err_msg) super().__init__( - filepath_or_buffer, storage_options=storage_options, **engine_kwargs + filepath_or_buffer, engine_kwargs, storage_options=storage_options, ) @property @@ -49,7 +50,7 @@ def _workbook_class(self): return Book - def load_workbook(self, filepath_or_buffer, **engine_kwargs): + def load_workbook(self, filepath_or_buffer, engine_kwargs): from xlrd import open_workbook if hasattr(filepath_or_buffer, "read"): diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index d99d6ea9b2a78..baf4c3e9aabac 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -159,14 +159,18 @@ def test_engine_kwargs(self, read_ext, engine): "ods": {}, } - msg = re.escape(r"read_excel() got an unexpected keyword argument 'foo'") + msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'") + + if read_ext[1:] == 'xls' or read_ext[1:] == 'xlsb': + msg = re.escape(r"open_workbook() got an unexpected keyword argument 'foo'") + if engine is not None and expected_defaults[read_ext[1:]]: with pytest.raises(TypeError, match=msg): pd.read_excel( "test1" + read_ext, sheet_name="Sheet1", index_col=0, - **expected_defaults[read_ext[1:]], + engine_kwargs=expected_defaults[read_ext[1:]] ) def test_usecols_int(self, read_ext): From 057d5a231ae0a3e8c09024520e9b561415d66bd9 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 13:11:04 -0400 Subject: [PATCH 05/29] Fixing documentation issues --- pandas/io/excel/_base.py | 6 +++++- pandas/io/excel/_odfreader.py | 6 ++++-- pandas/io/excel/_openpyxl.py | 6 ++++-- pandas/io/excel/_pyxlsb.py | 8 +++++--- pandas/io/excel/_xlrd.py | 6 ++++-- 
pandas/tests/io/excel/test_readers.py | 4 ++-- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3a76ead4110da..231d750fcfee8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1435,6 +1435,8 @@ class ExcelFile: A file-like object, xlrd workbook or openpyxl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. + engine_kwargs: dict + Arbitrary keyword arguments passed to excel engine. engine : str, default None If io is not a buffer or path, this must be set to identify io. Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` @@ -1534,7 +1536,9 @@ def __init__( self.storage_options = storage_options self._reader = self._engines[engine]( - self._io, engine_kwargs, storage_options=storage_options, + self._io, + engine_kwargs, + storage_options=storage_options, ) def __fspath__(self): diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 92b8897763fd0..9e63004911ec5 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -41,12 +41,14 @@ def __init__( filepath_or_buffer : str, path to be parsed or an open readable stream. engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine + Arbitrary keyword arguments passed to excel engine. {storage_options} """ import_optional_dependency("odf") super().__init__( - filepath_or_buffer, engine_kwargs, storage_options=storage_options, + filepath_or_buffer, + engine_kwargs, + storage_options=storage_options, ) @property diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index a433cfa3b2152..7107d715eed15 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -546,12 +546,14 @@ def __init__( filepath_or_buffer : str, path object or Workbook Object to be parsed. 
engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine + Arbitrary keyword arguments passed to excel engine. {storage_options} """ import_optional_dependency("openpyxl") super().__init__( - filepath_or_buffer, engine_kwargs, storage_options=storage_options, + filepath_or_buffer, + engine_kwargs, + storage_options=storage_options, ) @property diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 35905c1aa5158..1b2fdbf8d40b6 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -25,7 +25,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs, - storage_options: StorageOptions = None + storage_options: StorageOptions = None, ) -> None: """ Reader using pyxlsb engine. @@ -35,14 +35,16 @@ def __init__( filepath_or_buffer : str, path object, or Workbook Object to be parsed. engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine + Arbitrary keyword arguments passed to excel engine. {storage_options} """ import_optional_dependency("pyxlsb") # This will call load_workbook on the filepath or buffer # And set the result to the book-attribute super().__init__( - filepath_or_buffer, engine_kwargs, storage_options=storage_options, + filepath_or_buffer, + engine_kwargs, + storage_options=storage_options, ) @property diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index eaa8348b04f06..812fa8f13f7b0 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -35,13 +35,15 @@ def __init__( filepath_or_buffer : str, path object or Workbook Object to be parsed. engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine + Arbitrary keyword arguments passed to excel engine. 
{storage_options} """ err_msg = "Install xlrd >= 2.0.1 for xls Excel support" import_optional_dependency("xlrd", extra=err_msg) super().__init__( - filepath_or_buffer, engine_kwargs, storage_options=storage_options, + filepath_or_buffer, + engine_kwargs, + storage_options=storage_options, ) @property diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index baf4c3e9aabac..9b87ab514b325 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -161,7 +161,7 @@ def test_engine_kwargs(self, read_ext, engine): msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'") - if read_ext[1:] == 'xls' or read_ext[1:] == 'xlsb': + if read_ext[1:] == "xls" or read_ext[1:] == "xlsb": msg = re.escape(r"open_workbook() got an unexpected keyword argument 'foo'") if engine is not None and expected_defaults[read_ext[1:]]: @@ -170,7 +170,7 @@ def test_engine_kwargs(self, read_ext, engine): "test1" + read_ext, sheet_name="Sheet1", index_col=0, - engine_kwargs=expected_defaults[read_ext[1:]] + engine_kwargs=expected_defaults[read_ext[1:]], ) def test_usecols_int(self, read_ext): From c05f1824eda0db0738ca8b86f28f5acd5a6405b3 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 15:13:06 -0400 Subject: [PATCH 06/29] Fixing implementation logic and unit tests --- pandas/io/excel/_base.py | 20 +++++++++++++------- pandas/io/excel/_odfreader.py | 8 ++++---- pandas/io/excel/_openpyxl.py | 8 ++++---- pandas/io/excel/_pyxlsb.py | 8 ++++---- pandas/io/excel/_xlrd.py | 8 ++++---- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 231d750fcfee8..b2ff381b70477 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -289,7 +289,7 @@ .. versionadded:: 2.0 -engine_kwargs : dict +engine_kwargs : dict, default None Arbitrary keyword arguments passed to excel engine. 
Returns @@ -491,9 +491,9 @@ def read_excel( should_close = True io = ExcelFile( io, - engine_kwargs, storage_options=storage_options, engine=engine, + engine_kwargs=engine_kwargs, ) elif engine and engine != io.engine: raise ValueError( @@ -538,9 +538,12 @@ class BaseExcelReader(metaclass=abc.ABCMeta): def __init__( self, filepath_or_buffer, - engine_kwargs, storage_options: StorageOptions = None, + engine_kwargs: None = None, ) -> None: + if engine_kwargs is None: + engine_kwargs = {} + # First argument can also be bytes, so create a buffer if isinstance(filepath_or_buffer, bytes): filepath_or_buffer = BytesIO(filepath_or_buffer) @@ -1435,8 +1438,6 @@ class ExcelFile: A file-like object, xlrd workbook or openpyxl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. - engine_kwargs: dict - Arbitrary keyword arguments passed to excel engine. engine : str, default None If io is not a buffer or path, this must be set to identify io. Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` @@ -1471,6 +1472,8 @@ class ExcelFile: Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. This is not supported, switch to using ``openpyxl`` instead. + engine_kwargs: dict, default None + Arbitrary keyword arguments passed to excel engine. 
""" from pandas.io.excel._odfreader import ODFReader @@ -1488,10 +1491,13 @@ class ExcelFile: def __init__( self, path_or_buffer, - engine_kwargs, engine: str | None = None, storage_options: StorageOptions = None, + engine_kwargs: None = None, ) -> None: + if engine_kwargs is None: + engine_kwargs = {} + if engine is not None and engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") @@ -1537,8 +1543,8 @@ def __init__( self._reader = self._engines[engine]( self._io, - engine_kwargs, storage_options=storage_options, + engine_kwargs=engine_kwargs, ) def __fspath__(self): diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 9e63004911ec5..f2d5c855e4141 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -30,8 +30,8 @@ class ODFReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], - engine_kwargs, storage_options: StorageOptions = None, + engine_kwargs: None = None, ) -> None: """ Read tables out of OpenDocument formatted files. @@ -40,15 +40,15 @@ def __init__( ---------- filepath_or_buffer : str, path to be parsed or an open readable stream. - engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine. {storage_options} + engine_kwargs : dict, default None + Arbitrary keyword arguments passed to excel engine. """ import_optional_dependency("odf") super().__init__( filepath_or_buffer, - engine_kwargs, storage_options=storage_options, + engine_kwargs=engine_kwargs, ) @property diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 7107d715eed15..cc9a588b7e852 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -535,8 +535,8 @@ class OpenpyxlReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], - engine_kwargs, storage_options: StorageOptions = None, + engine_kwargs: None = None, ) -> None: """ Reader using openpyxl engine. 
@@ -545,15 +545,15 @@ def __init__( ---------- filepath_or_buffer : str, path object or Workbook Object to be parsed. - engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine. {storage_options} + engine_kwargs : dict, default None + Arbitrary keyword arguments passed to excel engine. """ import_optional_dependency("openpyxl") super().__init__( filepath_or_buffer, - engine_kwargs, storage_options=storage_options, + engine_kwargs=engine_kwargs, ) @property diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 1b2fdbf8d40b6..b4d63a2871d58 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -24,8 +24,8 @@ class PyxlsbReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], - engine_kwargs, storage_options: StorageOptions = None, + engine_kwargs: None = None, ) -> None: """ Reader using pyxlsb engine. @@ -34,17 +34,17 @@ def __init__( ---------- filepath_or_buffer : str, path object, or Workbook Object to be parsed. - engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine. {storage_options} + engine_kwargs : dict, default None + Arbitrary keyword arguments passed to excel engine. """ import_optional_dependency("pyxlsb") # This will call load_workbook on the filepath or buffer # And set the result to the book-attribute super().__init__( filepath_or_buffer, - engine_kwargs, storage_options=storage_options, + engine_kwargs=engine_kwargs, ) @property diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 812fa8f13f7b0..855f8d49770f4 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -24,8 +24,8 @@ class XlrdReader(BaseExcelReader): def __init__( self, filepath_or_buffer, - engine_kwargs, storage_options: StorageOptions = None, + engine_kwargs: None = None, ) -> None: """ Reader using xlrd engine. @@ -34,16 +34,16 @@ def __init__( ---------- filepath_or_buffer : str, path object or Workbook Object to be parsed. 
- engine_kwargs : dict - Arbitrary keyword arguments passed to excel engine. {storage_options} + engine_kwargs : dict, default None + Arbitrary keyword arguments passed to excel engine. """ err_msg = "Install xlrd >= 2.0.1 for xls Excel support" import_optional_dependency("xlrd", extra=err_msg) super().__init__( filepath_or_buffer, - engine_kwargs, storage_options=storage_options, + engine_kwargs=engine_kwargs, ) @property From 90652619fef38b314d688dc700af743011ee4992 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 15:53:43 -0400 Subject: [PATCH 07/29] Fixing implementation logic --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index b2ff381b70477..70e5fa0e46f6b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -577,7 +577,7 @@ def _workbook_class(self): pass @abc.abstractmethod - def load_workbook(self, filepath_or_buffer, **engine_kwargs): + def load_workbook(self, filepath_or_buffer, engine_kwargs): pass def close(self) -> None: From 45589bb0e1f9993521140cde35edd157ba6424ea Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 16:30:55 -0400 Subject: [PATCH 08/29] Fixing formatting issues --- pandas/io/excel/_base.py | 4 ++-- pandas/io/excel/_odfreader.py | 2 +- pandas/io/excel/_openpyxl.py | 2 +- pandas/io/excel/_pyxlsb.py | 2 +- pandas/io/excel/_xlrd.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 70e5fa0e46f6b..cb60d6bcf800c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -289,7 +289,7 @@ .. versionadded:: 2.0 -engine_kwargs : dict, default None +engine_kwargs : dict, optional Arbitrary keyword arguments passed to excel engine. Returns @@ -1472,7 +1472,7 @@ class ExcelFile: Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. This is not supported, switch to using ``openpyxl`` instead. 
- engine_kwargs: dict, default None + engine_kwargs : dict, optional Arbitrary keyword arguments passed to excel engine. """ diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index f2d5c855e4141..6b71fe8b4c614 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -41,7 +41,7 @@ def __init__( filepath_or_buffer : str, path to be parsed or an open readable stream. {storage_options} - engine_kwargs : dict, default None + engine_kwargs : dict, optional Arbitrary keyword arguments passed to excel engine. """ import_optional_dependency("odf") diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index cc9a588b7e852..efa638c0b1598 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -546,7 +546,7 @@ def __init__( filepath_or_buffer : str, path object or Workbook Object to be parsed. {storage_options} - engine_kwargs : dict, default None + engine_kwargs : dict, optional Arbitrary keyword arguments passed to excel engine. """ import_optional_dependency("openpyxl") diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index b4d63a2871d58..8a9158f3e33de 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -35,7 +35,7 @@ def __init__( filepath_or_buffer : str, path object, or Workbook Object to be parsed. {storage_options} - engine_kwargs : dict, default None + engine_kwargs : dict, optional Arbitrary keyword arguments passed to excel engine. """ import_optional_dependency("pyxlsb") diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 855f8d49770f4..5630e52973288 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -35,7 +35,7 @@ def __init__( filepath_or_buffer : str, path object or Workbook Object to be parsed. {storage_options} - engine_kwargs : dict, default None + engine_kwargs : dict, optional Arbitrary keyword arguments passed to excel engine. 
""" err_msg = "Install xlrd >= 2.0.1 for xls Excel support" From 93c6e601a092eee6b9d197e9ba79a845a1e5a446 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 18:44:20 -0400 Subject: [PATCH 09/29] Fixing error for test Docstring validation, typing, and other manual pre-commit hooks --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index cb60d6bcf800c..b8c78fb6e0df7 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1493,7 +1493,7 @@ def __init__( path_or_buffer, engine: str | None = None, storage_options: StorageOptions = None, - engine_kwargs: None = None, + engine_kwargs: dict | None = None, ) -> None: if engine_kwargs is None: engine_kwargs = {} From d60aa9778efad408bd9e19ff8531738dcc18e01e Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 19:24:57 -0400 Subject: [PATCH 10/29] Fixing documentation error --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index b8c78fb6e0df7..d55cf92e229f0 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -539,7 +539,7 @@ def __init__( self, filepath_or_buffer, storage_options: StorageOptions = None, - engine_kwargs: None = None, + engine_kwargs: dict | None = None, ) -> None: if engine_kwargs is None: engine_kwargs = {} From 543117802048d995fdc0eae0b0175027c3e9bb67 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 1 Apr 2023 21:14:29 -0400 Subject: [PATCH 11/29] Standardizing engine_kwarg types --- pandas/io/excel/_odfreader.py | 2 +- pandas/io/excel/_openpyxl.py | 2 +- pandas/io/excel/_pyxlsb.py | 2 +- pandas/io/excel/_xlrd.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 6b71fe8b4c614..c46424d5b26da 100644 --- a/pandas/io/excel/_odfreader.py +++ 
b/pandas/io/excel/_odfreader.py @@ -31,7 +31,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], storage_options: StorageOptions = None, - engine_kwargs: None = None, + engine_kwargs: dict | None = None, ) -> None: """ Read tables out of OpenDocument formatted files. diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index efa638c0b1598..195d3a3a8b263 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -536,7 +536,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], storage_options: StorageOptions = None, - engine_kwargs: None = None, + engine_kwargs: dict | None = None, ) -> None: """ Reader using openpyxl engine. diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 8a9158f3e33de..a1234b0e74c3e 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -25,7 +25,7 @@ def __init__( self, filepath_or_buffer: FilePath | ReadBuffer[bytes], storage_options: StorageOptions = None, - engine_kwargs: None = None, + engine_kwargs: dict | None = None, ) -> None: """ Reader using pyxlsb engine. diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 5630e52973288..d131567cf70f7 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -25,7 +25,7 @@ def __init__( self, filepath_or_buffer, storage_options: StorageOptions = None, - engine_kwargs: None = None, + engine_kwargs: dict | None = None, ) -> None: """ Reader using xlrd engine. 
From f631de7309d881be26c9d182dc073824bc2eaa12 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sun, 2 Apr 2023 09:20:43 -0400 Subject: [PATCH 12/29] Fixing minor issues with unit tests and documentation --- doc/source/whatsnew/v2.1.0.rst | 5 ++--- pandas/tests/io/excel/test_readers.py | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6acb3af4b40c4..a1d8c66b0db5f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -42,10 +42,9 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). -- :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`). -- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`). +- :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) -- +- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`). .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 9b87ab514b325..1441cf2f33502 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -156,7 +156,7 @@ def test_engine_kwargs(self, read_ext, engine): "xlsm": {"foo": 123}, "xlsb": {"foo": "True"}, "xls": {"foo": True}, - "ods": {}, + "ods": {"foo": "abcd"}, } msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'") @@ -164,6 +164,9 @@ def test_engine_kwargs(self, read_ext, engine): if read_ext[1:] == "xls" or read_ext[1:] == "xlsb": msg = re.escape(r"open_workbook() got an unexpected keyword argument 'foo'") + elif read_ext[1:] == "ods": + msg = re.escape(r"load() got an unexpected keyword argument 'foo'") + if engine is not None and expected_defaults[read_ext[1:]]: with pytest.raises(TypeError, match=msg): pd.read_excel( From 1000a30f71c9d64e6b4f898c8713be3c942b026e Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sun, 2 Apr 2023 10:21:07 -0400 Subject: [PATCH 13/29] Fixing documentation issue --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index a1d8c66b0db5f..670a3cdacdd32 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -42,9 +42,9 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). 
+- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) -- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`). .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 86dbb359c46a49e63d6fa9ef7362b7c6d97b000a Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 3 Apr 2023 17:23:27 -0400 Subject: [PATCH 14/29] Fixing a formatting / documentation error --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 670a3cdacdd32..603ba56640d10 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -41,7 +41,7 @@ Other enhancements - Added to the escape mode "latex-math" preserving without escaping all characters between "\(" and "\)" in formatter (:issue:`51903`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) -- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). +- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. 
(:issue:`40274`) - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) From da022c8d2ff4e7b8244e5104673b846b61bc7793 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 15:06:57 -0400 Subject: [PATCH 15/29] Fixing documentation errors --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3c10d0f3b9167..06fc6f02e3a20 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -71,7 +71,6 @@ to ``na_action=None``, like for all the other array types. Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter. :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. Also notice that :meth:`Series.map` has default ``na_action=None`` and calls to series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`) - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) @@ -89,6 +88,7 @@ Other enhancements - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) + .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: From 8106cc6f9709e29b32408dd904a6afd63715510d Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 15:15:04 -0400 Subject: [PATCH 16/29] Fixing documentation errors --- doc/source/whatsnew/v2.1.0.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 06fc6f02e3a20..17642548eb302 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -73,6 +73,8 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. Also notice that :meth:`Series.map` has default ``na_action=None`` and calls to series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`) +- :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`) - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) @@ -83,11 +85,8 @@ Other enhancements - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Added to the escape mode "latex-math" preserving without escaping all characters between "\(" and "\)" in formatter (:issue:`51903`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. 
(:issue:`52084`) -- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) -- :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) -- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 19a6d8868ce500d26852c76d0b760a5913ae2e4e Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 15:52:46 -0400 Subject: [PATCH 17/29] Fixing documentation errors --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 17642548eb302..24e4dad56e331 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -87,6 +87,7 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) +- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: From c69ef917f8cf8bd6258ddfca2063a689860671df Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 17:14:32 -0400 Subject: [PATCH 18/29] Fixing documentation errors --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 24e4dad56e331..08c89f0291a8b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -88,7 +88,7 @@ Other enhancements - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - +- .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 242765d30761e979113ec98de629173c070049cf Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 18:41:04 -0400 Subject: [PATCH 19/29] Fixing documentation errors --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 08c89f0291a8b..170368581c1e8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -89,6 +89,7 @@ Other enhancements - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - + .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: From c9aa28a317025daa23f4af84042d015ea7883876 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 20:49:12 -0400 Subject: [PATCH 20/29] Adding an extra blank line to troubleshoot documentation error --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 170368581c1e8..e9ad70d30c4a7 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -90,6 +90,7 @@ Other enhancements - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - + .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 46be9ec9e3f9e36624de807143c856fe5d1a3583 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 5 Apr 2023 21:22:53 -0400 Subject: [PATCH 21/29] Adding an extra blank line to troubleshoot documentation error --- doc/source/whatsnew/v2.1.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e9ad70d30c4a7..3b47d607d8f17 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -71,7 +71,7 @@ to ``na_action=None``, like for all the other array types. Other enhancements ^^^^^^^^^^^^^^^^^^ - :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. +- :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. 
Also notice that :meth:`Series.map` has default ``na_action=None`` and calls to series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`) - :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) - :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) @@ -90,7 +90,6 @@ Other enhancements - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - - .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From f692c8e4cf05203a6ba286272c17fb90227f470e Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 10 Apr 2023 19:07:48 -0400 Subject: [PATCH 22/29] Fixing documentation issues --- doc/source/whatsnew/v2.1.0.rst | 67 ++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 5936fa3337843..7aeac1000b8d6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -71,10 +71,9 @@ to ``na_action=None``, like for all the other array types. Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter. + :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. 
Also notice that :meth:`Series.map` has default ``na_action=None`` and calls to series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`) -- :meth:`pandas.read_excel` now supports ``engine_kwargs`` to pass keyword args to Excel engines. (:issue:`40274`) -- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide ` (:issue:`48347`) - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`) - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) @@ -85,9 +84,10 @@ Other enhancements - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Added to the escape mode "latex-math" preserving without escaping all characters between "\(" and "\)" in formatter (:issue:`51903`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) -- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`) -- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) +- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). +- Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`). +- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - .. 
--------------------------------------------------------------------------- @@ -126,6 +126,56 @@ If installed, we now require: +=================+=================+==========+=========+ | mypy (dev) | 1.2 | | X | +-----------------+-----------------+----------+---------+ +| beautifulsoup4 | 4.11.1 | | X | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.3.4 | | X | ++-----------------+-----------------+----------+---------+ +| fastparquet | 0.8.1 | | X | ++-----------------+-----------------+----------+---------+ +| fsspec | 2022.05.0 | | X | ++-----------------+-----------------+----------+---------+ +| hypothesis | 6.46.1 | | X | ++-----------------+-----------------+----------+---------+ +| gcsfs | 2022.05.0 | | X | ++-----------------+-----------------+----------+---------+ +| jinja2 | 3.1.2 | | X | ++-----------------+-----------------+----------+---------+ +| lxml | 4.8.0 | | X | ++-----------------+-----------------+----------+---------+ +| numba | 0.55.2 | | X | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.8.0 | | X | ++-----------------+-----------------+----------+---------+ +| openpyxl | 3.0.10 | | X | ++-----------------+-----------------+----------+---------+ +| pandas-gbq | 0.17.5 | | X | ++-----------------+-----------------+----------+---------+ +| psycopg2 | 2.9.3 | | X | ++-----------------+-----------------+----------+---------+ +| pyreadstat | 1.1.5 | | X | ++-----------------+-----------------+----------+---------+ +| pyqt5 | 5.15.6 | | X | ++-----------------+-----------------+----------+---------+ +| pytables | 3.7.0 | | X | ++-----------------+-----------------+----------+---------+ +| python-snappy | 0.6.1 | | X | ++-----------------+-----------------+----------+---------+ +| pyxlsb | 1.0.9 | | X | ++-----------------+-----------------+----------+---------+ +| s3fs | 2022.05.0 | | X | ++-----------------+-----------------+----------+---------+ +| scipy | 1.8.1 | | X | 
++-----------------+-----------------+----------+---------+ +| sqlalchemy | 1.4.36 | | X | ++-----------------+-----------------+----------+---------+ +| tabulate | 0.8.10 | | X | ++-----------------+-----------------+----------+---------+ +| xarray | 2022.03.0 | | X | ++-----------------+-----------------+----------+---------+ +| xlsxwriter | 3.0.3 | | X | ++-----------------+-----------------+----------+---------+ +| zstandard | 0.17.0 | | X | ++-----------------+-----------------+----------+---------+ For `optional libraries `_ the general recommendation is to use the latest version. The following table lists the lowest version per library that is currently being tested throughout the development of pandas. @@ -177,6 +227,7 @@ Deprecations - Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. This pattern was very slow and it's recommended to use alternative methods to archive the same goal (:issue:`52116`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) +- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`) - .. --------------------------------------------------------------------------- @@ -202,6 +253,7 @@ Performance improvements - Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`) - Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`52525`) +- Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`) - .. 
--------------------------------------------------------------------------- @@ -232,7 +284,7 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :func:`infer_freq` that raises ``TypeError`` for ``Series`` of timezone-aware timestamps (:issue:`52456`) - Numeric @@ -285,7 +337,8 @@ Period - :meth:`PeriodIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`) - Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`) - Bug in :meth:`arrays.PeriodArray.map` and :meth:`PeriodIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) -- +- Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`) +- Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`) Plotting ^^^^^^^^ From 96c6fe0de6b6294681f2867fcd91a2c4f46f3cb0 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 10 Apr 2023 19:31:36 -0400 Subject: [PATCH 23/29] Fixing formatting errors --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7aeac1000b8d6..b59831fabef70 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -86,7 +86,7 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). -- Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`). 
+- Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - From 0391c9f695d1dc7c3d56837a5b4c53cc60626556 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 10 Apr 2023 19:37:12 -0400 Subject: [PATCH 24/29] Fixing formatting errors --- doc/source/whatsnew/v2.1.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b59831fabef70..664c89305dc77 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -86,9 +86,8 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). -- Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) -- +- Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`) .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: From f2c8e2a90ae409512486c5670eb6f856ca3514a7 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 10 Apr 2023 19:40:46 -0400 Subject: [PATCH 25/29] Fixing formatting errors --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 664c89305dc77..2a729b8eaf851 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -88,6 +88,7 @@ Other enhancements - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) - Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 679ab4bcb6ef888961f3cccff1d55c2ec14edbb9 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 11 Apr 2023 11:25:53 -0400 Subject: [PATCH 26/29] Fixing logic and formatting issues in unit tests --- pandas/tests/io/excel/test_readers.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 1441cf2f33502..05c86be850b32 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -159,15 +159,14 @@ def test_engine_kwargs(self, read_ext, engine): "ods": {"foo": "abcd"}, } - msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'") - if read_ext[1:] == "xls" or read_ext[1:] == "xlsb": msg = re.escape(r"open_workbook() got an unexpected keyword argument 'foo'") - elif read_ext[1:] == "ods": msg = re.escape(r"load() got an unexpected keyword argument 'foo'") + else: + msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'") - if engine is not None and expected_defaults[read_ext[1:]]: + if engine is not None: with 
pytest.raises(TypeError, match=msg): pd.read_excel( "test1" + read_ext, From 3412af01dce68dd4f9ec5f77b4077176511a2310 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 11 Apr 2023 12:27:28 -0400 Subject: [PATCH 27/29] Fixing issues with merge conflict --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 80e87ed0b3f36..dfbd130b74a1e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -340,7 +340,7 @@ Period - Bug in :meth:`arrays.PeriodArray.map` and :meth:`PeriodIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) - Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` (:issue:`52087`) -- +- Bug in incorrectly allowing construction of :class:`Period` or :class:`PeriodDtype` with :class:`CustomBusinessDay` freq; use :class:`BusinessDay` instead (:issue:`52534`) Plotting ^^^^^^^^ From f37912027ac1e9aade6c0d8f9f1adba9b0768351 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 11 Apr 2023 12:47:41 -0400 Subject: [PATCH 28/29] Fixing formatting issue --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dfbd130b74a1e..245cc111f3794 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -341,6 +341,7 @@ Period - Bug in :func:`read_csv` not processing empty strings as a null value, with ``engine="pyarrow"`` (:issue:`52087`) - Bug in :func:`read_csv` returning ``object`` dtype columns instead of ``float64`` dtype columns with ``engine="pyarrow"`` for columns that are all null with ``engine="pyarrow"`` 
(:issue:`52087`) - Bug in incorrectly allowing construction of :class:`Period` or :class:`PeriodDtype` with :class:`CustomBusinessDay` freq; use :class:`BusinessDay` instead (:issue:`52534`) +- Plotting ^^^^^^^^ From 8d7933c91407b0a9f635d2c69f3d6d5466099723 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Apr 2023 08:51:41 -0700 Subject: [PATCH 29/29] Update pandas/io/excel/_base.py --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d55cf92e229f0..92750bdd0f272 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -308,7 +308,7 @@ Notes ----- For specific information on the methods used for each Excel engine, refer to the pandas -user guide: :doc: `/source/user_guide/io` +:ref:`user guide <io.excel_reader>` Examples --------