diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml
index 111ba6b020bc7..dc51597a33209 100644
--- a/ci/deps/azure-37-locale.yaml
+++ b/ci/deps/azure-37-locale.yaml
@@ -34,3 +34,6 @@ dependencies:
- xlsxwriter
- xlwt
- pyarrow>=0.15
+ - pip
+ - pip:
+ - pyxlsb
diff --git a/ci/deps/azure-macos-36.yaml b/ci/deps/azure-macos-36.yaml
index 3bbbdb4cf32ad..90980133b31c1 100644
--- a/ci/deps/azure-macos-36.yaml
+++ b/ci/deps/azure-macos-36.yaml
@@ -33,3 +33,4 @@ dependencies:
- pip
- pip:
- pyreadstat
+ - pyxlsb
diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml
index 62be1075b3337..6b3ad6f560292 100644
--- a/ci/deps/azure-windows-37.yaml
+++ b/ci/deps/azure-windows-37.yaml
@@ -35,3 +35,6 @@ dependencies:
- xlsxwriter
- xlwt
- pyreadstat
+ - pip
+ - pip:
+ - pyxlsb
diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml
index a46001c58d165..869d2ab683f0c 100644
--- a/ci/deps/travis-36-cov.yaml
+++ b/ci/deps/travis-36-cov.yaml
@@ -51,3 +51,4 @@ dependencies:
- coverage
- pandas-datareader
- python-dateutil
+ - pyxlsb
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index b3fd443e662a9..b5c512cdc8328 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -264,6 +264,7 @@ pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and
pymysql 0.7.11 MySQL engine for sqlalchemy
pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
+pyxlsb 1.0.5 Reading for xlsb files
qtpy Clipboard I/O
s3fs 0.3.0 Amazon S3 access
tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 55bbf6848820b..a8d84469fbf74 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -23,7 +23,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
text;`JSON `__;:ref:`read_json`;:ref:`to_json`
text;`HTML `__;:ref:`read_html`;:ref:`to_html`
text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard`
- binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel`
+ ;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel`
binary;`OpenDocument `__;:ref:`read_excel`;
binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf`
binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather`
@@ -2768,7 +2768,8 @@ Excel files
The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``)
files using the ``xlrd`` Python module. Excel 2007+ (``.xlsx``) files
-can be read using either ``xlrd`` or ``openpyxl``.
+can be read using either ``xlrd`` or ``openpyxl``. Binary Excel (``.xlsb``)
+files can be read using ``pyxlsb``.
The :meth:`~DataFrame.to_excel` instance method is used for
saving a ``DataFrame`` to Excel. Generally the semantics are
similar to working with :ref:`csv` data.
@@ -3229,6 +3230,30 @@ OpenDocument spreadsheets match what can be done for `Excel files`_ using
Currently pandas only supports *reading* OpenDocument spreadsheets. Writing
is not implemented.
+.. _io.xlsb:
+
+Binary Excel (.xlsb) files
+--------------------------
+
+.. versionadded:: 1.0.0
+
+The :func:`~pandas.read_excel` method can also read binary Excel files
+using the ``pyxlsb`` module. The semantics and features for reading
+binary Excel files mostly match what can be done for `Excel files`_ using
+``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types
+in files and will return floats instead.
+
+.. code-block:: python
+
+ # Returns a DataFrame
+ pd.read_excel('path_to_file.xlsb', engine='pyxlsb')
+
+.. note::
+
+ Currently pandas only supports *reading* binary Excel files. Writing
+ is not implemented.
+
+
.. _io.clipboard:
Clipboard
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index fa562838c8f7c..1623aca49b9c3 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -215,7 +215,8 @@ Other enhancements
- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`)
- Roundtripping DataFrames with nullable integer, string and period data types to parquet
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
- now preserve those data types with pyarrow >= 0.16.0 (:issue:`20612`, :issue:`28371`).
+ now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
+- :func:`read_excel` now can read binary Excel (``.xlsb``) files by passing ``engine='pyxlsb'``. For more details and example usage, see the :ref:`Binary Excel files documentation `. Closes :issue:`8540`.
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index 7aeb0327139f1..d561ab9a10548 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -19,6 +19,7 @@
"pyarrow": "0.13.0",
"pytables": "3.4.2",
"pytest": "5.0.1",
+ "pyxlsb": "1.0.5",
"s3fs": "0.3.0",
"scipy": "0.19.0",
"sqlalchemy": "1.1.4",
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index afdd8a01ee003..eb1587313910d 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -479,6 +479,7 @@ def use_inf_as_na_cb(key):
_xlsm_options = ["xlrd", "openpyxl"]
_xlsx_options = ["xlrd", "openpyxl"]
_ods_options = ["odf"]
+_xlsb_options = ["pyxlsb"]
with cf.config_prefix("io.excel.xls"):
@@ -515,6 +516,13 @@ def use_inf_as_na_cb(key):
validator=str,
)
+with cf.config_prefix("io.excel.xlsb"):
+ cf.register_option(
+ "reader",
+ "auto",
+ reader_engine_doc.format(ext="xlsb", others=", ".join(_xlsb_options)),
+ validator=str,
+ )
# Set up the io.excel specific writer configuration.
writer_engine_doc = """
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 04015a08bce2f..2a91381b7fbeb 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -35,8 +35,9 @@
"""
Read an Excel file into a pandas DataFrame.
-Support both `xls` and `xlsx` file extensions from a local filesystem or URL.
-Support an option to read a single sheet or a list of sheets.
+Supports `xls`, `xlsx`, `xlsm`, `xlsb`, and `odf` file extensions
+read from a local filesystem or URL. Supports an option to read
+a single sheet or a list of sheets.
Parameters
----------
@@ -789,15 +790,21 @@ class ExcelFile:
If a string or path object, expected to be a path to xls, xlsx or odf file.
engine : str, default None
If io is not a buffer or path, this must be set to identify io.
- Acceptable values are None, ``xlrd``, ``openpyxl`` or ``odf``.
+ Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``.
Note that ``odf`` reads tables out of OpenDocument formatted files.
"""
from pandas.io.excel._odfreader import _ODFReader
from pandas.io.excel._openpyxl import _OpenpyxlReader
from pandas.io.excel._xlrd import _XlrdReader
-
- _engines = {"xlrd": _XlrdReader, "openpyxl": _OpenpyxlReader, "odf": _ODFReader}
+ from pandas.io.excel._pyxlsb import _PyxlsbReader
+
+ _engines = {
+ "xlrd": _XlrdReader,
+ "openpyxl": _OpenpyxlReader,
+ "odf": _ODFReader,
+ "pyxlsb": _PyxlsbReader,
+ }
def __init__(self, io, engine=None):
if engine is None:
diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
new file mode 100644
index 0000000000000..df6a38000452d
--- /dev/null
+++ b/pandas/io/excel/_pyxlsb.py
@@ -0,0 +1,68 @@
+from typing import List
+
+from pandas._typing import FilePathOrBuffer, Scalar
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.io.excel._base import _BaseExcelReader
+
+
+class _PyxlsbReader(_BaseExcelReader):
+ def __init__(self, filepath_or_buffer: FilePathOrBuffer):
+ """Reader using pyxlsb engine.
+
+ Parameters
+ __________
+ filepath_or_buffer: string, path object, or Workbook
+ Object to be parsed.
+ """
+ import_optional_dependency("pyxlsb")
+ # This will call load_workbook on the filepath or buffer
+ # And set the result to the book-attribute
+ super().__init__(filepath_or_buffer)
+
+ @property
+ def _workbook_class(self):
+ from pyxlsb import Workbook
+
+ return Workbook
+
+ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
+ from pyxlsb import open_workbook
+
+ # Todo: hack in buffer capability
+ # This might need some modifications to the Pyxlsb library
+ # Actual work for opening it is in xlsbpackage.py, line 20-ish
+
+ return open_workbook(filepath_or_buffer)
+
+ @property
+ def sheet_names(self) -> List[str]:
+ return self.book.sheets
+
+ def get_sheet_by_name(self, name: str):
+ return self.book.get_sheet(name)
+
+ def get_sheet_by_index(self, index: int):
+ # pyxlsb sheets are indexed from 1 onwards
+ # There's a fix for this in the source, but the pypi package doesn't have it
+ return self.book.get_sheet(index + 1)
+
+ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
+ # Todo: there is no way to distinguish between floats and datetimes in pyxlsb
+ # This means that there is no way to read datetime types from an xlsb file yet
+ if cell.v is None:
+ return "" # Prevents non-named columns from not showing up as Unnamed: i
+ if isinstance(cell.v, float) and convert_float:
+ val = int(cell.v)
+ if val == cell.v:
+ return val
+ else:
+ return float(cell.v)
+
+ return cell.v
+
+ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
+ return [
+ [self._convert_cell(c, convert_float) for c in r]
+ for r in sheet.rows(sparse=False)
+ ]
diff --git a/pandas/tests/io/data/excel/blank.xlsb b/pandas/tests/io/data/excel/blank.xlsb
new file mode 100644
index 0000000000000..d72fd68ab3dbf
Binary files /dev/null and b/pandas/tests/io/data/excel/blank.xlsb differ
diff --git a/pandas/tests/io/data/excel/blank_with_header.xlsb b/pandas/tests/io/data/excel/blank_with_header.xlsb
new file mode 100644
index 0000000000000..3c241513d221a
Binary files /dev/null and b/pandas/tests/io/data/excel/blank_with_header.xlsb differ
diff --git a/pandas/tests/io/data/excel/test1.xlsb b/pandas/tests/io/data/excel/test1.xlsb
new file mode 100644
index 0000000000000..d0b8a1f2735bd
Binary files /dev/null and b/pandas/tests/io/data/excel/test1.xlsb differ
diff --git a/pandas/tests/io/data/excel/test2.xlsb b/pandas/tests/io/data/excel/test2.xlsb
new file mode 100644
index 0000000000000..e19a0f1e067c8
Binary files /dev/null and b/pandas/tests/io/data/excel/test2.xlsb differ
diff --git a/pandas/tests/io/data/excel/test3.xlsb b/pandas/tests/io/data/excel/test3.xlsb
new file mode 100644
index 0000000000000..617d27630e8a0
Binary files /dev/null and b/pandas/tests/io/data/excel/test3.xlsb differ
diff --git a/pandas/tests/io/data/excel/test4.xlsb b/pandas/tests/io/data/excel/test4.xlsb
new file mode 100644
index 0000000000000..2e5bb229939be
Binary files /dev/null and b/pandas/tests/io/data/excel/test4.xlsb differ
diff --git a/pandas/tests/io/data/excel/test5.xlsb b/pandas/tests/io/data/excel/test5.xlsb
new file mode 100644
index 0000000000000..022ebef25aee2
Binary files /dev/null and b/pandas/tests/io/data/excel/test5.xlsb differ
diff --git a/pandas/tests/io/data/excel/test_converters.xlsb b/pandas/tests/io/data/excel/test_converters.xlsb
new file mode 100644
index 0000000000000..c39c33d8dd94f
Binary files /dev/null and b/pandas/tests/io/data/excel/test_converters.xlsb differ
diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.xlsb b/pandas/tests/io/data/excel/test_index_name_pre17.xlsb
new file mode 100644
index 0000000000000..5251b8f3b3194
Binary files /dev/null and b/pandas/tests/io/data/excel/test_index_name_pre17.xlsb differ
diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsb b/pandas/tests/io/data/excel/test_multisheet.xlsb
new file mode 100644
index 0000000000000..39b15568a7121
Binary files /dev/null and b/pandas/tests/io/data/excel/test_multisheet.xlsb differ
diff --git a/pandas/tests/io/data/excel/test_squeeze.xlsb b/pandas/tests/io/data/excel/test_squeeze.xlsb
new file mode 100644
index 0000000000000..6aadd727e957b
Binary files /dev/null and b/pandas/tests/io/data/excel/test_squeeze.xlsb differ
diff --git a/pandas/tests/io/data/excel/test_types.xlsb b/pandas/tests/io/data/excel/test_types.xlsb
new file mode 100644
index 0000000000000..e7403aa288263
Binary files /dev/null and b/pandas/tests/io/data/excel/test_types.xlsb differ
diff --git a/pandas/tests/io/data/excel/testdateoverflow.xlsb b/pandas/tests/io/data/excel/testdateoverflow.xlsb
new file mode 100644
index 0000000000000..3d279396924b9
Binary files /dev/null and b/pandas/tests/io/data/excel/testdateoverflow.xlsb differ
diff --git a/pandas/tests/io/data/excel/testdtype.xlsb b/pandas/tests/io/data/excel/testdtype.xlsb
new file mode 100644
index 0000000000000..1c1d45f0d783b
Binary files /dev/null and b/pandas/tests/io/data/excel/testdtype.xlsb differ
diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsb b/pandas/tests/io/data/excel/testmultiindex.xlsb
new file mode 100644
index 0000000000000..b66d6dab17ee0
Binary files /dev/null and b/pandas/tests/io/data/excel/testmultiindex.xlsb differ
diff --git a/pandas/tests/io/data/excel/testskiprows.xlsb b/pandas/tests/io/data/excel/testskiprows.xlsb
new file mode 100644
index 0000000000000..a5ff4ed22e70c
Binary files /dev/null and b/pandas/tests/io/data/excel/testskiprows.xlsb differ
diff --git a/pandas/tests/io/data/excel/times_1900.xlsb b/pandas/tests/io/data/excel/times_1900.xlsb
new file mode 100644
index 0000000000000..ceb7bccb0c66e
Binary files /dev/null and b/pandas/tests/io/data/excel/times_1900.xlsb differ
diff --git a/pandas/tests/io/data/excel/times_1904.xlsb b/pandas/tests/io/data/excel/times_1904.xlsb
new file mode 100644
index 0000000000000..e426dc959da49
Binary files /dev/null and b/pandas/tests/io/data/excel/times_1904.xlsb differ
diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py
index a257735dc1ec5..0455e0d61ad97 100644
--- a/pandas/tests/io/excel/conftest.py
+++ b/pandas/tests/io/excel/conftest.py
@@ -35,7 +35,7 @@ def df_ref(datapath):
return df_ref
-@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods"])
+@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"])
def read_ext(request):
"""
Valid extensions for reading Excel files.
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 629d3d02028bd..f8ff3567b8b64 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -31,7 +31,7 @@ def ignore_xlrd_time_clock_warning():
yield
-read_ext_params = [".xls", ".xlsx", ".xlsm", ".ods"]
+read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"]
engine_params = [
# Add any engines to test here
# When defusedxml is installed it triggers deprecation warnings for
@@ -57,6 +57,7 @@ def ignore_xlrd_time_clock_warning():
pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
],
),
+ pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")),
pytest.param("odf", marks=td.skip_if_no("odf")),
]
@@ -73,6 +74,10 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
return False
if read_ext == ".ods" and engine != "odf":
return False
+ if engine == "pyxlsb" and read_ext != ".xlsb":
+ return False
+ if read_ext == ".xlsb" and engine != "pyxlsb":
+ return False
return True
@@ -120,7 +125,6 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch):
"""
Change directory and set engine for read_excel calls.
"""
-
func = partial(pd.read_excel, engine=engine)
monkeypatch.chdir(datapath("io", "data", "excel"))
monkeypatch.setattr(pd, "read_excel", func)
@@ -142,6 +146,8 @@ def test_usecols_int(self, read_ext, df_ref):
)
def test_usecols_list(self, read_ext, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
df_ref = df_ref.reindex(columns=["B", "C"])
df1 = pd.read_excel(
@@ -156,6 +162,8 @@ def test_usecols_list(self, read_ext, df_ref):
tm.assert_frame_equal(df2, df_ref, check_names=False)
def test_usecols_str(self, read_ext, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
df1 = df_ref.reindex(columns=["A", "B", "C"])
df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A:D")
@@ -188,6 +196,9 @@ def test_usecols_str(self, read_ext, df_ref):
"usecols", [[0, 1, 3], [0, 3, 1], [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0]]
)
def test_usecols_diff_positional_int_columns_order(self, read_ext, usecols, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
expected = df_ref[["A", "C"]]
result = pd.read_excel(
"test1" + read_ext, "Sheet1", index_col=0, usecols=usecols
@@ -203,11 +214,17 @@ def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_r
tm.assert_frame_equal(result, expected, check_names=False)
def test_read_excel_without_slicing(self, read_ext, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
expected = df_ref
result = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0)
tm.assert_frame_equal(result, expected, check_names=False)
def test_usecols_excel_range_str(self, read_ext, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
expected = df_ref[["C", "D"]]
result = pd.read_excel(
"test1" + read_ext, "Sheet1", index_col=0, usecols="A,D:E"
@@ -274,12 +291,16 @@ def test_excel_stop_iterator(self, read_ext):
tm.assert_frame_equal(parsed, expected)
def test_excel_cell_error_na(self, read_ext):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
parsed = pd.read_excel("test3" + read_ext, "Sheet1")
expected = DataFrame([[np.nan]], columns=["Test"])
tm.assert_frame_equal(parsed, expected)
def test_excel_table(self, read_ext, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
df1 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0)
df2 = pd.read_excel("test1" + read_ext, "Sheet2", skiprows=[1], index_col=0)
@@ -291,6 +312,8 @@ def test_excel_table(self, read_ext, df_ref):
tm.assert_frame_equal(df3, df1.iloc[:-1])
def test_reader_special_dtypes(self, read_ext):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
expected = DataFrame.from_dict(
OrderedDict(
@@ -488,6 +511,9 @@ def test_read_excel_blank_with_header(self, read_ext):
def test_date_conversion_overflow(self, read_ext):
# GH 10001 : pandas.ExcelFile ignore parse_dates=False
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
expected = pd.DataFrame(
[
[pd.Timestamp("2016-03-12"), "Marc Johnson"],
@@ -504,9 +530,14 @@ def test_date_conversion_overflow(self, read_ext):
tm.assert_frame_equal(result, expected)
def test_sheet_name(self, read_ext, df_ref):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
filename = "test1"
sheet_name = "Sheet1"
+ if pd.read_excel.keywords["engine"] == "openpyxl":
+ pytest.xfail("Maybe not supported by openpyxl")
+
df1 = pd.read_excel(
filename + read_ext, sheet_name=sheet_name, index_col=0
) # doc
@@ -531,6 +562,10 @@ def test_bad_engine_raises(self, read_ext):
@tm.network
def test_read_from_http_url(self, read_ext):
+ if read_ext == ".xlsb":
+ pytest.xfail("xlsb files not present in master repo yet")
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
url = (
"https://raw.githubusercontent.com/pandas-dev/pandas/master/"
@@ -599,6 +634,8 @@ def test_read_from_py_localpath(self, read_ext):
tm.assert_frame_equal(expected, actual)
def test_reader_seconds(self, read_ext):
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
# Test reading times with and without milliseconds. GH5945.
expected = DataFrame.from_dict(
@@ -627,6 +664,9 @@ def test_reader_seconds(self, read_ext):
def test_read_excel_multiindex(self, read_ext):
# see gh-4679
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
mi_file = "testmultiindex" + read_ext
@@ -786,6 +826,9 @@ def test_read_excel_chunksize(self, read_ext):
def test_read_excel_skiprows_list(self, read_ext):
# GH 4903
+ if pd.read_excel.keywords["engine"] == "pyxlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
actual = pd.read_excel(
"testskiprows" + read_ext, "skiprows_list", skiprows=[0, 2]
)
@@ -851,13 +894,11 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch):
"""
Change directory and set engine for ExcelFile objects.
"""
-
func = partial(pd.ExcelFile, engine=engine)
monkeypatch.chdir(datapath("io", "data", "excel"))
monkeypatch.setattr(pd, "ExcelFile", func)
def test_excel_passes_na(self, read_ext):
-
with pd.ExcelFile("test4" + read_ext) as excel:
parsed = pd.read_excel(
excel, "Sheet1", keep_default_na=False, na_values=["apple"]
@@ -928,6 +969,10 @@ def test_unexpected_kwargs_raises(self, read_ext, arg):
pd.read_excel(excel, **kwarg)
def test_excel_table_sheet_by_index(self, read_ext, df_ref):
+ # For some reason pd.read_excel has no attribute 'keywords' here.
+ # Skipping based on read_ext instead.
+ if read_ext == ".xlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
with pd.ExcelFile("test1" + read_ext) as excel:
df1 = pd.read_excel(excel, 0, index_col=0)
@@ -951,6 +996,11 @@ def test_excel_table_sheet_by_index(self, read_ext, df_ref):
tm.assert_frame_equal(df3, df1.iloc[:-1])
def test_sheet_name(self, read_ext, df_ref):
+ # For some reason pd.read_excel has no attribute 'keywords' here.
+ # Skipping based on read_ext instead.
+ if read_ext == ".xlsb":
+ pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
+
filename = "test1"
sheet_name = "Sheet1"
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index d1f900a2dc58b..cc7e2311f362a 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -10,9 +10,11 @@
@pytest.fixture(autouse=True)
-def skip_ods_files(read_ext):
+def skip_ods_and_xlsb_files(read_ext):
if read_ext == ".ods":
pytest.skip("Not valid for xlrd")
+ if read_ext == ".xlsb":
+ pytest.skip("Not valid for xlrd")
def test_read_xlrd_book(read_ext, frame):