Skip to content

Commit 712ffbd

Browse files
authored
Moto server (#35655)
1 parent 8f79543 commit 712ffbd

26 files changed

+307
-120
lines changed

ci/deps/azure-37-locale.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies:
2121
- lxml
2222
- matplotlib>=3.3.0
2323
- moto
24+
- flask
2425
- nomkl
2526
- numexpr
2627
- numpy=1.16.*

ci/deps/azure-37-slow.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ dependencies:
2727
- python-dateutil
2828
- pytz
2929
- s3fs>=0.4.0
30+
- moto>=1.3.14
3031
- scipy
3132
- sqlalchemy
3233
- xlrd
3334
- xlsxwriter
3435
- xlwt
3536
- moto
37+
- flask

ci/deps/azure-38-locale.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies:
1414

1515
# pandas dependencies
1616
- beautifulsoup4
17+
- flask
1718
- html5lib
1819
- ipython
1920
- jinja2
@@ -32,6 +33,7 @@ dependencies:
3233
- xlrd
3334
- xlsxwriter
3435
- xlwt
36+
- moto
3537
- pyarrow>=0.15
3638
- pip
3739
- pip:

ci/deps/azure-windows-37.yaml

+4-3
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,22 @@ dependencies:
1515
# pandas dependencies
1616
- beautifulsoup4
1717
- bottleneck
18-
- fsspec>=0.7.4
18+
- fsspec>=0.8.0
1919
- gcsfs>=0.6.0
2020
- html5lib
2121
- jinja2
2222
- lxml
2323
- matplotlib=2.2.*
24-
- moto
24+
- moto>=1.3.14
25+
- flask
2526
- numexpr
2627
- numpy=1.16.*
2728
- openpyxl
2829
- pyarrow=0.15
2930
- pytables
3031
- python-dateutil
3132
- pytz
32-
- s3fs>=0.4.0
33+
- s3fs>=0.4.2
3334
- scipy
3435
- sqlalchemy
3536
- xlrd

ci/deps/azure-windows-38.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ dependencies:
1616
- blosc
1717
- bottleneck
1818
- fastparquet>=0.3.2
19+
- flask
20+
- fsspec>=0.8.0
1921
- matplotlib=3.1.3
22+
- moto>=1.3.14
2023
- numba
2124
- numexpr
2225
- numpy=1.18.*
@@ -26,6 +29,7 @@ dependencies:
2629
- pytables
2730
- python-dateutil
2831
- pytz
32+
- s3fs>=0.4.0
2933
- scipy
3034
- xlrd
3135
- xlsxwriter

ci/deps/travis-37-arm64.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@ dependencies:
1717
- python-dateutil
1818
- pytz
1919
- pip
20+
- flask
2021
- pip:
2122
- moto

ci/deps/travis-37-cov.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ dependencies:
2323
- geopandas
2424
- html5lib
2525
- matplotlib
26-
- moto
26+
- moto>=1.3.14
27+
- flask
2728
- nomkl
2829
- numexpr
2930
- numpy=1.16.*

ci/deps/travis-37-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ dependencies:
2121
- jinja2
2222
- lxml=4.3.0
2323
- matplotlib=3.0.*
24-
- moto
2524
- nomkl
2625
- numexpr
2726
- numpy
2827
- openpyxl
2928
- pandas-gbq=0.12.0
29+
- pyarrow>=0.17
3030
- psycopg2=2.7
3131
- pyarrow>=0.15.0 # GH #35813
3232
- pymysql=0.7.11

ci/deps/travis-37.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ dependencies:
2020
- pyarrow
2121
- pytz
2222
- s3fs>=0.4.0
23+
- moto>=1.3.14
24+
- flask
2325
- tabulate
2426
- pyreadstat
2527
- pip
26-
- pip:
27-
- moto

doc/source/whatsnew/v1.2.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ of the individual storage backends (detailed from the fsspec docs for
2424
`builtin implementations`_ and linked to `external ones`_). See
2525
Section :ref:`io.remote`.
2626

27+
:issue:`35655` added fsspec support (including ``storage_options``)
28+
for reading Excel files.
29+
2730
.. _builtin implementations: https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
2831
.. _external ones: https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
2932

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ dependencies:
5151
- botocore>=1.11
5252
- hypothesis>=3.82
5353
- moto # mock S3
54+
- flask
5455
- pytest>=5.0.1
5556
- pytest-cov
5657
- pytest-xdist>=1.21

pandas/io/excel/_base.py

+23-7
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
from io import BufferedIOBase, BytesIO, RawIOBase
44
import os
55
from textwrap import fill
6-
from typing import Union
6+
from typing import Any, Mapping, Union
77

88
from pandas._config import config
99

1010
from pandas._libs.parsers import STR_NA_VALUES
11+
from pandas._typing import StorageOptions
1112
from pandas.errors import EmptyDataError
1213
from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
1314

@@ -199,6 +200,15 @@
199200
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
200201
'X'...'X'. Passing in False will cause data to be overwritten if there
201202
are duplicate names in the columns.
203+
storage_options : StorageOptions
204+
Extra options that make sense for a particular storage connection, e.g.
205+
host, port, username, password, etc., if using a URL that will
206+
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error
207+
will be raised if providing this argument with a local path or
208+
a file-like buffer. See the fsspec and backend storage implementation
209+
docs for the set of allowed keys and values.
210+
211+
.. versionadded:: 1.2.0
202212
203213
Returns
204214
-------
@@ -298,10 +308,11 @@ def read_excel(
298308
skipfooter=0,
299309
convert_float=True,
300310
mangle_dupe_cols=True,
311+
storage_options: StorageOptions = None,
301312
):
302313

303314
if not isinstance(io, ExcelFile):
304-
io = ExcelFile(io, engine=engine)
315+
io = ExcelFile(io, storage_options=storage_options, engine=engine)
305316
elif engine and engine != io.engine:
306317
raise ValueError(
307318
"Engine should not be specified when passing "
@@ -336,12 +347,14 @@ def read_excel(
336347

337348

338349
class _BaseExcelReader(metaclass=abc.ABCMeta):
339-
def __init__(self, filepath_or_buffer):
350+
def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
340351
# If filepath_or_buffer is a url, load the data into a BytesIO
341352
if is_url(filepath_or_buffer):
342353
filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read())
343354
elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
344-
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)
355+
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(
356+
filepath_or_buffer, storage_options=storage_options
357+
)
345358

346359
if isinstance(filepath_or_buffer, self._workbook_class):
347360
self.book = filepath_or_buffer
@@ -837,14 +850,16 @@ class ExcelFile:
837850
from pandas.io.excel._pyxlsb import _PyxlsbReader
838851
from pandas.io.excel._xlrd import _XlrdReader
839852

840-
_engines = {
853+
_engines: Mapping[str, Any] = {
841854
"xlrd": _XlrdReader,
842855
"openpyxl": _OpenpyxlReader,
843856
"odf": _ODFReader,
844857
"pyxlsb": _PyxlsbReader,
845858
}
846859

847-
def __init__(self, path_or_buffer, engine=None):
860+
def __init__(
861+
self, path_or_buffer, engine=None, storage_options: StorageOptions = None
862+
):
848863
if engine is None:
849864
engine = "xlrd"
850865
if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
@@ -858,13 +873,14 @@ def __init__(self, path_or_buffer, engine=None):
858873
raise ValueError(f"Unknown engine: {engine}")
859874

860875
self.engine = engine
876+
self.storage_options = storage_options
861877

862878
# Could be a str, ExcelFile, Book, etc.
863879
self.io = path_or_buffer
864880
# Always a string
865881
self._io = stringify_path(path_or_buffer)
866882

867-
self._reader = self._engines[engine](self._io)
883+
self._reader = self._engines[engine](self._io, storage_options=storage_options)
868884

869885
def __fspath__(self):
870886
return self._io

pandas/io/excel/_odfreader.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas._typing import FilePathOrBuffer, Scalar
5+
from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions
66
from pandas.compat._optional import import_optional_dependency
77

88
import pandas as pd
@@ -16,13 +16,19 @@ class _ODFReader(_BaseExcelReader):
1616
1717
Parameters
1818
----------
19-
filepath_or_buffer: string, path to be parsed or
19+
filepath_or_buffer : string, path to be parsed or
2020
an open readable stream.
21+
storage_options : StorageOptions
22+
Passed to fsspec for appropriate URLs (see ``get_filepath_or_buffer``).
2123
"""
2224

23-
def __init__(self, filepath_or_buffer: FilePathOrBuffer):
25+
def __init__(
26+
self,
27+
filepath_or_buffer: FilePathOrBuffer,
28+
storage_options: StorageOptions = None,
29+
):
2430
import_optional_dependency("odf")
25-
super().__init__(filepath_or_buffer)
31+
super().__init__(filepath_or_buffer, storage_options=storage_options)
2632

2733
@property
2834
def _workbook_class(self):

pandas/io/excel/_openpyxl.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas._typing import FilePathOrBuffer, Scalar
5+
from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions
66
from pandas.compat._optional import import_optional_dependency
77

88
from pandas.io.excel._base import ExcelWriter, _BaseExcelReader
@@ -467,17 +467,23 @@ def write_cells(
467467

468468

469469
class _OpenpyxlReader(_BaseExcelReader):
470-
def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None:
470+
def __init__(
471+
self,
472+
filepath_or_buffer: FilePathOrBuffer,
473+
storage_options: StorageOptions = None,
474+
) -> None:
471475
"""
472476
Reader using openpyxl engine.
473477
474478
Parameters
475479
----------
476480
filepath_or_buffer : string, path object or Workbook
477481
Object to be parsed.
482+
storage_options : StorageOptions
483+
Passed to fsspec for appropriate URLs (see ``get_filepath_or_buffer``).
478484
"""
479485
import_optional_dependency("openpyxl")
480-
super().__init__(filepath_or_buffer)
486+
super().__init__(filepath_or_buffer, storage_options=storage_options)
481487

482488
@property
483489
def _workbook_class(self):

pandas/io/excel/_pyxlsb.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,31 @@
11
from typing import List
22

3-
from pandas._typing import FilePathOrBuffer, Scalar
3+
from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions
44
from pandas.compat._optional import import_optional_dependency
55

66
from pandas.io.excel._base import _BaseExcelReader
77

88

99
class _PyxlsbReader(_BaseExcelReader):
10-
def __init__(self, filepath_or_buffer: FilePathOrBuffer):
10+
def __init__(
11+
self,
12+
filepath_or_buffer: FilePathOrBuffer,
13+
storage_options: StorageOptions = None,
14+
):
1115
"""
1216
Reader using pyxlsb engine.
1317
1418
Parameters
1519
----------
16-
filepath_or_buffer: str, path object, or Workbook
20+
filepath_or_buffer : str, path object, or Workbook
1721
Object to be parsed.
22+
storage_options : StorageOptions
23+
Passed to fsspec for appropriate URLs (see ``get_filepath_or_buffer``).
1824
"""
1925
import_optional_dependency("pyxlsb")
2026
# This will call load_workbook on the filepath or buffer
2127
# And set the result to the book-attribute
22-
super().__init__(filepath_or_buffer)
28+
super().__init__(filepath_or_buffer, storage_options=storage_options)
2329

2430
@property
2531
def _workbook_class(self):

pandas/io/excel/_xlrd.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,27 @@
22

33
import numpy as np
44

5+
from pandas._typing import StorageOptions
56
from pandas.compat._optional import import_optional_dependency
67

78
from pandas.io.excel._base import _BaseExcelReader
89

910

1011
class _XlrdReader(_BaseExcelReader):
11-
def __init__(self, filepath_or_buffer):
12+
def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
1213
"""
1314
Reader using xlrd engine.
1415
1516
Parameters
1617
----------
1718
filepath_or_buffer : string, path object or Workbook
1819
Object to be parsed.
20+
storage_options : StorageOptions
21+
Passed to fsspec for appropriate URLs (see ``get_filepath_or_buffer``).
1922
"""
2023
err_msg = "Install xlrd >= 1.0.0 for Excel support"
2124
import_optional_dependency("xlrd", extra=err_msg)
22-
super().__init__(filepath_or_buffer)
25+
super().__init__(filepath_or_buffer, storage_options=storage_options)
2326

2427
@property
2528
def _workbook_class(self):

pandas/io/feather_format.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
""" feather-format compat """
22

3+
from pandas._typing import StorageOptions
34
from pandas.compat._optional import import_optional_dependency
45

56
from pandas import DataFrame, Int64Index, RangeIndex
67

78
from pandas.io.common import get_filepath_or_buffer
89

910

10-
def to_feather(df: DataFrame, path, storage_options=None, **kwargs):
11+
def to_feather(df: DataFrame, path, storage_options: StorageOptions = None, **kwargs):
1112
"""
1213
Write a DataFrame to the binary Feather format.
1314
@@ -77,7 +78,9 @@ def to_feather(df: DataFrame, path, storage_options=None, **kwargs):
7778
feather.write_feather(df, path, **kwargs)
7879

7980

80-
def read_feather(path, columns=None, use_threads: bool = True, storage_options=None):
81+
def read_feather(
82+
path, columns=None, use_threads: bool = True, storage_options: StorageOptions = None
83+
):
8184
"""
8285
Load a feather-format object from the file path.
8386

0 commit comments

Comments
 (0)