Skip to content

Commit 74cfc9c

Browse files
authored
Merge branch 'main' into issue-60550-fix-v2
2 parents 8da1615 + 9528057 commit 74cfc9c

File tree

6 files changed

+18
-4
lines changed

6 files changed

+18
-4
lines changed

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ COPY requirements-dev.txt /tmp
1313
RUN python -m pip install -r /tmp/requirements-dev.txt
1414
RUN git config --global --add safe.directory /home/pandas
1515

16-
ENV SHELL "/bin/bash"
16+
ENV SHELL="/bin/bash"
1717
CMD ["/bin/bash"]

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ I/O
718718
^^^
719719
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
720720
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
721+
- Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
721722
- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
722723
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
723724
- Bug in :meth:`DataFrame.to_dict` raising an unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)

pandas/io/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171

7272
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
7373
_VALID_URLS.discard("")
74-
_RFC_3986_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*://")
74+
_FSSPEC_URL_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9+\-+.]*(::[A-Za-z0-9+\-+.]+)*://")
7575

7676
BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer)
7777

@@ -291,7 +291,7 @@ def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
291291
"""
292292
return (
293293
isinstance(url, str)
294-
and bool(_RFC_3986_PATTERN.match(url))
294+
and bool(_FSSPEC_URL_PATTERN.match(url))
295295
and not url.startswith(("http://", "https://"))
296296
)
297297

pandas/tests/io/json/test_pandas.py

+1
Original file line numberDiff line numberDiff line change
@@ -1753,6 +1753,7 @@ def test_read_timezone_information(self):
17531753
[
17541754
"s3://example-fsspec/",
17551755
"gcs://another-fsspec/file.json",
1756+
"filecache::s3://yet-another-fsspec/file.json",
17561757
"https://example-site.com/data",
17571758
"some-protocol://data.txt",
17581759
],

pandas/tests/io/test_common.py

+12
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,18 @@ def test_is_fsspec_url():
501501
assert icom.is_fsspec_url("RFC-3986+compliant.spec://something")
502502

503503

504+
def test_is_fsspec_url_chained():
505+
# GH#48978 Support chained fsspec URLs
506+
# See https://filesystem-spec.readthedocs.io/en/latest/features.html#url-chaining.
507+
assert icom.is_fsspec_url("filecache::s3://pandas/test.csv")
508+
assert icom.is_fsspec_url("zip://test.csv::filecache::gcs://bucket/file.zip")
509+
assert icom.is_fsspec_url("filecache::zip://test.csv::gcs://bucket/file.zip")
510+
assert icom.is_fsspec_url("filecache::dask::s3://pandas/test.csv")
511+
assert not icom.is_fsspec_url("filecache:s3://pandas/test.csv")
512+
assert not icom.is_fsspec_url("filecache:::s3://pandas/test.csv")
513+
assert not icom.is_fsspec_url("filecache::://pandas/test.csv")
514+
515+
504516
@pytest.mark.parametrize("encoding", [None, "utf-8"])
505517
@pytest.mark.parametrize("format", ["csv", "json"])
506518
def test_codecs_encoding(encoding, format):

web/pandas/community/ecosystem.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ df = pd.read_csv("big.csv") # use all your cores!
590590
### [Pandarallel](https://github.com/nalepae/pandarallel)
591591

592592
Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code.
593-
If also displays progress bars.
593+
It also displays progress bars.
594594

595595
```python
596596
from pandarallel import pandarallel

0 commit comments

Comments
 (0)