Skip to content

Commit 22df68e

Browse files
authored
BUG: read_csv with chained fsspec TAR file and compression="infer" (#60100)
1 parent f9f72d1 commit 22df68e

File tree

4 files changed

+18
-0
lines changed

4 files changed

+18
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,7 @@ Other
784784
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
785785
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
786786
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
787+
- Bug in :func:`read_csv` where reading a chained fsspec URL that points into a TAR file with ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
787788
- Bug in DataFrame Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
788789
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
789790

pandas/io/common.py

+3
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,9 @@ def infer_compression(
584584
# Infer compression
585585
if compression == "infer":
586586
# Convert all path types (e.g. pathlib.Path) to strings
587+
if isinstance(filepath_or_buffer, str) and "::" in filepath_or_buffer:
588+
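# fsspec chained URLs are joined with "::"; keep only the first segment so the extension used for inference is the target file's, not the container's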
589+
filepath_or_buffer = filepath_or_buffer.split("::")[0]
587590
filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True)
588591
if not isinstance(filepath_or_buffer, str):
589592
# Cannot infer compression of a buffer, assume no compression

pandas/tests/io/data/tar/test-csv.tar

10 KB
Binary file not shown.

pandas/tests/io/test_common.py

+14
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
WASM,
2626
is_platform_windows,
2727
)
28+
import pandas.util._test_decorators as td
2829

2930
import pandas as pd
3031
import pandas._testing as tm
@@ -642,6 +643,19 @@ def close(self):
642643
handles.created_handles.append(TestError())
643644

644645

646+
@td.skip_if_no("fsspec", min_version="2023.1.0")
647+
@pytest.mark.parametrize("compression", [None, "infer"])
648+
def test_read_csv_chained_url_no_error(compression):
649+
# GH 60028 (fixed in PR 60100)
650+
tar_file_path = "pandas/tests/io/data/tar/test-csv.tar"
651+
chained_file_url = f"tar://test.csv::file://{tar_file_path}"
652+
653+
result = pd.read_csv(chained_file_url, compression=compression, sep=";")
654+
expected = pd.DataFrame({"1": {0: 3}, "2": {0: 4}})
655+
656+
tm.assert_frame_equal(expected, result)
657+
658+
645659
@pytest.mark.parametrize(
646660
"reader",
647661
[

0 commit comments

Comments
 (0)