diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 89bc942cb7250..de69166b8c196 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -784,6 +784,7 @@ Other - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Bug in :func:`read_csv` where reading a chained fsspec TAR URL with ``compression="infer"`` failed with ``tarfile.ReadError`` (:issue:`60028`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) diff --git a/pandas/io/common.py b/pandas/io/common.py index 8da3ca0218983..e0076eb486976 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -584,6 +584,9 @@ def infer_compression( # Infer compression if compression == "infer": # Convert all path types (e.g. 
pathlib.Path) to strings + if isinstance(filepath_or_buffer, str) and "::" in filepath_or_buffer: + # chained fsspec URLs join links with "::"; infer from the first link, which names the file being read + filepath_or_buffer = filepath_or_buffer.split("::")[0] filepath_or_buffer = stringify_path(filepath_or_buffer, convert_file_like=True) if not isinstance(filepath_or_buffer, str): # Cannot infer compression of a buffer, assume no compression diff --git a/pandas/tests/io/data/tar/test-csv.tar b/pandas/tests/io/data/tar/test-csv.tar new file mode 100644 index 0000000000000..c3b3091348426 Binary files /dev/null and b/pandas/tests/io/data/tar/test-csv.tar differ diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 10e3af601b7ef..4f3f613f71542 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -25,6 +25,7 @@ WASM, is_platform_windows, ) +import pandas.util._test_decorators as td import pandas as pd import pandas._testing as tm @@ -642,6 +643,19 @@ def close(self): handles.created_handles.append(TestError()) +@td.skip_if_no("fsspec", min_version="2023.1.0") +@pytest.mark.parametrize("compression", [None, "infer"]) +def test_read_csv_chained_url_no_error(compression): + # GH 60028, GH 60100 + tar_file_path = "pandas/tests/io/data/tar/test-csv.tar" + chained_file_url = f"tar://test.csv::file://{tar_file_path}" + + result = pd.read_csv(chained_file_url, compression=compression, sep=";") + expected = pd.DataFrame({"1": {0: 3}, "2": {0: 4}}) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( "reader", [