diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8dabaeb6c7bfe..34af5d1e3dbe5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -356,6 +356,7 @@ I/O - Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) - Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`) - Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`) +- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`) Plotting ^^^^^^^^ diff --git a/pandas/io/s3.py b/pandas/io/s3.py index f127bb4c8094c..607eae27021c3 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -10,7 +10,7 @@ def _strip_schema(url): """Returns the url without the s3:// part""" - result = parse_url(url) + result = parse_url(url, allow_fragments=False) return result.netloc + result.path diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index af6f7ac4ef528..a4e778a68c728 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -59,6 +59,7 @@ def s3_resource(tips_file, jsonl_file): moto = pytest.importorskip('moto') test_s3_files = [ + ('tips#1.csv', tips_file), ('tips.csv', tips_file), ('tips.csv.gz', tips_file + '.gz'), ('tips.csv.bz2', tips_file + '.bz2'), diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index b36508d89d770..c8cace6118ad8 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -198,3 +198,8 @@ def test_read_csv_chunked_download(self, s3_resource, caplog): read_csv("s3://pandas-test/large-file.csv", nrows=5) # log of fetch_range (start, stop) assert ((0, 5505024) in {x.args[-2:] for x in caplog.records}) + + def test_read_s3_with_hash_in_key(self, tips_df): + # GH 25945 + result = read_csv('s3://pandas-test/tips#1.csv') + tm.assert_frame_equal(tips_df, result)