From 0007503fb37374232b3ca3c2f7d6197edbc76079 Mon Sep 17 00:00:00 2001
From: Scott Talbert
Date: Tue, 9 Apr 2019 11:00:49 -0400
Subject: [PATCH] BUG: Fix loading files from S3 with # characters in URL
 (GH25945)

This fixes loading files with URLs such as s3://bucket/key#1.csv. The
part from the # on was being lost because it was considered to be a URL
fragment. The fix disables URL fragment parsing, as it doesn't make
sense for S3 URLs.
---
 doc/source/whatsnew/v0.25.0.rst        | 1 +
 pandas/io/s3.py                        | 2 +-
 pandas/tests/io/conftest.py            | 1 +
 pandas/tests/io/parser/test_network.py | 5 +++++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 8dabaeb6c7bfe..34af5d1e3dbe5 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -356,6 +356,7 @@ I/O
 - Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`)
 - Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
 - Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
+- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/s3.py b/pandas/io/s3.py
index f127bb4c8094c..607eae27021c3 100644
--- a/pandas/io/s3.py
+++ b/pandas/io/s3.py
@@ -10,7 +10,7 @@
 
 def _strip_schema(url):
     """Returns the url without the s3:// part"""
-    result = parse_url(url)
+    result = parse_url(url, allow_fragments=False)
     return result.netloc + result.path
 
 
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index af6f7ac4ef528..a4e778a68c728 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -59,6 +59,7 @@ def s3_resource(tips_file, jsonl_file):
     moto = pytest.importorskip('moto')
 
     test_s3_files = [
+        ('tips#1.csv', tips_file),
         ('tips.csv', tips_file),
         ('tips.csv.gz', tips_file + '.gz'),
         ('tips.csv.bz2', tips_file + '.bz2'),
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index b36508d89d770..c8cace6118ad8 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -198,3 +198,8 @@ def test_read_csv_chunked_download(self, s3_resource, caplog):
         read_csv("s3://pandas-test/large-file.csv", nrows=5)
         # log of fetch_range (start, stop)
         assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
+
+    def test_read_s3_with_hash_in_key(self, tips_df):
+        # GH 25945
+        result = read_csv('s3://pandas-test/tips#1.csv')
+        tm.assert_frame_equal(tips_df, result)
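
Illustrative note (not part of the patch): a minimal sketch of the URL-parsing
behaviour the one-line change in pandas/io/s3.py relies on, assuming parse_url
is urllib's urlparse as the signature suggests; the bucket and key names below
are hypothetical.

    from urllib.parse import urlparse

    url = 's3://pandas-test/tips#1.csv'

    # Default parsing treats everything after '#' as a URL fragment,
    # so the S3 key is silently truncated to '/tips'.
    default = urlparse(url)
    print(default.netloc + default.path)   # pandas-test/tips

    # Disabling fragment parsing keeps the full key intact, which is
    # what _strip_schema() needs for keys containing '#'.
    fixed = urlparse(url, allow_fragments=False)
    print(fixed.netloc + fixed.path)       # pandas-test/tips#1.csv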