diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 3b3bf8cffe41b..b45c843d28eb8 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -467,6 +467,7 @@ Other enhancements
 
 - ``pd.read_csv`` can now read bz2-compressed files incrementally, and the C parser can read bz2-compressed files from AWS S3 (:issue:`11070`, :issue:`11072`).
 
+- In ``pd.read_csv``, recognize "s3n://" and "s3a://" URLs as designating S3 file storage (:issue:`11070`, :issue:`11071`).
 
 .. _whatsnew_0170.api:
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
index c6ece61f05a01..5ab5640ca12c0 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -66,9 +66,11 @@ def _is_url(url):
 
 
 def _is_s3_url(url):
-    """Check for an s3 url"""
+    """Check for an s3, s3n, or s3a url"""
     try:
-        return parse_url(url).scheme == 's3'
-    except:
+        return parse_url(url).scheme in ['s3', 's3n', 's3a']
+    except Exception:
+        # Anything parse_url cannot handle is treated as "not S3"; catching
+        # Exception (not a bare except) lets KeyboardInterrupt propagate.
         return False
 
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index fabe4ce40b22f..205140e02a8ea 100755
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -4253,6 +4253,22 @@ def test_parse_public_s3_bucket(self):
         nt.assert_false(df.empty)
         tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')), df)
 
+    @tm.network
+    def test_parse_public_s3n_bucket(self):
+        # Read from AWS s3 as "s3n" URL
+        df = pd.read_csv('s3n://pandas-test/tips.csv', nrows=10)
+        self.assertTrue(isinstance(df, pd.DataFrame))
+        self.assertFalse(df.empty)
+        tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')).iloc[:10], df)
+
+    @tm.network
+    def test_parse_public_s3a_bucket(self):
+        # Read from AWS s3 as "s3a" URL
+        df = pd.read_csv('s3a://pandas-test/tips.csv', nrows=10)
+        self.assertTrue(isinstance(df, pd.DataFrame))
+        self.assertFalse(df.empty)
+        tm.assert_frame_equal(pd.read_csv(tm.get_data_path('tips.csv')).iloc[:10], df)
+
     @tm.network
     def test_s3_fails(self):
         import boto