 Tests parsers ability to read and parse non-local files
 and hence require a network connection to be read.
 """
-
 import os
+
 import pytest
+import six

 import pandas.util.testing as tm
 from pandas import DataFrame
 from pandas.io.parsers import read_csv, read_table


+@pytest.fixture(scope='module')
+def tips_file():
+    return os.path.join(tm.get_data_path(), 'tips.csv')
+
+
 @pytest.fixture(scope='module')
 def salaries_table():
     path = os.path.join(tm.get_data_path(), 'salaries.csv')
     return read_table(path)


+@pytest.fixture(scope='module')
+def test_s3_resource(request, tips_file):
+    pytest.importorskip('s3fs')
+    moto = pytest.importorskip('moto')
+    moto.mock_s3().start()
+
+    test_s3_files = [
+        ('tips.csv', tips_file),
+        ('tips.csv.gz', tips_file + '.gz'),
+        ('tips.csv.bz2', tips_file + '.bz2'),
+    ]
+
+    def add_tips_files(bucket_name):
+        for s3_key, file_name in test_s3_files:
+            with open(file_name, 'rb') as f:
+                conn.Bucket(bucket_name).put_object(
+                    Key=s3_key,
+                    Body=f)
+
+    boto3 = pytest.importorskip('boto3')
+    # see gh-16135
+    bucket = 'pandas-test'
+
+    conn = boto3.resource("s3", region_name="us-east-1")
+    conn.create_bucket(Bucket=bucket)
+    add_tips_files(bucket)
+
+    conn.create_bucket(Bucket='cant_get_it', ACL='private')
+    add_tips_files('cant_get_it')
+
+    def teardown():
+        moto.mock_s3().stop()
+    request.addfinalizer(teardown)
+
+    return conn
+
+
 @pytest.mark.network
 @pytest.mark.parametrize(
     "compression,extension",
@@ -50,151 +93,142 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
     tm.assert_frame_equal(url_table, salaries_table)


-class TestS3(object):
+@tm.network
+def test_parse_public_s3_bucket():
+    pytest.importorskip('s3fs')
+    # more of an integration test due to the not-public contents portion
+    # can probably mock this though.
+    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
+        df = read_csv('s3://pandas-test/tips.csv' +
+                      ext, compression=comp)
+        assert isinstance(df, DataFrame)
+        assert not df.empty
+        tm.assert_frame_equal(read_csv(
+            tm.get_data_path('tips.csv')), df)
+
+    # Read public file from bucket with not-public contents
+    df = read_csv('s3://cant_get_it/tips.csv')
+    assert isinstance(df, DataFrame)
+    assert not df.empty
+    tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
+
+
+def test_parse_public_s3n_bucket(test_s3_resource):
+
+    # Read from AWS s3 as "s3n" URL
+    df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
+    assert isinstance(df, DataFrame)
+    assert not df.empty
+    tm.assert_frame_equal(read_csv(
+        tm.get_data_path('tips.csv')).iloc[:10], df)

-    def setup_method(self, method):
-        try:
-            import s3fs  # noqa
-        except ImportError:
-            pytest.skip("s3fs not installed")

-    @tm.network
-    def test_parse_public_s3_bucket(self):
-        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            df = read_csv('s3://pandas-test/tips.csv' +
-                          ext, compression=comp)
+def test_parse_public_s3a_bucket(test_s3_resource):
+    # Read from AWS s3 as "s3a" URL
+    df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
+    assert isinstance(df, DataFrame)
+    assert not df.empty
+    tm.assert_frame_equal(read_csv(
+        tm.get_data_path('tips.csv')).iloc[:10], df)
+
+
+def test_parse_public_s3_bucket_nrows(test_s3_resource):
+    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
+        df = read_csv('s3://pandas-test/tips.csv' +
+                      ext, nrows=10, compression=comp)
+        assert isinstance(df, DataFrame)
+        assert not df.empty
+        tm.assert_frame_equal(read_csv(
+            tm.get_data_path('tips.csv')).iloc[:10], df)
+
+
+def test_parse_public_s3_bucket_chunked(test_s3_resource):
+    # Read with a chunksize
+    chunksize = 5
+    local_tips = read_csv(tm.get_data_path('tips.csv'))
+    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
+        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
+                             chunksize=chunksize, compression=comp)
+        assert df_reader.chunksize == chunksize
+        for i_chunk in [0, 1, 2]:
+            # Read a couple of chunks and make sure we see them
+            # properly.
+            df = df_reader.get_chunk()
+            assert isinstance(df, DataFrame)
+            assert not df.empty
+            true_df = local_tips.iloc[
+                chunksize * i_chunk: chunksize * (i_chunk + 1)]
+            tm.assert_frame_equal(true_df, df)
+
+
+def test_parse_public_s3_bucket_chunked_python(test_s3_resource):
+    # Read with a chunksize using the Python parser
+    chunksize = 5
+    local_tips = read_csv(tm.get_data_path('tips.csv'))
+    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
+        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
+                             chunksize=chunksize, compression=comp,
+                             engine='python')
+        assert df_reader.chunksize == chunksize
+        for i_chunk in [0, 1, 2]:
+            # Read a couple of chunks and make sure we see them properly.
+            df = df_reader.get_chunk()
             assert isinstance(df, DataFrame)
             assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')), df)
+            true_df = local_tips.iloc[
+                chunksize * i_chunk: chunksize * (i_chunk + 1)]
+            tm.assert_frame_equal(true_df, df)
+

-        # Read public file from bucket with not-public contents
-        df = read_csv('s3://cant_get_it/tips.csv')
+def test_parse_public_s3_bucket_python(test_s3_resource):
+    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
+        df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
+                      compression=comp)
         assert isinstance(df, DataFrame)
         assert not df.empty
-        tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
+        tm.assert_frame_equal(read_csv(
+            tm.get_data_path('tips.csv')), df)

-    @tm.network
-    def test_parse_public_s3n_bucket(self):
-        # Read from AWS s3 as "s3n" URL
-        df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
+
+def test_infer_s3_compression(test_s3_resource):
+    for ext in ['', '.gz', '.bz2']:
+        df = read_csv('s3://pandas-test/tips.csv' + ext,
+                      engine='python', compression='infer')
         assert isinstance(df, DataFrame)
         assert not df.empty
         tm.assert_frame_equal(read_csv(
-            tm.get_data_path('tips.csv')).iloc[:10], df)
+            tm.get_data_path('tips.csv')), df)
+

-    @tm.network
-    def test_parse_public_s3a_bucket(self):
-        # Read from AWS s3 as "s3a" URL
-        df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
+def test_parse_public_s3_bucket_nrows_python(test_s3_resource):
+    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
+        df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
+                      nrows=10, compression=comp)
         assert isinstance(df, DataFrame)
         assert not df.empty
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)

-    @tm.network
-    def test_parse_public_s3_bucket_nrows(self):
-        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            df = read_csv('s3://pandas-test/tips.csv' +
-                          ext, nrows=10, compression=comp)
-            assert isinstance(df, DataFrame)
-            assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')).iloc[:10], df)
-
-    @tm.network
-    def test_parse_public_s3_bucket_chunked(self):
-        # Read with a chunksize
-        chunksize = 5
-        local_tips = read_csv(tm.get_data_path('tips.csv'))
-        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
-                                 chunksize=chunksize, compression=comp)
-            assert df_reader.chunksize == chunksize
-            for i_chunk in [0, 1, 2]:
-                # Read a couple of chunks and make sure we see them
-                # properly.
-                df = df_reader.get_chunk()
-                assert isinstance(df, DataFrame)
-                assert not df.empty
-                true_df = local_tips.iloc[
-                    chunksize * i_chunk: chunksize * (i_chunk + 1)]
-                tm.assert_frame_equal(true_df, df)
-
-    @tm.network
-    def test_parse_public_s3_bucket_chunked_python(self):
-        # Read with a chunksize using the Python parser
-        chunksize = 5
-        local_tips = read_csv(tm.get_data_path('tips.csv'))
-        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
-                                 chunksize=chunksize, compression=comp,
-                                 engine='python')
-            assert df_reader.chunksize == chunksize
-            for i_chunk in [0, 1, 2]:
-                # Read a couple of chunks and make sure we see them properly.
-                df = df_reader.get_chunk()
-                assert isinstance(df, DataFrame)
-                assert not df.empty
-                true_df = local_tips.iloc[
-                    chunksize * i_chunk: chunksize * (i_chunk + 1)]
-                tm.assert_frame_equal(true_df, df)
-
-    @tm.network
-    def test_parse_public_s3_bucket_python(self):
-        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
-                          compression=comp)
-            assert isinstance(df, DataFrame)
-            assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')), df)
-
-    @tm.network
-    def test_infer_s3_compression(self):
-        for ext in ['', '.gz', '.bz2']:
-            df = read_csv('s3://pandas-test/tips.csv' + ext,
-                          engine='python', compression='infer')
-            assert isinstance(df, DataFrame)
-            assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')), df)
-
-    @tm.network
-    def test_parse_public_s3_bucket_nrows_python(self):
-        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
-            df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
-                          nrows=10, compression=comp)
-            assert isinstance(df, DataFrame)
-            assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')).iloc[:10], df)

-    @tm.network
-    def test_s3_fails(self):
-        with pytest.raises(IOError):
-            read_csv('s3://nyqpug/asdf.csv')
+def test_s3_fails(test_s3_resource):
+    with pytest.raises(IOError):
+        read_csv('s3://nyqpug/asdf.csv')

-        # Receive a permission error when trying to read a private bucket.
-        # It's irrelevant here that this isn't actually a table.
-        with pytest.raises(IOError):
-            read_csv('s3://cant_get_it/')
+    # Receive a permission error when trying to read a private bucket.
+    # It's irrelevant here that this isn't actually a table.
+    with pytest.raises(IOError):
+        read_csv('s3://cant_get_it/')

-    @tm.network
-    def boto3_client_s3(self):
-        # see gh-16135

-        # boto3 is a dependency of s3fs
-        import boto3
-        client = boto3.client("s3")
+def test_read_csv__handles_boto_s3_object(test_s3_resource, tips_file):
+    # see gh-16135

-        key = "/tips.csv"
-        bucket = "pandas-test"
-        s3_object = client.get_object(Bucket=bucket, Key=key)
+    s3_object = test_s3_resource.meta.client.get_object(Bucket='pandas-test',
+                                                        Key='tips.csv')

-        result = read_csv(s3_object["Body"])
-        assert isinstance(result, DataFrame)
-        assert not result.empty
+    result = read_csv(six.BytesIO(s3_object["Body"].read()), encoding='utf8')
+    assert isinstance(result, DataFrame)
+    assert not result.empty

-        expected = read_csv(tm.get_data_path('tips.csv'))
-        tm.assert_frame_equal(result, expected)
+    expected = read_csv(tips_file)
+    tm.assert_frame_equal(result, expected)
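
For context on how the new test_s3_resource fixture avoids real network traffic: moto patches botocore in-process, so boto3 (and s3fs, which pandas uses to resolve "s3://" URLs) talk to a local fake instead of AWS. The standalone sketch below is illustrative only and is not part of this patch; the bucket name, key, and CSV contents are made up, and it assumes moto, boto3, s3fs, and pandas are installed.

import boto3
import moto
import pandas as pd

mock = moto.mock_s3()   # patch botocore so no real AWS requests are made
mock.start()
try:
    # Create a fake bucket and upload a small CSV, mirroring what
    # add_tips_files() does in the fixture (names here are illustrative)
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket="example-bucket")
    conn.Bucket("example-bucket").put_object(Key="data.csv",
                                             Body=b"a,b\n1,2\n3,4\n")

    # read_csv resolves the "s3://" URL through s3fs, which now hits the mock
    df = pd.read_csv("s3://example-bucket/data.csv")
    assert list(df.columns) == ["a", "b"]
    assert len(df) == 2
finally:
    mock.stop()         # restore normal botocore behaviour

This is the same create_bucket/put_object sequence the fixture performs for tips.csv, tips.csv.gz, and tips.csv.bz2 before handing the mocked resource to the tests.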