CI: Update deps, docs

TomAugspurger · TomAugspurger · commit 92ac0634e331 · 2016-12-18T17:49:22.000-06:00
diff --git a/ci/requirements-2.7-64.run b/ci/requirements-2.7-64.run
@@ -11,7 +11,7 @@ sqlalchemy
 lxml=3.2.1
 scipy
 xlsxwriter
-boto
+s3fs
 bottleneck
 html5lib
 beautiful-soup
diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run
@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
 lxml=3.2.1
 scipy
 xlsxwriter=0.4.6
-boto=2.36.0
+s3fs
 bottleneck
 psycopg2=2.5.2
 patsy
diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run
@@ -13,7 +13,7 @@ numexpr
 pytables
 sqlalchemy
 lxml
-boto
+s3fs
 bottleneck
 psycopg2
 pymysql
diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
@@ -17,7 +17,7 @@ sqlalchemy
 pymysql
 psycopg2
 xarray
-boto
+s3fs
 
 # incompat with conda ATM
 # beautiful-soup
diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run
@@ -12,7 +12,7 @@ matplotlib
 jinja2
 bottleneck
 xarray
-boto
+s3fs
 
 # incompat with conda ATM
 # beautiful-soup
diff --git a/doc/source/install.rst b/doc/source/install.rst
@@ -262,7 +262,7 @@ Optional Dependencies
   * `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
 
 * `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
-* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
+* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
 * `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
 * One of `PyQt4
   <http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1487,6 +1487,23 @@ options include:
 Specifying any of the above options will produce a ``ParserWarning`` unless the
 python engine is selected explicitly using ``engine='python'``.
 
+Reading remote files
+''''''''''''''''''''
+
+You can pass in a URL to a CSV file:
+
+.. code-block:: python
+
+   df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',
+                    sep='\t')
+
+S3 URLs are handled as well:
+
+.. code-block:: python
+
+   df = pd.read_csv('s3://pandas-test/tips.csv')
+
+
 Writing out Data
 ''''''''''''''''
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -108,8 +108,6 @@ Other enhancements
 
 
 
-.. _whatsnew_0200.api:
-
 .. _whatsnew_0200.api_breaking:
 
 Backwards incompatible API changes
@@ -183,24 +181,26 @@ Map on Index types now return other Index types
 
     s.map(lambda x: x.hour)
 
+.. _whatsnew_0200.s3:
+
+S3 File Handling
+^^^^^^^^^^^^^^^^
+
+pandas now uses `s3fs <http://s3fs.readthedocs.io/>`_ for handling S3 connections. This shouldn't break
+any code. However, since ``s3fs`` is not a required dependency, you will need to install it separately,
+just as ``boto`` had to be installed in prior versions of pandas (:issue:`11915`).
+
+.. _whatsnew_0200.api:
 
 - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
 - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
-- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
 
-.. _whatsnew_2000.api.s3
 
-S3 File Handling
-^^^^^^^^^^^^^^^^
 
-pandas now uses `s3fs <http://s3fs.readthedocs.io/>`_ for handling S3 connections. This shouldn't break
-any code, but since s3fs is not a required dependency, you will need to install it separately (like boto
-in prior versions of pandas).
 
 Other API Changes
 ^^^^^^^^^^^^^^^^^
 
-
 .. _whatsnew_0200.deprecations:
 
 Deprecations
diff --git a/pandas/io/s3.py b/pandas/io/s3.py
@@ -2,6 +2,7 @@
 from pandas import compat
 try:
     import s3fs
+    from botocore.exceptions import NoCredentialsError
 except:
     raise ImportError("The s3fs library is required to handle s3 files")
 
@@ -19,15 +20,16 @@ def _strip_schema(url):
 
 def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                            compression=None):
-
-    # Assuming AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_S3_HOST
-    # are environment variables
     fs = s3fs.S3FileSystem(anon=False)
     try:
         filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer))
-    except OSError:
+    except (OSError, NoCredentialsError):
         # boto3 has troubles when trying to access a public file
         # when credentialed...
+        # An OSError is raised if you have credentials, but they
+        # aren't valid for that bucket.
+        # A NoCredentialsError is raised if no credentials can be
+        # found at all.
         fs = s3fs.S3FileSystem(anon=True)
         filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer))
     return filepath_or_buffer, None, compression
diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py
@@ -94,7 +94,7 @@ def show_versions(as_json=False):
         ("pymysql", lambda mod: mod.__version__),
         ("psycopg2", lambda mod: mod.__version__),
         ("jinja2", lambda mod: mod.__version__),
-        ("boto", lambda mod: mod.__version__),
+        ("s3fs", lambda mod: mod.__version__),
         ("pandas_datareader", lambda mod: mod.__version__)
     ]
 

Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ def show_versions(as_json=False):`
`94`	`94`	`("pymysql", lambda mod: mod.__version__),`
`95`	`95`	`("psycopg2", lambda mod: mod.__version__),`
`96`	`96`	`("jinja2", lambda mod: mod.__version__),`
`97`		`- ("boto", lambda mod: mod.__version__),`
	`97`	`+ ("s3fs", lambda mod: mod.__version__),`
`98`	`98`	`("pandas_datareader", lambda mod: mod.__version__)`
`99`	`99`	`]`
`100`	`100`