
Commit 0e87874

resolved merge conflict in whatsnew/v0.20.0.txt

2 parents 0814e5b + 8e630b6

32 files changed: +231 -208 lines

asv_bench/benchmarks/io_bench.py

+1 -1

@@ -153,7 +153,7 @@ def setup(self, compression, engine):
             # The Python 2 C parser can't read bz2 from open files.
             raise NotImplementedError
         try:
-            import boto
+            import s3fs
         except ImportError:
             # Skip these benchmarks if `boto` is not installed.
             raise NotImplementedError

ci/lint.sh

+3 -3

@@ -7,6 +7,8 @@ source activate pandas
 RET=0
 
 if [ "$LINT" ]; then
+    pip install cpplint
+
     # pandas/rpy is deprecated and will be removed.
     # pandas/src is C code, so no need to search there.
     echo "Linting *.py"
@@ -43,13 +45,11 @@ if [ "$LINT" ]; then
     # from Cython files nor do we want to lint C files that we didn't modify for
     # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
     # we can lint all header files since they aren't "generated" like C files are.
-    pip install cpplint
-
     echo "Linting *.c and *.h"
     for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson'
     do
         echo "linting -> pandas/src/$path"
-        cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
+        cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
         if [ $? -ne "0" ]; then
             RET=1
         fi

ci/requirements-2.7-64.run

+1 -1

@@ -11,7 +11,7 @@ sqlalchemy
 lxml=3.2.1
 scipy
 xlsxwriter
-boto
+s3fs
 bottleneck
 html5lib
 beautiful-soup

ci/requirements-2.7.run

+1 -1

@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
 lxml=3.2.1
 scipy
 xlsxwriter=0.4.6
-boto=2.36.0
+s3fs
 bottleneck
 psycopg2=2.5.2
 patsy

ci/requirements-2.7_SLOW.run

+1 -1

@@ -13,7 +13,7 @@ numexpr
 pytables
 sqlalchemy
 lxml
-boto
+s3fs
 bottleneck
 psycopg2
 pymysql

ci/requirements-3.5.run

+1 -1

@@ -17,7 +17,7 @@ sqlalchemy
 pymysql
 psycopg2
 xarray
-boto
+s3fs
 
 # incompat with conda ATM
 # beautiful-soup

ci/requirements-3.5_OSX.run

+1 -1

@@ -12,7 +12,7 @@ matplotlib
 jinja2
 bottleneck
 xarray
-boto
+s3fs
 
 # incompat with conda ATM
 # beautiful-soup

doc/foo

+5

@@ -0,0 +1,5 @@
+,col_1
+0,1
+1,2
+2,'A'
+3,4.22

doc/source/install.rst

+1 -1

@@ -262,7 +262,7 @@ Optional Dependencies
 * `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
 
 * `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
-* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
+* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
 * `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
 * One of `PyQt4
   <http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide

doc/source/io.rst

+17

@@ -1487,6 +1487,23 @@ options include:
 Specifying any of the above options will produce a ``ParserWarning`` unless the
 python engine is selected explicitly using ``engine='python'``.
 
+Reading remote files
+''''''''''''''''''''
+
+You can pass in a URL to a CSV file:
+
+.. code-block:: python
+
+   df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',
+                    sep='\t')
+
+S3 URLs are handled as well:
+
+.. code-block:: python
+
+   df = pd.read_csv('s3://pandas-test/tips.csv')
+
+
 Writing out Data
 ''''''''''''''''
 
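
As a usage sketch of the API documented above (not part of the diff; the bucket and file names are hypothetical), the S3 path can be guarded against the missing optional dependency:

    import pandas as pd

    try:
        # As of this commit, s3:// URLs are routed through s3fs rather
        # than boto; s3fs is optional and must be installed separately.
        df = pd.read_csv('s3://my-bucket/data.csv')   # hypothetical bucket
    except ImportError:
        # Raised when s3fs is not available; fall back to a local copy.
        df = pd.read_csv('data.csv')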

doc/source/whatsnew/v0.20.0.txt

+13 -8

@@ -106,13 +106,14 @@ Other enhancements
 - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
 - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
 
+- ``.select_dtypes()`` now allows `datetimetz` to generically select datetimes with tz (:issue:`14910`)
+
 
 .. _whatsnew_0200.api_breaking:
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-
 .. _whatsnew.api_breaking.index_map
 
 Map on Index types now return other Index types
@@ -181,18 +182,22 @@ Map on Index types now return other Index types
 
     s.map(lambda x: x.hour)
 
+.. _whatsnew_0200.s3:
 
-.. _whatsnew_0200.api:
-
-- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
-- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
-
+S3 File Handling
+^^^^^^^^^^^^^^^^
 
+pandas now uses `s3fs <http://s3fs.readthedocs.io/>`_ for handling S3 connections. This shouldn't break
+any code. However, since s3fs is not a required dependency, you will need to install it separately (like boto
+in prior versions of pandas) (:issue:`11915`).
 
+.. _whatsnew_0200.api:
 
 Other API Changes
 ^^^^^^^^^^^^^^^^^
 
+- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
+- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
 - :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>` now works as exact match provided that string resolution coincides with index resolution, including a case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match <timeseries.slice_vs_exact_match>` for details.
 
 .. ipython:: python
@@ -266,9 +271,9 @@ Bug Fixes
 ~~~~~~~~~
 
 - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
+- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`)
 - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`)
-
-
+- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
 
 
 
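
A minimal sketch of the ``.select_dtypes()`` enhancement listed under "Other enhancements" above (the frame and column names are made up for the example):

    import pandas as pd

    df = pd.DataFrame({'naive': pd.date_range('2016-01-01', periods=3),
                       'aware': pd.date_range('2016-01-01', periods=3,
                                              tz='US/Eastern'),
                       'num': [1, 2, 3]})

    # 'datetimetz' generically matches tz-aware datetime columns,
    # whatever the specific timezone.
    df.select_dtypes(include=['datetimetz'])   # keeps only 'aware'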

pandas/core/algorithms.py

+32

@@ -113,6 +113,38 @@ def _unique_generic(values, table_type, type_caster):
     return type_caster(uniques)
 
 
+def unique1d(values):
+    """
+    Hash table-based unique
+    """
+    if np.issubdtype(values.dtype, np.floating):
+        table = htable.Float64HashTable(len(values))
+        uniques = np.array(table.unique(_ensure_float64(values)),
+                           dtype=np.float64)
+    elif np.issubdtype(values.dtype, np.datetime64):
+        table = htable.Int64HashTable(len(values))
+        uniques = table.unique(_ensure_int64(values))
+        uniques = uniques.view('M8[ns]')
+    elif np.issubdtype(values.dtype, np.timedelta64):
+        table = htable.Int64HashTable(len(values))
+        uniques = table.unique(_ensure_int64(values))
+        uniques = uniques.view('m8[ns]')
+    elif np.issubdtype(values.dtype, np.integer):
+        table = htable.Int64HashTable(len(values))
+        uniques = table.unique(_ensure_int64(values))
+    else:
+
+        # its cheaper to use a String Hash Table than Object
+        if lib.infer_dtype(values) in ['string']:
+            table = htable.StringHashTable(len(values))
+        else:
+            table = htable.PyObjectHashTable(len(values))
+
+        uniques = table.unique(_ensure_object(values))
+
+    return uniques
+
+
 def isin(comps, values):
     """
     Compute the isin boolean array
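
The relocated unique1d picks a hash table specialized for the values' dtype (with a cheaper string table for object arrays holding only strings) and, unlike np.unique, does not sort. A sketch of the observable behavior through the public pd.unique wrapper, which is likewise hash-table based and order-preserving:

    import numpy as np
    import pandas as pd

    values = np.array([3, 1, 2, 1, 3])

    # Hash-table uniquing preserves order of first appearance.
    pd.unique(values)   # array([3, 1, 2])
    np.unique(values)   # array([1, 2, 3]) -- sorted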

pandas/core/base.py

+1 -1

@@ -969,7 +969,7 @@ def unique(self):
         if hasattr(values, 'unique'):
             result = values.unique()
         else:
-            from pandas.core.nanops import unique1d
+            from pandas.core.algorithms import unique1d
             result = unique1d(values)
         return result
 

pandas/core/categorical.py

+1 -2

@@ -25,7 +25,7 @@
                          is_scalar)
 from pandas.core.common import is_null_slice
 
-from pandas.core.algorithms import factorize, take_1d
+from pandas.core.algorithms import factorize, take_1d, unique1d
 from pandas.core.base import (PandasObject, PandasDelegate,
                               NoNewAttributesMixin, _shared_docs)
 import pandas.core.common as com
@@ -1834,7 +1834,6 @@ def unique(self):
         unique values : ``Categorical``
         """
 
-        from pandas.core.nanops import unique1d
         # unlike np.unique, unique1d does not sort
         unique_codes = unique1d(self.codes)
         cat = self.copy()
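
Because unique1d does not sort, Categorical.unique keeps values in order of first appearance and drops unused categories. A small sketch of the expected behavior:

    import pandas as pd

    cat = pd.Categorical(['b', 'a', 'b'], categories=['a', 'b', 'c'])

    # Order of appearance is kept; the unused category 'c' is dropped.
    cat.unique()   # [b, a], Categories (2, object): [b, a]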

pandas/core/frame.py

+5

@@ -2257,7 +2257,12 @@ def select_dtypes(self, include=None, exclude=None):
           this will return *all* object dtype columns
         * See the `numpy dtype hierarchy
           <http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html>`__
+        * To select datetimes, use np.datetime64, 'datetime' or 'datetime64'
+        * To select timedeltas, use np.timedelta64, 'timedelta' or
+          'timedelta64'
         * To select Pandas categorical dtypes, use 'category'
+        * To select Pandas datetimetz dtypes, use 'datetimetz' (new in 0.20.0),
+          or a 'datetime64[ns, tz]' string
 
         Examples
         --------
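
A sketch of the selectors documented in the bullets above (column names are hypothetical; string aliases and numpy types are interchangeable):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'when': pd.date_range('2016-01-01', periods=3),
                       'lag': pd.to_timedelta([1, 2, 3], unit='d'),
                       'x': [1.0, 2.0, 3.0]})

    df.select_dtypes(include=['datetime'])       # keeps 'when'
    df.select_dtypes(include=[np.timedelta64])   # keeps 'lag'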

pandas/core/generic.py

+3

@@ -5262,6 +5262,9 @@ def describe(self, percentiles=None, include=None, exclude=None):
             raise ValueError("Cannot describe a DataFrame without columns")
 
         if percentiles is not None:
+            # explicit conversion of `percentiles` to list
+            percentiles = list(percentiles)
+
             # get them all to be in [0, 1]
             self._check_percentile(percentiles)
 
pandas/core/internals.py

-5

@@ -4314,11 +4314,6 @@ def form_blocks(arrays, names, axes):
         elif is_datetimetz(v):
             datetime_tz_items.append((i, k, v))
         elif issubclass(v.dtype.type, np.integer):
-            if v.dtype == np.uint64:
-                # HACK #2355 definite overflow
-                if (v > 2**63 - 1).any():
-                    object_items.append((i, k, v))
-                    continue
             int_items.append((i, k, v))
         elif v.dtype == np.bool_:
             bool_items.append((i, k, v))
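
With the overflow hack removed, large unsigned values keep their uint64 dtype instead of being demoted to object, matching the bug-fix entry for GH 14881 in the whatsnew above. A sketch:

    import numpy as np
    import pandas as pd

    arr = np.array([2**64 - 1, 1], dtype=np.uint64)

    # Values above 2**63 - 1 now stay uint64 rather than object.
    pd.DataFrame({'a': arr}).dtypes   # a    uint64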

pandas/core/nanops.py

+1 -28

@@ -9,10 +9,8 @@
 except ImportError:  # pragma: no cover
     _USE_BOTTLENECK = False
 
-import pandas.hashtable as _hash
 from pandas import compat, lib, algos, tslib
-from pandas.types.common import (_ensure_int64, _ensure_object,
-                                 _ensure_float64, _get_dtype,
+from pandas.types.common import (_get_dtype,
                                  is_float, is_scalar,
                                  is_integer, is_complex, is_float_dtype,
                                  is_complex_dtype, is_integer_dtype,
@@ -784,28 +782,3 @@ def f(x, y):
 nanle = make_nancomp(operator.le)
 naneq = make_nancomp(operator.eq)
 nanne = make_nancomp(operator.ne)
-
-
-def unique1d(values):
-    """
-    Hash table-based unique
-    """
-    if np.issubdtype(values.dtype, np.floating):
-        table = _hash.Float64HashTable(len(values))
-        uniques = np.array(table.unique(_ensure_float64(values)),
-                           dtype=np.float64)
-    elif np.issubdtype(values.dtype, np.datetime64):
-        table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(_ensure_int64(values))
-        uniques = uniques.view('M8[ns]')
-    elif np.issubdtype(values.dtype, np.timedelta64):
-        table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(_ensure_int64(values))
-        uniques = uniques.view('m8[ns]')
-    elif np.issubdtype(values.dtype, np.integer):
-        table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(_ensure_int64(values))
-    else:
-        table = _hash.PyObjectHashTable(len(values))
-        uniques = table.unique(_ensure_object(values))
-    return uniques

pandas/io/common.py

+11 -5

@@ -12,6 +12,12 @@
 from pandas.core.common import AbstractMethodError
 from pandas.types.common import is_number
 
+try:
+    from s3fs import S3File
+    need_text_wrapping = (BytesIO, S3File)
+except ImportError:
+    need_text_wrapping = (BytesIO,)
+
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
@@ -212,10 +218,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
         return reader, encoding, compression
 
     if _is_s3_url(filepath_or_buffer):
-        from pandas.io.s3 import get_filepath_or_buffer
-        return get_filepath_or_buffer(filepath_or_buffer,
-                                      encoding=encoding,
-                                      compression=compression)
+        from pandas.io import s3
+        return s3.get_filepath_or_buffer(filepath_or_buffer,
+                                         encoding=encoding,
+                                         compression=compression)
 
     # It is a pathlib.Path/py.path.local or string
     filepath_or_buffer = _stringify_path(filepath_or_buffer)
@@ -391,7 +397,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
         handles.append(f)
 
     # in Python 3, convert BytesIO or fileobjects passed with an encoding
-    if compat.PY3 and (compression or isinstance(f, compat.BytesIO)):
+    if compat.PY3 and (compression or isinstance(f, need_text_wrapping)):
         from io import TextIOWrapper
         f = TextIOWrapper(f, encoding=encoding)
         handles.append(f)
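
The need_text_wrapping tuple above exists because binary file objects yield bytes while the Python 3 text-parsing machinery expects str. A standalone stdlib-only sketch of why the TextIOWrapper step is needed:

    import io

    raw = io.BytesIO(b'a,b\n1,2\n')   # yields bytes, as s3fs.S3File does
    text = io.TextIOWrapper(raw, encoding='utf-8')
    text.read()   # 'a,b\n1,2\n' decoded to str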
