
Commit 0e87874

resolved merge conflict in whatsnew/v0.20.0.txt

2 parents 0814e5b + 8e630b6

32 files changed: +231 -208 lines

asv_bench/benchmarks/io_bench.py

+1 -1

@@ -153,7 +153,7 @@ def setup(self, compression, engine):
             # The Python 2 C parser can't read bz2 from open files.
             raise NotImplementedError
         try:
-            import boto
+            import s3fs
         except ImportError:
             # Skip these benchmarks if `boto` is not installed.
             raise NotImplementedError

ci/lint.sh

+3 -3

@@ -7,6 +7,8 @@ source activate pandas
 RET=0
 
 if [ "$LINT" ]; then
+    pip install cpplint
+
     # pandas/rpy is deprecated and will be removed.
     # pandas/src is C code, so no need to search there.
     echo "Linting *.py"
@@ -43,13 +45,11 @@ if [ "$LINT" ]; then
     # from Cython files nor do we want to lint C files that we didn't modify for
     # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
     # we can lint all header files since they aren't "generated" like C files are.
-    pip install cpplint
-
     echo "Linting *.c and *.h"
     for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson'
     do
         echo "linting -> pandas/src/$path"
-        cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
+        cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
         if [ $? -ne "0" ]; then
             RET=1
         fi

ci/requirements-2.7-64.run

+1 -1

@@ -11,7 +11,7 @@ sqlalchemy
 lxml=3.2.1
 scipy
 xlsxwriter
-boto
+s3fs
 bottleneck
 html5lib
 beautiful-soup

ci/requirements-2.7.run

+1 -1

@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
 lxml=3.2.1
 scipy
 xlsxwriter=0.4.6
-boto=2.36.0
+s3fs
 bottleneck
 psycopg2=2.5.2
 patsy

ci/requirements-2.7_SLOW.run

+1 -1

@@ -13,7 +13,7 @@ numexpr
 pytables
 sqlalchemy
 lxml
-boto
+s3fs
 bottleneck
 psycopg2
 pymysql

ci/requirements-3.5.run

+1 -1

@@ -17,7 +17,7 @@ sqlalchemy
 pymysql
 psycopg2
 xarray
-boto
+s3fs
 
 # incompat with conda ATM
 # beautiful-soup

ci/requirements-3.5_OSX.run

+1 -1

@@ -12,7 +12,7 @@ matplotlib
 jinja2
 bottleneck
 xarray
-boto
+s3fs
 
 # incompat with conda ATM
 # beautiful-soup

doc/foo

+5

@@ -0,0 +1,5 @@
+,col_1
+0,1
+1,2
+2,'A'
+3,4.22

doc/source/install.rst

+1 -1

@@ -262,7 +262,7 @@ Optional Dependencies
 * `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
 
 * `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
-* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
+* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
 * `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
 * One of `PyQt4
   <http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide

doc/source/io.rst

+17

@@ -1487,6 +1487,23 @@ options include:
 Specifying any of the above options will produce a ``ParserWarning`` unless the
 python engine is selected explicitly using ``engine='python'``.
 
+Reading remote files
+''''''''''''''''''''
+
+You can pass in a URL to a CSV file:
+
+.. code-block:: python
+
+   df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',
+                    sep='\t')
+
+S3 URLs are handled as well:
+
+.. code-block:: python
+
+   df = pd.read_csv('s3://pandas-test/tips.csv')
+
+
 Writing out Data
 ''''''''''''''''
 
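
As a usage sketch of the API documented above (not part of the diff; the bucket and file names are hypothetical), the S3 path can be guarded against the missing optional dependency:

    import pandas as pd

    try:
        # As of this commit, s3:// URLs are routed through s3fs rather
        # than boto; s3fs is optional and must be installed separately.
        df = pd.read_csv('s3://my-bucket/data.csv')   # hypothetical bucket
    except ImportError:
        # Raised when s3fs is not available; fall back to a local copy.
        df = pd.read_csv('data.csv')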

doc/source/whatsnew/v0.20.0.txt

+13 -8

@@ -106,13 +106,14 @@ Other enhancements
 - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
 - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
 
+- ``.select_dtypes()`` now allows `datetimetz` to generically select datetimes with tz (:issue:`14910`)
+
 
 .. _whatsnew_0200.api_breaking:
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-
 .. _whatsnew.api_breaking.index_map
 
 Map on Index types now return other Index types
@@ -181,18 +182,22 @@ Map on Index types now return other Index types
 
     s.map(lambda x: x.hour)
 
+.. _whatsnew_0200.s3:
 
-.. _whatsnew_0200.api:
-
-- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
-- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
-
+S3 File Handling
+^^^^^^^^^^^^^^^^
 
+pandas now uses `s3fs <http://s3fs.readthedocs.io/>`_ for handling S3 connections. This shouldn't break
+any code. However, since s3fs is not a required dependency, you will need to install it separately (like boto
+in prior versions of pandas) (:issue:`11915`).
 
+.. _whatsnew_0200.api:
 
 Other API Changes
 ^^^^^^^^^^^^^^^^^
 
+- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
+- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
 - :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>` now works as exact match provided that string resolution coincides with index resolution, including a case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match <timeseries.slice_vs_exact_match>` for details.
 
 .. ipython:: python
@@ -266,9 +271,9 @@ Bug Fixes
 ~~~~~~~~~
 
 - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
+- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`)
 - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`)
-
-
+- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
 
 
 
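
A minimal sketch of the ``.select_dtypes()`` enhancement listed under "Other enhancements" above (the frame and column names are made up for the example):

    import pandas as pd

    df = pd.DataFrame({'naive': pd.date_range('2016-01-01', periods=3),
                       'aware': pd.date_range('2016-01-01', periods=3,
                                              tz='US/Eastern'),
                       'num': [1, 2, 3]})

    # 'datetimetz' generically matches tz-aware datetime columns,
    # whatever the specific timezone.
    df.select_dtypes(include=['datetimetz'])   # keeps only 'aware'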

pandas/core/algorithms.py

+32

@@ -113,6 +113,38 @@ def _unique_generic(values, table_type, type_caster):
     return type_caster(uniques)
 
 
+def unique1d(values):
+    """
+    Hash table-based unique
+    """
+    if np.issubdtype(values.dtype, np.floating):
+        table = htable.Float64HashTable(len(values))
+        uniques = np.array(table.unique(_ensure_float64(values)),
+                           dtype=np.float64)
+    elif np.issubdtype(values.dtype, np.datetime64):
+        table = htable.Int64HashTable(len(values))
+        uniques = table.unique(_ensure_int64(values))
+        uniques = uniques.view('M8[ns]')
+    elif np.issubdtype(values.dtype, np.timedelta64):
+        table = htable.Int64HashTable(len(values))
+        uniques = table.unique(_ensure_int64(values))
+        uniques = uniques.view('m8[ns]')
+    elif np.issubdtype(values.dtype, np.integer):
+        table = htable.Int64HashTable(len(values))
+        uniques = table.unique(_ensure_int64(values))
+    else:
+
+        # its cheaper to use a String Hash Table than Object
+        if lib.infer_dtype(values) in ['string']:
+            table = htable.StringHashTable(len(values))
+        else:
+            table = htable.PyObjectHashTable(len(values))
+
+        uniques = table.unique(_ensure_object(values))
+
+    return uniques
+
+
 def isin(comps, values):
     """
     Compute the isin boolean array
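
The relocated unique1d picks a hash table specialized for the values' dtype (with a cheaper string table for object arrays holding only strings) and, unlike np.unique, does not sort. A sketch of the observable behavior through the public pd.unique wrapper, which is likewise hash-table based and order-preserving:

    import numpy as np
    import pandas as pd

    values = np.array([3, 1, 2, 1, 3])

    # Hash-table uniquing preserves order of first appearance.
    pd.unique(values)   # array([3, 1, 2])
    np.unique(values)   # array([1, 2, 3]) -- sorted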

pandas/core/base.py

+1 -1

@@ -969,7 +969,7 @@ def unique(self):
         if hasattr(values, 'unique'):
             result = values.unique()
         else:
-            from pandas.core.nanops import unique1d
+            from pandas.core.algorithms import unique1d
             result = unique1d(values)
         return result
 

pandas/core/categorical.py

+1 -2

@@ -25,7 +25,7 @@
                          is_scalar)
 from pandas.core.common import is_null_slice
 
-from pandas.core.algorithms import factorize, take_1d
+from pandas.core.algorithms import factorize, take_1d, unique1d
 from pandas.core.base import (PandasObject, PandasDelegate,
                               NoNewAttributesMixin, _shared_docs)
 import pandas.core.common as com
@@ -1834,7 +1834,6 @@ def unique(self):
         unique values : ``Categorical``
         """
 
-        from pandas.core.nanops import unique1d
         # unlike np.unique, unique1d does not sort
         unique_codes = unique1d(self.codes)
         cat = self.copy()
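
Because unique1d does not sort, Categorical.unique keeps values in order of first appearance and drops unused categories. A small sketch of the expected behavior:

    import pandas as pd

    cat = pd.Categorical(['b', 'a', 'b'], categories=['a', 'b', 'c'])

    # Order of appearance is kept; the unused category 'c' is dropped.
    cat.unique()   # [b, a], Categories (2, object): [b, a]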

pandas/core/frame.py

+5

@@ -2257,7 +2257,12 @@ def select_dtypes(self, include=None, exclude=None):
           this will return *all* object dtype columns
         * See the `numpy dtype hierarchy
           <http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html>`__
+        * To select datetimes, use np.datetime64, 'datetime' or 'datetime64'
+        * To select timedeltas, use np.timedelta64, 'timedelta' or
+          'timedelta64'
         * To select Pandas categorical dtypes, use 'category'
+        * To select Pandas datetimetz dtypes, use 'datetimetz' (new in 0.20.0),
+          or a 'datetime64[ns, tz]' string
 
         Examples
         --------
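
A sketch of the selectors documented in the bullets above (column names are hypothetical; string aliases and numpy types are interchangeable):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'when': pd.date_range('2016-01-01', periods=3),
                       'lag': pd.to_timedelta([1, 2, 3], unit='d'),
                       'x': [1.0, 2.0, 3.0]})

    df.select_dtypes(include=['datetime'])       # keeps 'when'
    df.select_dtypes(include=[np.timedelta64])   # keeps 'lag'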

pandas/core/generic.py

+3

@@ -5262,6 +5262,9 @@ def describe(self, percentiles=None, include=None, exclude=None):
             raise ValueError("Cannot describe a DataFrame without columns")
 
         if percentiles is not None:
+            # explicit conversion of `percentiles` to list
+            percentiles = list(percentiles)
+
             # get them all to be in [0, 1]
             self._check_percentile(percentiles)
 
pandas/core/internals.py

-5

@@ -4314,11 +4314,6 @@ def form_blocks(arrays, names, axes):
         elif is_datetimetz(v):
             datetime_tz_items.append((i, k, v))
         elif issubclass(v.dtype.type, np.integer):
-            if v.dtype == np.uint64:
-                # HACK #2355 definite overflow
-                if (v > 2**63 - 1).any():
-                    object_items.append((i, k, v))
-                    continue
             int_items.append((i, k, v))
         elif v.dtype == np.bool_:
             bool_items.append((i, k, v))
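
With the overflow hack removed, large unsigned values keep their uint64 dtype instead of being demoted to object, matching the bug-fix entry for GH 14881 in the whatsnew above. A sketch:

    import numpy as np
    import pandas as pd

    arr = np.array([2**64 - 1, 1], dtype=np.uint64)

    # Values above 2**63 - 1 now stay uint64 rather than object.
    pd.DataFrame({'a': arr}).dtypes   # a    uint64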

pandas/core/nanops.py

+1 -28

@@ -9,10 +9,8 @@
 except ImportError:  # pragma: no cover
     _USE_BOTTLENECK = False
 
-import pandas.hashtable as _hash
 from pandas import compat, lib, algos, tslib
-from pandas.types.common import (_ensure_int64, _ensure_object,
-                                 _ensure_float64, _get_dtype,
+from pandas.types.common import (_get_dtype,
                                  is_float, is_scalar,
                                  is_integer, is_complex, is_float_dtype,
                                  is_complex_dtype, is_integer_dtype,
@@ -784,28 +782,3 @@ def f(x, y):
 nanle = make_nancomp(operator.le)
 naneq = make_nancomp(operator.eq)
 nanne = make_nancomp(operator.ne)
-
-
-def unique1d(values):
-    """
-    Hash table-based unique
-    """
-    if np.issubdtype(values.dtype, np.floating):
-        table = _hash.Float64HashTable(len(values))
-        uniques = np.array(table.unique(_ensure_float64(values)),
-                           dtype=np.float64)
-    elif np.issubdtype(values.dtype, np.datetime64):
-        table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(_ensure_int64(values))
-        uniques = uniques.view('M8[ns]')
-    elif np.issubdtype(values.dtype, np.timedelta64):
-        table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(_ensure_int64(values))
-        uniques = uniques.view('m8[ns]')
-    elif np.issubdtype(values.dtype, np.integer):
-        table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(_ensure_int64(values))
-    else:
-        table = _hash.PyObjectHashTable(len(values))
-        uniques = table.unique(_ensure_object(values))
-    return uniques

pandas/io/common.py

+11 -5

@@ -12,6 +12,12 @@
 from pandas.core.common import AbstractMethodError
 from pandas.types.common import is_number
 
+try:
+    from s3fs import S3File
+    need_text_wrapping = (BytesIO, S3File)
+except ImportError:
+    need_text_wrapping = (BytesIO,)
+
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
@@ -212,10 +218,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
         return reader, encoding, compression
 
     if _is_s3_url(filepath_or_buffer):
-        from pandas.io.s3 import get_filepath_or_buffer
-        return get_filepath_or_buffer(filepath_or_buffer,
-                                      encoding=encoding,
-                                      compression=compression)
+        from pandas.io import s3
+        return s3.get_filepath_or_buffer(filepath_or_buffer,
+                                         encoding=encoding,
+                                         compression=compression)
 
     # It is a pathlib.Path/py.path.local or string
     filepath_or_buffer = _stringify_path(filepath_or_buffer)
@@ -391,7 +397,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
         handles.append(f)
 
     # in Python 3, convert BytesIO or fileobjects passed with an encoding
-    if compat.PY3 and (compression or isinstance(f, compat.BytesIO)):
+    if compat.PY3 and (compression or isinstance(f, need_text_wrapping)):
         from io import TextIOWrapper
         f = TextIOWrapper(f, encoding=encoding)
         handles.append(f)
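
The need_text_wrapping tuple above exists because binary file objects yield bytes while the Python 3 text-parsing machinery expects str. A standalone stdlib-only sketch of why the TextIOWrapper step is needed:

    import io

    raw = io.BytesIO(b'a,b\n1,2\n')   # yields bytes, as s3fs.S3File does
    text = io.TextIOWrapper(raw, encoding='utf-8')
    text.read()   # 'a,b\n1,2\n' decoded to str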
