Skip to content

Commit 81d55a0

Browse files
authored
Merge branch 'master' into pickle_io_compression
2 parents 025a0cd + 7f0eefc commit 81d55a0

File tree

112 files changed

+2638
-1541
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+2638
-1541
lines changed

.travis.yml

+2-3
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,10 @@ matrix:
8080
apt:
8181
packages:
8282
- xsel
83-
- python: 3.6-dev
83+
- python: 3.6
8484
env:
8585
- PYTHON_VERSION=3.6
86-
- JOB_NAME: "36_dev"
87-
- JOB_TAG=_DEV
86+
- JOB_NAME: "36"
8887
- NOSE_ARGS="not slow and not network and not disabled"
8988
- PANDAS_TESTING_MODE="deprecate"
9089
addons:

appveyor.yml

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ install:
8080
- cmd: conda config --set ssl_verify false
8181

8282
# add the pandas channel *before* defaults to have defaults take priority
83+
- cmd: conda config --add channels conda-forge
8384
- cmd: conda config --add channels pandas
8485
- cmd: conda config --remove channels defaults
8586
- cmd: conda config --add channels defaults

asv_bench/benchmarks/frame_methods.py

+7
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ class Iteration(object):
6868
def setup(self):
6969
self.df = DataFrame(randn(10000, 1000))
7070
self.df2 = DataFrame(np.random.randn(50000, 10))
71+
self.df3 = pd.DataFrame(np.random.randn(1000,5000),
72+
columns=['C'+str(c) for c in range(5000)])
7173

7274
def f(self):
7375
if hasattr(self.df, '_item_cache'):
@@ -85,6 +87,11 @@ def time_iteritems(self):
8587
def time_iteritems_cached(self):
8688
self.g()
8789

90+
def time_iteritems_indexing(self):
91+
df = self.df3
92+
for col in df:
93+
df[col]
94+
8895
def time_itertuples(self):
8996
for row in self.df2.itertuples():
9097
pass

asv_bench/benchmarks/io_bench.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def setup(self, compression, engine):
153153
# The Python 2 C parser can't read bz2 from open files.
154154
raise NotImplementedError
155155
try:
156-
import boto
156+
import s3fs
157157
except ImportError:
158158
# Skip these benchmarks if `s3fs` is not installed.
159159
raise NotImplementedError

asv_bench/benchmarks/period.py

+25
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,28 @@ def time_value_counts_pindex(self):
4949
self.i.value_counts()
5050

5151

52+
class period_standard_indexing(object):
53+
goal_time = 0.2
54+
55+
def setup(self):
56+
self.index = PeriodIndex(start='1985', periods=1000, freq='D')
57+
self.series = Series(range(1000), index=self.index)
58+
self.period = self.index[500]
59+
60+
def time_get_loc(self):
61+
self.index.get_loc(self.period)
62+
63+
def time_shape(self):
64+
self.index.shape
65+
66+
def time_shallow_copy(self):
67+
self.index._shallow_copy()
68+
69+
def time_series_loc(self):
70+
self.series.loc[self.period]
71+
72+
def time_align(self):
73+
pd.DataFrame({'a': self.series, 'b': self.series[:500]})
74+
75+
def time_intersection(self):
76+
self.index[:750].intersection(self.index[250:])

ci/install-3.6_DEV.sh

-16
This file was deleted.

ci/install_travis.sh

+11-2
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ else
7171
conda config --set always_yes true --set changeps1 false || exit 1
7272
conda update -q conda
7373

74-
# add the pandas channel *before* defaults to have defaults take priority
74+
# add the pandas channel to take priority
75+
# to add extra packages
7576
echo "add channels"
7677
conda config --add channels pandas || exit 1
7778
conda config --remove channels defaults || exit 1
@@ -90,7 +91,15 @@ if [ -e ${INSTALL} ]; then
9091
else
9192

9293
# create new env
93-
time conda create -n pandas python=$PYTHON_VERSION nose coverage flake8 || exit 1
94+
time conda create -n pandas python=$PYTHON_VERSION nose || exit 1
95+
96+
if [ "$COVERAGE" ]; then
97+
pip install coverage
98+
fi
99+
if [ "$LINT" ]; then
100+
conda install flake8
101+
pip install cpplint
102+
fi
94103
fi
95104

96105
# build deps

ci/lint.sh

+2-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ source activate pandas
77
RET=0
88

99
if [ "$LINT" ]; then
10+
1011
# pandas/rpy is deprecated and will be removed.
1112
# pandas/src is C code, so no need to search there.
1213
echo "Linting *.py"
@@ -43,13 +44,11 @@ if [ "$LINT" ]; then
4344
# from Cython files nor do we want to lint C files that we didn't modify for
4445
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
4546
# we can lint all header files since they aren't "generated" like C files are.
46-
pip install cpplint
47-
4847
echo "Linting *.c and *.h"
4948
for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson'
5049
do
5150
echo "linting -> pandas/src/$path"
52-
cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
51+
cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path
5352
if [ $? -ne "0" ]; then
5453
RET=1
5554
fi

ci/requirements-2.7-64.run

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ pytz
33
numpy=1.10*
44
xlwt
55
numexpr
6-
pytables
6+
pytables==3.2.2
77
matplotlib
88
openpyxl
99
xlrd
1010
sqlalchemy
1111
lxml=3.2.1
1212
scipy
1313
xlsxwriter
14-
boto
14+
s3fs
1515
bottleneck
1616
html5lib
1717
beautiful-soup

ci/requirements-2.7.run

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
1111
lxml=3.2.1
1212
scipy
1313
xlsxwriter=0.4.6
14-
boto=2.36.0
14+
s3fs
1515
bottleneck
1616
psycopg2=2.5.2
1717
patsy

ci/requirements-2.7.sh

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
3+
source activate pandas
4+
5+
echo "install 27"
6+
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-2.7_SLOW.run

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ numexpr
1313
pytables
1414
sqlalchemy
1515
lxml
16-
boto
16+
s3fs
1717
bottleneck
1818
psycopg2
1919
pymysql

ci/requirements-3.5-64.run

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
python-dateutil
22
pytz
3-
numpy=1.10*
3+
numpy
44
openpyxl
55
xlsxwriter
66
xlrd
77
xlwt
88
scipy
9+
feather-format
910
numexpr
1011
pytables
1112
matplotlib

ci/requirements-3.5.run

+2-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,5 @@ sqlalchemy
1717
pymysql
1818
psycopg2
1919
xarray
20-
boto
21-
22-
# incompat with conda ATM
23-
# beautiful-soup
20+
s3fs
21+
beautifulsoup4

ci/requirements-3.5.sh

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
3+
source activate pandas
4+
5+
echo "install 35"
6+
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-3.5_OSX.run

+2-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,5 @@ matplotlib
1212
jinja2
1313
bottleneck
1414
xarray
15-
boto
16-
17-
# incompat with conda ATM
18-
# beautiful-soup
15+
s3fs
16+
beautifulsoup4

ci/requirements-3.5_OSX.sh

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
3+
source activate pandas
4+
5+
echo "install 35_OSX"
6+
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-3.6.build

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
python-dateutil
2+
pytz
3+
numpy
4+
cython

ci/requirements-3.6.run

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
python-dateutil
2+
pytz
3+
numpy
4+
scipy

doc/cheatsheet/Pandas_Cheat_Sheet.pdf

670 KB
Binary file not shown.
102 KB
Binary file not shown.

doc/cheatsheet/README.txt

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013.
2+
To create the PDF version, within Powerpoint, simply do a "Save As"
3+
and pick "PDF" as the format.
4+

doc/source/api.rst

+9
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,14 @@ HDFStore: PyTables (HDF5)
8383
HDFStore.get
8484
HDFStore.select
8585

86+
Feather
87+
~~~~~~~
88+
89+
.. autosummary::
90+
:toctree: generated/
91+
92+
read_feather
93+
8694
SAS
8795
~~~
8896

@@ -1015,6 +1023,7 @@ Serialization / IO / Conversion
10151023
DataFrame.to_excel
10161024
DataFrame.to_json
10171025
DataFrame.to_html
1026+
DataFrame.to_feather
10181027
DataFrame.to_latex
10191028
DataFrame.to_stata
10201029
DataFrame.to_msgpack

doc/source/basics.rst

+12
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,7 @@ then the more *general* one will be used as the result of the operation.
17571757
# conversion of dtypes
17581758
df3.astype('float32').dtypes
17591759
1760+
17601761
Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
17611762

17621763
.. ipython:: python
@@ -1766,6 +1767,17 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
17661767
dft
17671768
dft.dtypes
17681769
1770+
.. versionadded:: 0.19.0
1771+
1772+
Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype`
1773+
1774+
.. ipython:: python
1775+
1776+
dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]})
1777+
dft1 = dft1.astype({'a': np.bool, 'c': np.float64})
1778+
dft1
1779+
dft1.dtypes
1780+
17691781
.. note::
17701782

17711783
When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype` and :meth:`~DataFrame.loc`, upcasting occurs.

doc/source/install.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ Optional Dependencies
247247
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
248248
* `xarray <http://xarray.pydata.org>`__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
249249
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
250+
* `Feather Format <https://github.com/wesm/feather>`__: necessary for feather-based storage, version 0.3.1 or higher.
250251
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
251252

252253
- `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
@@ -262,7 +263,7 @@ Optional Dependencies
262263
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
263264

264265
* `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
265-
* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
266+
* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
266267
* `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
267268
* One of `PyQt4
268269
<http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide

0 commit comments

Comments
 (0)