Skip to content

Commit 41cd000

Browse files
committed
Merge from master
2 parents 396088f + d05e8f2 commit 41cd000

File tree

625 files changed

+56684
-30934
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

625 files changed

+56684
-30934
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ dist
6161
.coverage
6262
coverage.xml
6363
coverage_html_report
64+
*.pytest_cache
6465

6566
# OS generated files #
6667
######################
@@ -90,7 +91,6 @@ scikits
9091

9192
# Unit / Performance Testing #
9293
##############################
93-
.pytest_cache/
9494
asv_bench/env/
9595
asv_bench/html/
9696
asv_bench/results/
@@ -109,3 +109,4 @@ doc/tmp.sv
109109
doc/source/styled.xlsx
110110
doc/source/templates/
111111
env/
112+
doc/source/savefig/

.travis.yml

+15-11
Original file line numberDiff line numberDiff line change
@@ -34,55 +34,59 @@ matrix:
3434
- os: osx
3535
language: generic
3636
env:
37-
- JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network"
37+
- JOB="3.5, OSX" ENV_FILE="ci/travis-35-osx.yaml" TEST_ARGS="--skip-slow --skip-network"
38+
3839
- dist: trusty
3940
env:
40-
- JOB="2.7_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true
41+
- JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network"
42+
43+
- dist: trusty
44+
env:
45+
- JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true
4146
addons:
4247
apt:
4348
packages:
4449
- language-pack-zh-hans
4550
- dist: trusty
4651
env:
47-
- JOB="2.7" TEST_ARGS="--skip-slow" LINT=true
52+
- JOB="2.7, lint" ENV_FILE="ci/travis-27.yaml" TEST_ARGS="--skip-slow" LINT=true
4853
addons:
4954
apt:
5055
packages:
5156
- python-gtk2
52-
# In allow_failures
5357
- dist: trusty
5458
env:
55-
- JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true COVERAGE=true
59+
- JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
5660
# In allow_failures
5761
- dist: trusty
5862
env:
59-
- JOB="2.7_SLOW" SLOW=true
63+
- JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
6064
# In allow_failures
6165
- dist: trusty
6266
env:
63-
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
67+
- JOB="3.6, NumPy dev" ENV_FILE="ci/travis-36-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
6468
addons:
6569
apt:
6670
packages:
6771
- xsel
6872
# In allow_failures
6973
- dist: trusty
7074
env:
71-
- JOB="3.6_DOC" DOC=true
75+
- JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true
7276
allow_failures:
7377
- dist: trusty
7478
env:
75-
- JOB="2.7_SLOW" SLOW=true
79+
- JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
7680
- dist: trusty
7781
env:
78-
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
82+
- JOB="3.6, NumPy dev" ENV_FILE="ci/travis-36-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
7983
addons:
8084
apt:
8185
packages:
8286
- xsel
8387
- dist: trusty
8488
env:
85-
- JOB="3.6_DOC" DOC=true
89+
- JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true
8690

8791
before_install:
8892
- echo "before_install"

MANIFEST.in

+23-11
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,39 @@ include LICENSE
33
include RELEASE.md
44
include README.md
55
include setup.py
6-
include pyproject.toml
76

87
graft doc
98
prune doc/build
109

10+
graft LICENSES
11+
1112
graft pandas
1213

13-
global-exclude *.so
14-
global-exclude *.pyd
14+
global-exclude *.bz2
15+
global-exclude *.csv
16+
global-exclude *.dta
17+
global-exclude *.gz
18+
global-exclude *.h5
19+
global-exclude *.html
20+
global-exclude *.json
21+
global-exclude *.msgpack
22+
global-exclude *.pickle
23+
global-exclude *.png
1524
global-exclude *.pyc
25+
global-exclude *.pyd
26+
global-exclude *.sas7bdat
27+
global-exclude *.so
28+
global-exclude *.xls
29+
global-exclude *.xlsm
30+
global-exclude *.xlsx
31+
global-exclude *.xpt
32+
global-exclude *.xz
33+
global-exclude *.zip
1634
global-exclude *~
17-
global-exclude \#*
18-
global-exclude .git*
1935
global-exclude .DS_Store
20-
global-exclude *.png
36+
global-exclude .git*
37+
global-exclude \#*
2138

22-
# include examples/data/*
23-
# recursive-include examples *.py
24-
# recursive-include doc/source *
25-
# recursive-include doc/sphinxext *
26-
# recursive-include LICENSES *
2739
include versioneer.py
2840
include pandas/_version.py
2941
include pandas/io/formats/templates/*.tpl

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ doc:
2323
cd doc; \
2424
python make.py clean; \
2525
python make.py html
26+
python make.py spellcheck

README.md

+33-22
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,33 @@
99
<table>
1010
<tr>
1111
<td>Latest Release</td>
12-
<td><img src="https://img.shields.io/pypi/v/pandas.svg" alt="latest release" /></td>
12+
<td>
13+
<a href="https://pypi.org/project/pandas/">
14+
<img src="https://img.shields.io/pypi/v/pandas.svg" alt="latest release" />
15+
</a>
16+
</td>
1317
</tr>
1418
<td></td>
15-
<td><img src="https://anaconda.org/conda-forge/pandas/badges/version.svg" alt="latest release" /></td>
19+
<td>
20+
<a href="https://anaconda.org/anaconda/pandas/">
21+
<img src="https://anaconda.org/conda-forge/pandas/badges/version.svg" alt="latest release" />
22+
</a>
23+
</td>
1624
</tr>
1725
<tr>
1826
<td>Package Status</td>
19-
<td><img src="https://img.shields.io/pypi/status/pandas.svg" alt="status" /></td>
27+
<td>
28+
<a href="https://pypi.org/project/pandas/">
29+
<img src="https://img.shields.io/pypi/status/pandas.svg" alt="status" />
30+
</a></td>
2031
</tr>
2132
<tr>
2233
<td>License</td>
23-
<td><img src="https://img.shields.io/pypi/l/pandas.svg" alt="license" /></td>
34+
<td>
35+
<a href="https://github.com/pandas-dev/pandas/blob/master/LICENSE">
36+
<img src="https://img.shields.io/pypi/l/pandas.svg" alt="license" />
37+
</a>
38+
</td>
2439
</tr>
2540
<tr>
2641
<td>Build Status</td>
@@ -48,35 +63,31 @@
4863
</tr>
4964
<tr>
5065
<td>Coverage</td>
51-
<td><img src="https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master" alt="coverage" /></td>
52-
</tr>
53-
<tr>
54-
<td>Conda</td>
55-
<td>
56-
<a href="https://pandas.pydata.org">
57-
<img src="http://pubbadges.s3-website-us-east-1.amazonaws.com/pkgs-downloads-pandas.png" alt="conda default downloads" />
66+
 <td>
67+
<a href="https://codecov.io/gh/pandas-dev/pandas">
68+
<img src="https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master" alt="coverage" />
5869
</a>
5970
</td>
6071
</tr>
6172
<tr>
62-
<td>Conda-forge</td>
73+
<td>Downloads</td>
6374
<td>
6475
<a href="https://pandas.pydata.org">
6576
<img src="https://anaconda.org/conda-forge/pandas/badges/downloads.svg" alt="conda-forge downloads" />
6677
</a>
6778
</td>
6879
</tr>
6980
<tr>
70-
<td>PyPI</td>
71-
<td>
72-
<a href="https://pypi.python.org/pypi/pandas/">
73-
<img src="https://img.shields.io/pypi/dm/pandas.svg" alt="pypi downloads" />
74-
</a>
75-
</td>
81+
<td>Gitter</td>
82+
<td>
83+
<a href="https://gitter.im/pydata/pandas">
84+
<img src="https://badges.gitter.im/Join%20Chat.svg" />
85+
</a>
86+
</td>
7687
</tr>
7788
</table>
7889

79-
[![https://gitter.im/pydata/pandas](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
90+
8091

8192
## What is it
8293

@@ -147,7 +158,7 @@ The source code is currently hosted on GitHub at:
147158
https://github.com/pandas-dev/pandas
148159

149160
Binary installers for the latest released version are available at the [Python
150-
package index](https://pypi.python.org/pypi/pandas) and on conda.
161+
package index](https://pypi.org/project/pandas) and on conda.
151162

152163
```sh
153164
# conda
@@ -160,7 +171,7 @@ pip install pandas
160171
```
161172

162173
## Dependencies
163-
- [NumPy](http://www.numpy.org): 1.9.0 or higher
174+
- [NumPy](https://www.numpy.org): 1.9.0 or higher
164175
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
165176
- [pytz](https://pythonhosted.org/pytz): 2011k or higher
166177

@@ -222,7 +233,7 @@ All contributions, bug reports, bug fixes, documentation improvements, enhanceme
222233

223234
A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)**
224235

225-
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out.
236+
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
226237

227238
You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
228239

appveyor.yml

+4-11
Original file line numberDiff line numberDiff line change
@@ -73,19 +73,12 @@ install:
7373
- cmd: conda info -a
7474

7575
# create our env
76-
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
76+
- cmd: conda env create -q -n pandas --file=ci\appveyor-%CONDA_PY%.yaml
7777
- cmd: activate pandas
78-
- cmd: pip install moto
79-
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
80-
- cmd: echo "installing requirements from %REQ%"
81-
- cmd: conda install -n pandas --file=%REQ%
8278
- cmd: conda list -n pandas
83-
- cmd: echo "installing requirements from %REQ% - done"
84-
85-
# add some pip only reqs to the env
86-
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
87-
- cmd: echo "installing requirements from %REQ%"
88-
- cmd: pip install -Ur %REQ%
79+
# uninstall pandas if it's present
80+
- cmd: conda remove pandas -y --force & exit 0
81+
- cmd: pip uninstall -y pandas & exit 0
8982

9083
# build em using the local source checkout in the correct windows env
9184
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'

asv_bench/benchmarks/categoricals.py

+97
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def setup(self):
5151

5252
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
5353
self.values_all_nan = [np.nan] * len(self.values)
54+
self.values_all_int8 = np.ones(N, 'int8')
5455

5556
def time_regular(self):
5657
pd.Categorical(self.values, self.categories)
@@ -70,6 +71,9 @@ def time_with_nan(self):
7071
def time_all_nan(self):
7172
pd.Categorical(self.values_all_nan)
7273

74+
def time_from_codes_all_int8(self):
75+
pd.Categorical.from_codes(self.values_all_int8, self.categories)
76+
7377

7478
class ValueCounts(object):
7579

@@ -148,3 +152,96 @@ def time_rank_int_cat(self):
148152

149153
def time_rank_int_cat_ordered(self):
150154
self.s_int_cat_ordered.rank()
155+
156+
157+
class Isin(object):
158+
159+
goal_time = 0.2
160+
161+
params = ['object', 'int64']
162+
param_names = ['dtype']
163+
164+
def setup(self, dtype):
165+
np.random.seed(1234)
166+
n = 5 * 10**5
167+
sample_size = 100
168+
arr = [i for i in np.random.randint(0, n // 10, size=n)]
169+
if dtype == 'object':
170+
arr = ['s%04d' % i for i in arr]
171+
self.sample = np.random.choice(arr, sample_size)
172+
self.series = pd.Series(arr).astype('category')
173+
174+
def time_isin_categorical(self, dtype):
175+
self.series.isin(self.sample)
176+
177+
178+
class IsMonotonic(object):
179+
180+
def setup(self):
181+
N = 1000
182+
self.c = pd.CategoricalIndex(list('a' * N + 'b' * N + 'c' * N))
183+
self.s = pd.Series(self.c)
184+
185+
def time_categorical_index_is_monotonic_increasing(self):
186+
self.c.is_monotonic_increasing
187+
188+
def time_categorical_index_is_monotonic_decreasing(self):
189+
self.c.is_monotonic_decreasing
190+
191+
def time_categorical_series_is_monotonic_increasing(self):
192+
self.s.is_monotonic_increasing
193+
194+
def time_categorical_series_is_monotonic_decreasing(self):
195+
self.s.is_monotonic_decreasing
196+
197+
198+
class Contains(object):
199+
200+
goal_time = 0.2
201+
202+
def setup(self):
203+
N = 10**5
204+
self.ci = tm.makeCategoricalIndex(N)
205+
self.c = self.ci.values
206+
self.key = self.ci.categories[0]
207+
208+
def time_categorical_index_contains(self):
209+
self.key in self.ci
210+
211+
def time_categorical_contains(self):
212+
self.key in self.c
213+
214+
215+
class CategoricalSlicing(object):
216+
217+
goal_time = 0.2
218+
params = ['monotonic_incr', 'monotonic_decr', 'non_monotonic']
219+
param_names = ['index']
220+
221+
def setup(self, index):
222+
N = 10**6
223+
values = list('a' * N + 'b' * N + 'c' * N)
224+
indices = {
225+
'monotonic_incr': pd.Categorical(values),
226+
'monotonic_decr': pd.Categorical(reversed(values)),
227+
'non_monotonic': pd.Categorical(list('abc' * N))}
228+
self.data = indices[index]
229+
230+
self.scalar = 10000
231+
self.list = list(range(10000))
232+
self.cat_scalar = 'b'
233+
234+
def time_getitem_scalar(self, index):
235+
self.data[self.scalar]
236+
237+
def time_getitem_slice(self, index):
238+
self.data[:self.scalar]
239+
240+
def time_getitem_list_like(self, index):
241+
self.data[[self.scalar]]
242+
243+
def time_getitem_list(self, index):
244+
self.data[self.list]
245+
246+
def time_getitem_bool_array(self, index):
247+
self.data[self.data == self.cat_scalar]

0 commit comments

Comments
 (0)