Skip to content

Commit 7189cd0

Browse files
committed
update from master
2 parents 58a7da6 + 26bd34d commit 7189cd0

File tree

1,045 files changed

+143775
-110906
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,045 files changed

+143775
-110906
lines changed

.github/PULL_REQUEST_TEMPLATE.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
- [ ] closes #xxxx
22
- [ ] tests added / passed
3+
- [ ] passes `black pandas`
34
- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff`
45
- [ ] whatsnew entry

.pre-commit-config.yaml

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
repos:
2+
- repo: https://github.com/python/black
3+
rev: stable
4+
hooks:
5+
- id: black
6+
language_version: python3.7
7+
- repo: https://gitlab.com/pycqa/flake8
8+
rev: 3.7.7
9+
hooks:
10+
- id: flake8
11+
language: python_venv
12+
- repo: https://github.com/pre-commit/mirrors-isort
13+
rev: v4.3.20
14+
hooks:
15+
- id: isort
16+
language: python_venv

.travis.yml

-8
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,10 @@ matrix:
4848
env:
4949
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
5050

51-
# In allow_failures
52-
- dist: trusty
53-
env:
54-
- JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
5551
allow_failures:
5652
- dist: trusty
5753
env:
5854
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
59-
- dist: trusty
60-
env:
61-
- JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
6255

6356
before_install:
6457
- echo "before_install"
@@ -97,7 +90,6 @@ before_script:
9790
script:
9891
- echo "script start"
9992
- source activate pandas-dev
100-
- ci/build_docs.sh
10193
- ci/run_tests.sh
10294

10395
after_script:

LICENSES/HAVEN_LICENSE

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
YEAR: 2013-2016
2+
COPYRIGHT HOLDER: Hadley Wickham; RStudio; and Evan Miller

LICENSES/HAVEN_MIT

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
Based on http://opensource.org/licenses/MIT
2+
3+
This is a template. Complete and ship as file LICENSE the following 2
4+
lines (only)
5+
6+
YEAR:
7+
COPYRIGHT HOLDER:
8+
9+
and specify as
10+
11+
License: MIT + file LICENSE
12+
13+
Copyright (c) <YEAR>, <COPYRIGHT HOLDER>
14+
15+
Permission is hereby granted, free of charge, to any person obtaining
16+
a copy of this software and associated documentation files (the
17+
"Software"), to deal in the Software without restriction, including
18+
without limitation the rights to use, copy, modify, merge, publish,
19+
distribute, sublicense, and/or sell copies of the Software, and to
20+
permit persons to whom the Software is furnished to do so, subject to
21+
the following conditions:
22+
23+
The above copyright notice and this permission notice shall be
24+
included in all copies or substantial portions of the Software.
25+
26+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
30+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
31+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
32+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Makefile

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
tseries: pandas/_libs/lib.pyx pandas/_libs/tslib.pyx pandas/_libs/hashtable.pyx
2-
python setup.py build_ext --inplace
3-
4-
.PHONY : develop build clean clean_pyc tseries doc
1+
.PHONY : develop build clean clean_pyc doc lint-diff black
52

63
clean:
74
-python setup.py clean
@@ -15,8 +12,11 @@ build: clean_pyc
1512
lint-diff:
1613
git diff upstream/master --name-only -- "*.py" | xargs flake8
1714

15+
black:
16+
black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist)'
17+
1818
develop: build
19-
-python setup.py develop
19+
python setup.py develop
2020

2121
doc:
2222
-rm -rf doc/build doc/source/generated

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ Most development discussion is taking place on github in this repo. Further, the
224224

225225
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
226226

227-
A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas-docs.github.io/pandas-docs-travis/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
227+
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228228

229229
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
230230

asv_bench/benchmarks/algorithms.py

+94-52
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,45 @@
22

33
import numpy as np
44

5+
from pandas._libs import lib
6+
57
import pandas as pd
68
from pandas.util import testing as tm
79

8-
for imp in ['pandas.util', 'pandas.tools.hashing']:
10+
for imp in ["pandas.util", "pandas.tools.hashing"]:
911
try:
1012
hashing = import_module(imp)
1113
break
1214
except (ImportError, TypeError, ValueError):
1315
pass
1416

1517

18+
class MaybeConvertObjects:
19+
def setup(self):
20+
N = 10 ** 5
21+
22+
data = list(range(N))
23+
data[0] = pd.NaT
24+
data = np.array(data)
25+
self.data = data
26+
27+
def time_maybe_convert_objects(self):
28+
lib.maybe_convert_objects(self.data)
29+
30+
1631
class Factorize:
1732

18-
params = [[True, False], ['int', 'uint', 'float', 'string']]
19-
param_names = ['sort', 'dtype']
33+
params = [[True, False], ["int", "uint", "float", "string"]]
34+
param_names = ["sort", "dtype"]
2035

2136
def setup(self, sort, dtype):
22-
N = 10**5
23-
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
24-
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
25-
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
26-
'string': tm.makeStringIndex(N).repeat(5)}
37+
N = 10 ** 5
38+
data = {
39+
"int": pd.Int64Index(np.arange(N).repeat(5)),
40+
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
41+
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
42+
"string": tm.makeStringIndex(N).repeat(5),
43+
}
2744
self.idx = data[dtype]
2845

2946
def time_factorize(self, sort, dtype):
@@ -32,15 +49,17 @@ def time_factorize(self, sort, dtype):
3249

3350
class FactorizeUnique:
3451

35-
params = [[True, False], ['int', 'uint', 'float', 'string']]
36-
param_names = ['sort', 'dtype']
52+
params = [[True, False], ["int", "uint", "float", "string"]]
53+
param_names = ["sort", "dtype"]
3754

3855
def setup(self, sort, dtype):
39-
N = 10**5
40-
data = {'int': pd.Int64Index(np.arange(N)),
41-
'uint': pd.UInt64Index(np.arange(N)),
42-
'float': pd.Float64Index(np.arange(N)),
43-
'string': tm.makeStringIndex(N)}
56+
N = 10 ** 5
57+
data = {
58+
"int": pd.Int64Index(np.arange(N)),
59+
"uint": pd.UInt64Index(np.arange(N)),
60+
"float": pd.Float64Index(np.arange(N)),
61+
"string": tm.makeStringIndex(N),
62+
}
4463
self.idx = data[dtype]
4564
assert self.idx.is_unique
4665

@@ -50,15 +69,17 @@ def time_factorize(self, sort, dtype):
5069

5170
class Duplicated:
5271

53-
params = [['first', 'last', False], ['int', 'uint', 'float', 'string']]
54-
param_names = ['keep', 'dtype']
72+
params = [["first", "last", False], ["int", "uint", "float", "string"]]
73+
param_names = ["keep", "dtype"]
5574

5675
def setup(self, keep, dtype):
57-
N = 10**5
58-
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
59-
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
60-
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
61-
'string': tm.makeStringIndex(N).repeat(5)}
76+
N = 10 ** 5
77+
data = {
78+
"int": pd.Int64Index(np.arange(N).repeat(5)),
79+
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
80+
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
81+
"string": tm.makeStringIndex(N).repeat(5),
82+
}
6283
self.idx = data[dtype]
6384
# cache is_unique
6485
self.idx.is_unique
@@ -69,15 +90,17 @@ def time_duplicated(self, keep, dtype):
6990

7091
class DuplicatedUniqueIndex:
7192

72-
params = ['int', 'uint', 'float', 'string']
73-
param_names = ['dtype']
93+
params = ["int", "uint", "float", "string"]
94+
param_names = ["dtype"]
7495

7596
def setup(self, dtype):
76-
N = 10**5
77-
data = {'int': pd.Int64Index(np.arange(N)),
78-
'uint': pd.UInt64Index(np.arange(N)),
79-
'float': pd.Float64Index(np.random.randn(N)),
80-
'string': tm.makeStringIndex(N)}
97+
N = 10 ** 5
98+
data = {
99+
"int": pd.Int64Index(np.arange(N)),
100+
"uint": pd.UInt64Index(np.arange(N)),
101+
"float": pd.Float64Index(np.random.randn(N)),
102+
"string": tm.makeStringIndex(N),
103+
}
81104
self.idx = data[dtype]
82105
# cache is_unique
83106
self.idx.is_unique
@@ -87,58 +110,77 @@ def time_duplicated_unique(self, dtype):
87110

88111

89112
class Hashing:
90-
91113
def setup_cache(self):
92-
N = 10**5
114+
N = 10 ** 5
93115

94116
df = pd.DataFrame(
95-
{'strings': pd.Series(tm.makeStringIndex(10000).take(
96-
np.random.randint(0, 10000, size=N))),
97-
'floats': np.random.randn(N),
98-
'ints': np.arange(N),
99-
'dates': pd.date_range('20110101', freq='s', periods=N),
100-
'timedeltas': pd.timedelta_range('1 day', freq='s', periods=N)})
101-
df['categories'] = df['strings'].astype('category')
117+
{
118+
"strings": pd.Series(
119+
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
120+
),
121+
"floats": np.random.randn(N),
122+
"ints": np.arange(N),
123+
"dates": pd.date_range("20110101", freq="s", periods=N),
124+
"timedeltas": pd.timedelta_range("1 day", freq="s", periods=N),
125+
}
126+
)
127+
df["categories"] = df["strings"].astype("category")
102128
df.iloc[10:20] = np.nan
103129
return df
104130

105131
def time_frame(self, df):
106132
hashing.hash_pandas_object(df)
107133

108134
def time_series_int(self, df):
109-
hashing.hash_pandas_object(df['ints'])
135+
hashing.hash_pandas_object(df["ints"])
110136

111137
def time_series_string(self, df):
112-
hashing.hash_pandas_object(df['strings'])
138+
hashing.hash_pandas_object(df["strings"])
113139

114140
def time_series_float(self, df):
115-
hashing.hash_pandas_object(df['floats'])
141+
hashing.hash_pandas_object(df["floats"])
116142

117143
def time_series_categorical(self, df):
118-
hashing.hash_pandas_object(df['categories'])
144+
hashing.hash_pandas_object(df["categories"])
119145

120146
def time_series_timedeltas(self, df):
121-
hashing.hash_pandas_object(df['timedeltas'])
147+
hashing.hash_pandas_object(df["timedeltas"])
122148

123149
def time_series_dates(self, df):
124-
hashing.hash_pandas_object(df['dates'])
150+
hashing.hash_pandas_object(df["dates"])
125151

126152

127153
class Quantile:
128-
params = [[0, 0.5, 1],
129-
['linear', 'nearest', 'lower', 'higher', 'midpoint'],
130-
['float', 'int', 'uint']]
131-
param_names = ['quantile', 'interpolation', 'dtype']
154+
params = [
155+
[0, 0.5, 1],
156+
["linear", "nearest", "lower", "higher", "midpoint"],
157+
["float", "int", "uint"],
158+
]
159+
param_names = ["quantile", "interpolation", "dtype"]
132160

133161
def setup(self, quantile, interpolation, dtype):
134-
N = 10**5
135-
data = {'int': np.arange(N),
136-
'uint': np.arange(N).astype(np.uint64),
137-
'float': np.random.randn(N)}
162+
N = 10 ** 5
163+
data = {
164+
"int": np.arange(N),
165+
"uint": np.arange(N).astype(np.uint64),
166+
"float": np.random.randn(N),
167+
}
138168
self.idx = pd.Series(data[dtype].repeat(5))
139169

140170
def time_quantile(self, quantile, interpolation, dtype):
141171
self.idx.quantile(quantile, interpolation=interpolation)
142172

143173

174+
class SortIntegerArray:
175+
params = [10 ** 3, 10 ** 5]
176+
177+
def setup(self, N):
178+
data = np.arange(N, dtype=float)
179+
data[40] = np.nan
180+
self.array = pd.array(data, dtype="Int64")
181+
182+
def time_argsort(self, N):
183+
self.array.argsort()
184+
185+
144186
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/attrs_caching.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import numpy as np
22
from pandas import DataFrame
3+
34
try:
45
from pandas.util import cache_readonly
56
except ImportError:
67
from pandas.util.decorators import cache_readonly
78

89

910
class DataFrameAttributes:
10-
1111
def setup(self):
1212
self.df = DataFrame(np.random.randn(10, 6))
1313
self.cur_index = self.df.index
@@ -20,14 +20,12 @@ def time_set_index(self):
2020

2121

2222
class CacheReadonly:
23-
2423
def setup(self):
25-
2624
class Foo:
27-
2825
@cache_readonly
2926
def prop(self):
3027
return 5
28+
3129
self.obj = Foo()
3230

3331
def time_cache_readonly(self):

0 commit comments

Comments
 (0)