Skip to content

Commit bd2aeb2

Browse files
Merging from master
2 parents 21967f5 + caf462c commit bd2aeb2

File tree

531 files changed

+16237
-11194
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

531 files changed

+16237
-11194
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ asv_bench/pandas/
101101
# Documentation generated files #
102102
#################################
103103
doc/source/generated
104+
doc/source/api/generated
104105
doc/source/_static
105106
doc/source/vbench
106107
doc/source/vbench.rst

.travis.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,23 @@ matrix:
3434
include:
3535
- dist: trusty
3636
env:
37-
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="not slow and not network and not db"
37+
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
3838

3939
- dist: trusty
4040
env:
41-
- JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="not slow"
41+
- JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="(not slow or (single and db))"
4242
addons:
4343
apt:
4444
packages:
4545
- python-gtk2
4646

4747
- dist: trusty
4848
env:
49-
- JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="not slow and not network" LOCALE_OVERRIDE="zh_CN.UTF-8"
49+
- JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8"
5050

5151
- dist: trusty
5252
env:
53-
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="not slow and not network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
53+
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
5454

5555
# In allow_failures
5656
- dist: trusty

LICENSES/DATEUTIL_LICENSE

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
Copyright 2017- Paul Ganssle <paul@ganssle.io>
2+
Copyright 2017- dateutil contributors (see AUTHORS file)
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
16+
The above license applies to all contributions after 2017-12-01, as well as
17+
all contributions that have been re-licensed (see AUTHORS file for the list of
18+
contributors who have re-licensed their code).
19+
--------------------------------------------------------------------------------
20+
dateutil - Extensions to the standard Python datetime module.
21+
22+
Copyright (c) 2003-2011 - Gustavo Niemeyer <gustavo@niemeyer.net>
23+
Copyright (c) 2012-2014 - Tomi Pieviläinen <tomi.pievilainen@iki.fi>
24+
Copyright (c) 2014-2016 - Yaron de Leeuw <me@jarondl.net>
25+
Copyright (c) 2015- - Paul Ganssle <paul@ganssle.io>
26+
Copyright (c) 2015- - dateutil contributors (see AUTHORS file)
27+
28+
All rights reserved.
29+
30+
Redistribution and use in source and binary forms, with or without
31+
modification, are permitted provided that the following conditions are met:
32+
33+
* Redistributions of source code must retain the above copyright notice,
34+
this list of conditions and the following disclaimer.
35+
* Redistributions in binary form must reproduce the above copyright notice,
36+
this list of conditions and the following disclaimer in the documentation
37+
and/or other materials provided with the distribution.
38+
* Neither the name of the copyright holder nor the names of its
39+
contributors may be used to endorse or promote products derived from
40+
this software without specific prior written permission.
41+
42+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
46+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
47+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
48+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
49+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
50+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
51+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
52+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53+
54+
The above BSD License Applies to all code, even that also covered by Apache 2.0.

asv_bench/benchmarks/ctors.py

+55-13
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,68 @@
33
from pandas import Series, Index, DatetimeIndex, Timestamp, MultiIndex
44

55

6+
def no_change(arr):
7+
return arr
8+
9+
10+
def list_of_str(arr):
11+
return list(arr.astype(str))
12+
13+
14+
def gen_of_str(arr):
15+
return (x for x in arr.astype(str))
16+
17+
18+
def arr_dict(arr):
19+
return dict(zip(range(len(arr)), arr))
20+
21+
22+
def list_of_tuples(arr):
23+
return [(i, -i) for i in arr]
24+
25+
26+
def gen_of_tuples(arr):
27+
return ((i, -i) for i in arr)
28+
29+
30+
def list_of_lists(arr):
31+
return [[i, -i] for i in arr]
32+
33+
34+
def list_of_tuples_with_none(arr):
35+
return [(i, -i) for i in arr][:-1] + [None]
36+
37+
38+
def list_of_lists_with_none(arr):
39+
return [[i, -i] for i in arr][:-1] + [None]
40+
41+
642
class SeriesConstructors(object):
743

8-
param_names = ["data_fmt", "with_index"]
9-
params = [[lambda x: x,
44+
param_names = ["data_fmt", "with_index", "dtype"]
45+
params = [[no_change,
1046
list,
11-
lambda arr: list(arr.astype(str)),
12-
lambda arr: dict(zip(range(len(arr)), arr)),
13-
lambda arr: [(i, -i) for i in arr],
14-
lambda arr: [[i, -i] for i in arr],
15-
lambda arr: ([(i, -i) for i in arr][:-1] + [None]),
16-
lambda arr: ([[i, -i] for i in arr][:-1] + [None])],
17-
[False, True]]
18-
19-
def setup(self, data_fmt, with_index):
47+
list_of_str,
48+
gen_of_str,
49+
arr_dict,
50+
list_of_tuples,
51+
gen_of_tuples,
52+
list_of_lists,
53+
list_of_tuples_with_none,
54+
list_of_lists_with_none],
55+
[False, True],
56+
['float', 'int']]
57+
58+
def setup(self, data_fmt, with_index, dtype):
2059
N = 10**4
21-
arr = np.random.randn(N)
60+
if dtype == 'float':
61+
arr = np.random.randn(N)
62+
else:
63+
arr = np.arange(N)
2264
self.data = data_fmt(arr)
2365
self.index = np.arange(N) if with_index else None
2466

25-
def time_series_constructor(self, data_fmt, with_index):
67+
def time_series_constructor(self, data_fmt, with_index, dtype):
2668
Series(self.data, index=self.index)
2769

2870

asv_bench/benchmarks/dtypes.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from pandas.api.types import pandas_dtype
2+
3+
import numpy as np
4+
from .pandas_vb_common import (
5+
numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes)
6+
7+
8+
_numpy_dtypes = [np.dtype(dtype)
9+
for dtype in (numeric_dtypes +
10+
datetime_dtypes +
11+
string_dtypes)]
12+
_dtypes = _numpy_dtypes + extension_dtypes
13+
14+
15+
class Dtypes(object):
16+
params = (_dtypes +
17+
list(map(lambda dt: dt.name, _dtypes)))
18+
param_names = ['dtype']
19+
20+
def time_pandas_dtype(self, dtype):
21+
pandas_dtype(dtype)
22+
23+
24+
class DtypesInvalid(object):
25+
param_names = ['dtype']
26+
params = ['scalar-string', 'scalar-int', 'list-string', 'array-string']
27+
data_dict = {'scalar-string': 'foo',
28+
'scalar-int': 1,
29+
'list-string': ['foo'] * 1000,
30+
'array-string': np.array(['foo'] * 1000)}
31+
32+
def time_pandas_dtype_invalid(self, dtype):
33+
try:
34+
pandas_dtype(self.data_dict[dtype])
35+
except TypeError:
36+
pass
37+
38+
39+
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/pandas_vb_common.py

+10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from importlib import import_module
33

44
import numpy as np
5+
import pandas as pd
56

67
# Compatibility import for lib
78
for imp in ['pandas._libs.lib', 'pandas.lib']:
@@ -14,6 +15,15 @@
1415
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
1516
np.float64, np.int16, np.int8, np.uint16, np.uint8]
1617
datetime_dtypes = [np.datetime64, np.timedelta64]
18+
string_dtypes = [np.object]
19+
extension_dtypes = [pd.Int8Dtype, pd.Int16Dtype,
20+
pd.Int32Dtype, pd.Int64Dtype,
21+
pd.UInt8Dtype, pd.UInt16Dtype,
22+
pd.UInt32Dtype, pd.UInt64Dtype,
23+
pd.CategoricalDtype,
24+
pd.IntervalDtype,
25+
pd.DatetimeTZDtype('ns', 'UTC'),
26+
pd.PeriodDtype('D')]
1727

1828

1929
def setup(*args, **kwargs):

asv_bench/benchmarks/stat_ops.py

+7
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def setup(self, method, use_bottleneck):
106106
from pandas.core import nanops
107107
nanops._USE_BOTTLENECK = use_bottleneck
108108
self.df = pd.DataFrame(np.random.randn(1000, 30))
109+
self.df2 = pd.DataFrame(np.random.randn(1000, 30))
109110
self.s = pd.Series(np.random.randn(1000))
110111
self.s2 = pd.Series(np.random.randn(1000))
111112

@@ -115,6 +116,12 @@ def time_corr(self, method, use_bottleneck):
115116
def time_corr_series(self, method, use_bottleneck):
116117
self.s.corr(self.s2, method=method)
117118

119+
def time_corrwith_cols(self, method, use_bottleneck):
120+
self.df.corrwith(self.df2, method=method)
121+
122+
def time_corrwith_rows(self, method, use_bottleneck):
123+
self.df.corrwith(self.df2, axis=1, method=method)
124+
118125

119126
class Covariance(object):
120127

asv_bench/benchmarks/timeseries.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
class DatetimeIndex(object):
1414

15-
params = ['dst', 'repeated', 'tz_aware', 'tz_naive']
15+
params = ['dst', 'repeated', 'tz_aware', 'tz_local', 'tz_naive']
1616
param_names = ['index_type']
1717

1818
def setup(self, index_type):
@@ -26,6 +26,10 @@ def setup(self, index_type):
2626
periods=N,
2727
freq='s',
2828
tz='US/Eastern'),
29+
'tz_local': date_range(start='2000',
30+
periods=N,
31+
freq='s',
32+
tz=dateutil.tz.tzlocal()),
2933
'tz_naive': date_range(start='2000',
3034
periods=N,
3135
freq='s')}

ci/azure/posix.yml

+6-6
Original file line numberDiff line numberDiff line change
@@ -12,37 +12,37 @@ jobs:
1212
py35_np_120:
1313
ENV_FILE: ci/deps/azure-macos-35.yaml
1414
CONDA_PY: "35"
15-
PATTERN: "not slow and not network and not db"
15+
PATTERN: "not slow and not network"
1616

1717
${{ if eq(parameters.name, 'Linux') }}:
1818
py27_np_120:
1919
ENV_FILE: ci/deps/azure-27-compat.yaml
2020
CONDA_PY: "27"
21-
PATTERN: "not slow and not network and not db"
21+
PATTERN: "not slow and not network"
2222

2323
py27_locale_slow_old_np:
2424
ENV_FILE: ci/deps/azure-27-locale.yaml
2525
CONDA_PY: "27"
26-
PATTERN: "slow and not db"
26+
PATTERN: "slow"
2727
LOCALE_OVERRIDE: "zh_CN.UTF-8"
2828
EXTRA_APT: "language-pack-zh-hans"
2929

3030
py36_locale_slow:
3131
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
3232
CONDA_PY: "36"
33-
PATTERN: "not slow and not network and not db"
33+
PATTERN: "not slow and not network"
3434
LOCALE_OVERRIDE: "it_IT.UTF-8"
3535

3636
py37_locale:
3737
ENV_FILE: ci/deps/azure-37-locale.yaml
3838
CONDA_PY: "37"
39-
PATTERN: "not slow and not network and not db"
39+
PATTERN: "not slow and not network"
4040
LOCALE_OVERRIDE: "zh_CN.UTF-8"
4141

4242
py37_np_dev:
4343
ENV_FILE: ci/deps/azure-37-numpydev.yaml
4444
CONDA_PY: "37"
45-
PATTERN: "not slow and not network and not db"
45+
PATTERN: "not slow and not network"
4646
TEST_ARGS: "-W error"
4747
PANDAS_TESTING_MODE: "deprecate"
4848
EXTRA_APT: "xsel"

ci/azure/windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
displayName: 'Build'
3939
- script: |
4040
call activate pandas-dev
41-
pytest -m "not slow and not network and not db" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %*
41+
pytest -m "not slow and not network" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %*
4242
displayName: 'Test'
4343
- task: PublishTestResults@2
4444
inputs:

ci/code_checks.sh

+13
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
148148
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_util.py assert_raises_regex pandas
149149
RET=$(($RET + $?)) ; echo $MSG "DONE"
150150

151+
# Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
152+
MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG
153+
invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
154+
RET=$(($RET + $?)) ; echo $MSG "DONE"
155+
151156
# Check that we use pytest.raises only as a context manager
152157
#
153158
# For any flake8-compliant code, the only way this regex gets
@@ -157,6 +162,14 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
157162
# invgrep -R --include '*.py' -E '[[:space:]] pytest.raises' pandas/tests
158163
# RET=$(($RET + $?)) ; echo $MSG "DONE"
159164

165+
MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo $MSG
166+
invgrep -R --include="*.rst" ".. code-block ::" doc/source
167+
RET=$(($RET + $?)) ; echo $MSG "DONE"
168+
169+
MSG='Check for wrong space after ipython directive and before colon (".. ipython ::" instead of ".. ipython::")' ; echo $MSG
170+
invgrep -R --include="*.rst" ".. ipython ::" doc/source
171+
RET=$(($RET + $?)) ; echo $MSG "DONE"
172+
160173
MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
161174
set -o pipefail
162175
if [[ "$AZURE" == "true" ]]; then

ci/deps/azure-macos-35.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies:
1313
- numexpr
1414
- numpy=1.12.0
1515
- openpyxl=2.5.5
16+
- pyarrow
1617
- pytables
1718
- python=3.5*
1819
- pytz

ci/deps/azure-windows-36.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66
- blosc
77
- bottleneck
88
- boost-cpp<1.67
9-
- fastparquet
9+
- fastparquet>=0.2.1
1010
- matplotlib
1111
- numexpr
1212
- numpy=1.14*
@@ -18,7 +18,6 @@ dependencies:
1818
- python=3.6.6
1919
- pytz
2020
- scipy
21-
- thrift=0.10*
2221
- xlrd
2322
- xlsxwriter
2423
- xlwt

ci/deps/travis-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66
- beautifulsoup4
77
- bottleneck
88
- cython=0.28.2
9-
- fastparquet
9+
- fastparquet>=0.2.1
1010
- gcsfs
1111
- html5lib
1212
- ipython

0 commit comments

Comments
 (0)