Commit d10b47b

Merge remote-tracking branch 'upstream/master' into assert_frame_equal-colname

2 parents 39c58d3 + b9f26e2
335 files changed (+6954, -7303 lines)

.github/workflows/ci.yml (+12, -9)

@@ -15,12 +15,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:

+    - name: Setting conda path
+      run: echo "::add-path::${HOME}/miniconda3/bin"
+
     - name: Checkout
       uses: actions/checkout@v1

-    - name: Setting conda path
-      run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
-
     - name: Looking for unwanted patterns
       run: ci/code_checks.sh patterns
       if: true
@@ -80,15 +80,18 @@ jobs:
           git fetch upstream
           if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
             asv machine --yes
-            ASV_OUTPUT="$(asv dev)"
-            if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
-              echo "##vso[task.logissue type=error]Benchmarks run with errors"
-              echo "$ASV_OUTPUT"
+            asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
+            if grep "failed" benchmarks.log > /dev/null ; then
               exit 1
-            else
-              echo "Benchmarks run without errors"
             fi
           else
             echo "Benchmarks did not run, no changes detected"
           fi
       if: true
+
+    - name: Publish benchmarks artifact
+      uses: actions/upload-artifact@master
+      with:
+        name: Benchmarks log
+        path: asv_bench/benchmarks.log
+      if: failure()

asv_bench/benchmarks/array.py (new file, +23)

@@ -0,0 +1,23 @@
+import numpy as np
+
+import pandas as pd
+
+
+class BooleanArray:
+    def setup(self):
+        self.values_bool = np.array([True, False, True, False])
+        self.values_float = np.array([1.0, 0.0, 1.0, 0.0])
+        self.values_integer = np.array([1, 0, 1, 0])
+        self.values_integer_like = [1, 0, 1, 0]
+
+    def time_from_bool_array(self):
+        pd.array(self.values_bool, dtype="boolean")
+
+    def time_from_integer_array(self):
+        pd.array(self.values_integer, dtype="boolean")
+
+    def time_from_integer_like(self):
+        pd.array(self.values_integer_like, dtype="boolean")
+
+    def time_from_float_array(self):
+        pd.array(self.values_float, dtype="boolean")
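(Not part of the diff: a minimal sketch of the constructor these benchmarks time, assuming a pandas version where pd.array and the nullable "boolean" dtype are available.)

import numpy as np
import pandas as pd

# pd.array with dtype="boolean" returns a nullable BooleanArray.
arr = pd.array(np.array([True, False, True, False]), dtype="boolean")
print(arr.dtype)                                  # boolean

# Integer or float input holding only 0s and 1s (as in the benchmark data)
# is coerced to the same dtype; None becomes the missing value pd.NA.
print(pd.array([1, 0, None], dtype="boolean"))    # [True, False, <NA>]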

asv_bench/benchmarks/boolean.py (new file, +32)

@@ -0,0 +1,32 @@
+import numpy as np
+
+import pandas as pd
+
+
+class TimeLogicalOps:
+    def setup(self):
+        N = 10_000
+        left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool")
+        self.left = pd.arrays.BooleanArray(left, lmask)
+        self.right = pd.arrays.BooleanArray(right, rmask)
+
+    def time_or_scalar(self):
+        self.left | True
+        self.left | False
+
+    def time_or_array(self):
+        self.left | self.right
+
+    def time_and_scalar(self):
+        self.left & True
+        self.left & False
+
+    def time_and_array(self):
+        self.left & self.right
+
+    def time_xor_scalar(self):
+        self.left ^ True
+        self.left ^ False
+
+    def time_xor_array(self):
+        self.left ^ self.right
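(Illustration only, not part of the diff: BooleanArray takes a values array plus a mask marking missing entries, and its logical operators follow Kleene three-valued logic, which is what these benchmarks exercise.)

import numpy as np
import pandas as pd

# BooleanArray(values, mask): mask entries set to True are treated as missing (pd.NA).
left = pd.arrays.BooleanArray(
    np.array([True, False, True]), np.array([False, False, True])
)  # -> [True, False, <NA>]

# NA only propagates when the known operand cannot decide the result.
print(left | True)   # [True, True, True]    (NA | True is True)
print(left & True)   # [True, False, <NA>]   (NA & True stays unknown)
print(left ^ True)   # [False, True, <NA>]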

asv_bench/benchmarks/frame_methods.py (+1, -1)

@@ -565,7 +565,7 @@ def setup(self):

     def time_frame_get_dtype_counts(self):
         with warnings.catch_warnings(record=True):
-            self.df.get_dtype_counts()
+            self.df._data.get_dtype_counts()

     def time_info(self):
         self.df.info()
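(Side note, not part of the diff: DataFrame.get_dtype_counts() is deprecated, which is why the benchmark now reaches into the internal block manager to keep timing the old path without the warning. A sketch of the public-API way to get the same information:)

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5], "c": ["x", "y"]})

# Counts of columns per dtype, without the deprecated get_dtype_counts():
print(df.dtypes.value_counts())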

asv_bench/benchmarks/index_object.py (+13)

@@ -7,6 +7,7 @@
     Float64Index,
     Index,
     IntervalIndex,
+    MultiIndex,
     RangeIndex,
     Series,
     date_range,
@@ -111,6 +112,18 @@ def time_get_loc_dec(self):
         self.idx_dec.get_loc(100000)


+class IndexEquals:
+    def setup(self):
+        idx_large_fast = RangeIndex(100000)
+        idx_small_slow = date_range(start="1/1/2012", periods=1)
+        self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
+
+        self.idx_non_object = RangeIndex(1)
+
+    def time_non_object_equals_multiindex(self):
+        self.idx_non_object.equals(self.mi_large_slow)
+
+
 class IndexAppend:
     def setup(self):

asv_bench/benchmarks/multiindex_object.py (+13, -1)

@@ -2,7 +2,7 @@

 import numpy as np

-from pandas import DataFrame, MultiIndex, date_range
+from pandas import DataFrame, MultiIndex, RangeIndex, date_range
 import pandas.util.testing as tm


@@ -147,4 +147,16 @@ def time_categorical_level(self):
         self.df.set_index(["a", "b"])


+class Equals:
+    def setup(self):
+        idx_large_fast = RangeIndex(100000)
+        idx_small_slow = date_range(start="1/1/2012", periods=1)
+        self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
+
+        self.idx_non_object = RangeIndex(1)
+
+    def time_equals_non_object_index(self):
+        self.mi_large_slow.equals(self.idx_non_object)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
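(Illustration only: this Equals class and IndexEquals in index_object.py above time Index.equals() between a one-element RangeIndex and a large MultiIndex built from a 100,000 x 1 product. The answer is always False; the benchmarks track how quickly pandas reaches it.)

from pandas import MultiIndex, RangeIndex, date_range

mi = MultiIndex.from_product([RangeIndex(100000), date_range("1/1/2012", periods=1)])
idx = RangeIndex(1)

# Both directions are covered: index_object.py times idx.equals(mi),
# multiindex_object.py times mi.equals(idx).
print(idx.equals(mi))   # False
print(mi.equals(idx))   # False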

asv_bench/benchmarks/stat_ops.py (+28, -52)

@@ -7,20 +7,14 @@

 class FrameOps:

-    params = [ops, ["float", "int"], [0, 1], [True, False]]
-    param_names = ["op", "dtype", "axis", "use_bottleneck"]
+    params = [ops, ["float", "int"], [0, 1]]
+    param_names = ["op", "dtype", "axis"]

-    def setup(self, op, dtype, axis, use_bottleneck):
+    def setup(self, op, dtype, axis):
         df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
-
-            nanops._USE_BOTTLENECK = use_bottleneck
         self.df_func = getattr(df, op)

-    def time_op(self, op, dtype, axis, use_bottleneck):
+    def time_op(self, op, dtype, axis):
         self.df_func(axis=axis)


@@ -46,20 +40,14 @@ def time_op(self, level, op):

 class SeriesOps:

-    params = [ops, ["float", "int"], [True, False]]
-    param_names = ["op", "dtype", "use_bottleneck"]
+    params = [ops, ["float", "int"]]
+    param_names = ["op", "dtype"]

-    def setup(self, op, dtype, use_bottleneck):
+    def setup(self, op, dtype):
         s = pd.Series(np.random.randn(100000)).astype(dtype)
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
-
-            nanops._USE_BOTTLENECK = use_bottleneck
         self.s_func = getattr(s, op)

-    def time_op(self, op, dtype, use_bottleneck):
+    def time_op(self, op, dtype):
         self.s_func()


@@ -101,61 +89,49 @@ def time_average_old(self, constructor, pct):

 class Correlation:

-    params = [["spearman", "kendall", "pearson"], [True, False]]
-    param_names = ["method", "use_bottleneck"]
+    params = [["spearman", "kendall", "pearson"]]
+    param_names = ["method"]

-    def setup(self, method, use_bottleneck):
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
+    def setup(self, method):
+        self.df = pd.DataFrame(np.random.randn(500, 15))
+        self.df2 = pd.DataFrame(np.random.randn(500, 15))
+        self.df_wide = pd.DataFrame(np.random.randn(500, 100))
+        self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9)
+        self.s = pd.Series(np.random.randn(500))
+        self.s2 = pd.Series(np.random.randn(500))

-            nanops._USE_BOTTLENECK = use_bottleneck
-        self.df = pd.DataFrame(np.random.randn(1000, 30))
-        self.df2 = pd.DataFrame(np.random.randn(1000, 30))
-        self.df_wide = pd.DataFrame(np.random.randn(1000, 200))
-        self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9)
-        self.s = pd.Series(np.random.randn(1000))
-        self.s2 = pd.Series(np.random.randn(1000))
-
-    def time_corr(self, method, use_bottleneck):
+    def time_corr(self, method):
         self.df.corr(method=method)

-    def time_corr_wide(self, method, use_bottleneck):
+    def time_corr_wide(self, method):
         self.df_wide.corr(method=method)

-    def time_corr_wide_nans(self, method, use_bottleneck):
+    def time_corr_wide_nans(self, method):
         self.df_wide_nans.corr(method=method)

-    def peakmem_corr_wide(self, method, use_bottleneck):
+    def peakmem_corr_wide(self, method):
         self.df_wide.corr(method=method)

-    def time_corr_series(self, method, use_bottleneck):
+    def time_corr_series(self, method):
         self.s.corr(self.s2, method=method)

-    def time_corrwith_cols(self, method, use_bottleneck):
+    def time_corrwith_cols(self, method):
         self.df.corrwith(self.df2, method=method)

-    def time_corrwith_rows(self, method, use_bottleneck):
+    def time_corrwith_rows(self, method):
         self.df.corrwith(self.df2, axis=1, method=method)


 class Covariance:

-    params = [[True, False]]
-    param_names = ["use_bottleneck"]
-
-    def setup(self, use_bottleneck):
-        try:
-            pd.options.compute.use_bottleneck = use_bottleneck
-        except TypeError:
-            from pandas.core import nanops
+    params = []
+    param_names = []

-            nanops._USE_BOTTLENECK = use_bottleneck
+    def setup(self):
         self.s = pd.Series(np.random.randn(100000))
         self.s2 = pd.Series(np.random.randn(100000))

-    def time_cov_series(self, use_bottleneck):
+    def time_cov_series(self):
         self.s.cov(self.s2)
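(Not part of the diff: the removed parameterization toggled bottleneck acceleration around each run. If you still want to compare the two code paths locally, the option the deleted setup() code was flipping is shown below as a sketch.)

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(100000))

pd.options.compute.use_bottleneck = False   # force the pure-NumPy nanops path
s.mean()

pd.options.compute.use_bottleneck = True    # use bottleneck when it is installed
s.mean()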

ci/azure/posix.yml (+12, -10)

@@ -20,14 +20,14 @@ jobs:
         CONDA_PY: "36"
         PATTERN: "not slow and not network"
       py36_locale_slow_old_np:
-        ENV_FILE: ci/deps/azure-36-locale.yaml
+        ENV_FILE: ci/deps/azure-36-locale_slow.yaml
         CONDA_PY: "36"
         PATTERN: "slow"
         LOCALE_OVERRIDE: "zh_CN.UTF-8"
         EXTRA_APT: "language-pack-zh-hans"

-      py36_locale_slow:
-        ENV_FILE: ci/deps/azure-36-locale_slow.yaml
+      py36_locale:
+        ENV_FILE: ci/deps/azure-36-locale.yaml
         CONDA_PY: "36"
         PATTERN: "not slow and not network"
         LOCALE_OVERRIDE: "it_IT.UTF-8"
@@ -44,13 +44,15 @@ jobs:
         PATTERN: "not slow and not network"
         LOCALE_OVERRIDE: "zh_CN.UTF-8"

-      py37_np_dev:
-        ENV_FILE: ci/deps/azure-37-numpydev.yaml
-        CONDA_PY: "37"
-        PATTERN: "not slow and not network"
-        TEST_ARGS: "-W error"
-        PANDAS_TESTING_MODE: "deprecate"
-        EXTRA_APT: "xsel"
+      # Disabled for NumPy object-dtype warning.
+      # https://github.com/pandas-dev/pandas/issues/30043
+      # py37_np_dev:
+      #   ENV_FILE: ci/deps/azure-37-numpydev.yaml
+      #   CONDA_PY: "37"
+      #   PATTERN: "not slow and not network"
+      #   TEST_ARGS: "-W error"
+      #   PANDAS_TESTING_MODE: "deprecate"
+      #   EXTRA_APT: "xsel"

   steps:
     - script: |

ci/code_checks.sh (+18, -17)

@@ -34,17 +34,13 @@ function invgrep {
     #
     # This is useful for the CI, as we want to fail if one of the patterns
     # that we want to avoid is found by grep.
-    if [[ "$AZURE" == "true" ]]; then
-        set -o pipefail
-        grep -n "$@" | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " $3}'
-    else
-        grep "$@"
-    fi
-    return $((! $?))
+    grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]}
+    return $((! $EXIT_STATUS))
 }

-if [[ "$AZURE" == "true" ]]; then
-    FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s"
+if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+    FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s"
+    INVGREP_PREPEND="##[error]"
 else
     FLAKE8_FORMAT="default"
 fi
@@ -109,7 +105,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then

     # Imports - Check formatting using isort see setup.cfg for settings
     MSG='Check import format using isort ' ; echo $MSG
-    isort --recursive --check-only pandas asv_bench
+    ISORT_CMD="isort --recursive --check-only pandas asv_bench"
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
+    else
+        eval $ISORT_CMD
+    fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"

 fi
@@ -198,15 +199,15 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"

+    MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
+    invgrep -R --include=*.{py,pyx} '\.__class__' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
-    set -o pipefail
-    if [[ "$AZURE" == "true" ]]; then
-        # we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
-        ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
-    else
-        ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
-    fi
+    INVGREP_APPEND=" <- trailing whitespaces found"
+    invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
     RET=$(($RET + $?)) ; echo $MSG "DONE"
+    unset INVGREP_APPEND
 fi

 ### CODE ###