Skip to content

Commit bd54627

Browse files
authored
Merge branch 'master' into gh-29403
2 parents ca98e77 + 72cee4a commit bd54627

File tree

366 files changed

+7841
-7707
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

366 files changed

+7841
-7707
lines changed

.github/workflows/ci.yml

+13-19
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,16 @@ on:
88

99
env:
1010
ENV_FILE: environment.yml
11-
# TODO: remove export PATH=... in each step once this works
12-
# PATH: $HOME/miniconda3/bin:$PATH
1311

1412
jobs:
1513
checks:
1614
name: Checks
1715
runs-on: ubuntu-latest
1816
steps:
1917

18+
- name: Setting conda path
19+
run: echo "::add-path::${HOME}/miniconda3/bin"
20+
2021
- name: Checkout
2122
uses: actions/checkout@v1
2223

@@ -25,79 +26,72 @@ jobs:
2526
if: true
2627

2728
- name: Setup environment and build pandas
28-
run: |
29-
export PATH=$HOME/miniconda3/bin:$PATH
30-
ci/setup_env.sh
29+
run: ci/setup_env.sh
3130
if: true
3231

3332
- name: Linting
3433
run: |
35-
export PATH=$HOME/miniconda3/bin:$PATH
3634
source activate pandas-dev
3735
ci/code_checks.sh lint
3836
if: true
3937

4038
- name: Dependencies consistency
4139
run: |
42-
export PATH=$HOME/miniconda3/bin:$PATH
4340
source activate pandas-dev
4441
ci/code_checks.sh dependencies
4542
if: true
4643

4744
- name: Checks on imported code
4845
run: |
49-
export PATH=$HOME/miniconda3/bin:$PATH
5046
source activate pandas-dev
5147
ci/code_checks.sh code
5248
if: true
5349

5450
- name: Running doctests
5551
run: |
56-
export PATH=$HOME/miniconda3/bin:$PATH
5752
source activate pandas-dev
5853
ci/code_checks.sh doctests
5954
if: true
6055

6156
- name: Docstring validation
6257
run: |
63-
export PATH=$HOME/miniconda3/bin:$PATH
6458
source activate pandas-dev
6559
ci/code_checks.sh docstrings
6660
if: true
6761

6862
- name: Typing validation
6963
run: |
70-
export PATH=$HOME/miniconda3/bin:$PATH
7164
source activate pandas-dev
7265
ci/code_checks.sh typing
7366
if: true
7467

7568
- name: Testing docstring validation script
7669
run: |
77-
export PATH=$HOME/miniconda3/bin:$PATH
7870
source activate pandas-dev
7971
pytest --capture=no --strict scripts
8072
if: true
8173

8274
- name: Running benchmarks
8375
run: |
84-
export PATH=$HOME/miniconda3/bin:$PATH
8576
source activate pandas-dev
8677
cd asv_bench
8778
asv check -E existing
8879
git remote add upstream https://github.com/pandas-dev/pandas.git
8980
git fetch upstream
9081
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
9182
asv machine --yes
92-
ASV_OUTPUT="$(asv dev)"
93-
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
94-
echo "##vso[task.logissue type=error]Benchmarks run with errors"
95-
echo "$ASV_OUTPUT"
83+
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
84+
if grep "failed" benchmarks.log > /dev/null ; then
9685
exit 1
97-
else
98-
echo "Benchmarks run without errors"
9986
fi
10087
else
10188
echo "Benchmarks did not run, no changes detected"
10289
fi
10390
if: true
91+
92+
- name: Publish benchmarks artifact
93+
uses: actions/upload-artifact@master
94+
with:
95+
name: Benchmarks log
96+
path: asv_bench/benchmarks.log
97+
if: failure()

README.md

+4-5
Original file line number | Diff line number | Diff line change
@@ -164,12 +164,11 @@ pip install pandas
164164
```
165165

166166
## Dependencies
167-
- [NumPy](https://www.numpy.org): 1.13.3 or higher
168-
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
169-
- [pytz](https://pythonhosted.org/pytz): 2015.4 or higher
167+
- [NumPy](https://www.numpy.org)
168+
- [python-dateutil](https://labix.org/python-dateutil)
169+
- [pytz](https://pythonhosted.org/pytz)
170170

171-
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies)
172-
for recommended and optional dependencies.
171+
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
173172

174173
## Installation from sources
175174
To install pandas from source you need Cython in addition to the normal

asv_bench/benchmarks/categoricals.py

+27-15
Original file line number | Diff line number | Diff line change
@@ -14,21 +14,6 @@
1414
pass
1515

1616

17-
class Concat:
18-
def setup(self):
19-
N = 10 ** 5
20-
self.s = pd.Series(list("aabbcd") * N).astype("category")
21-
22-
self.a = pd.Categorical(list("aabbcd") * N)
23-
self.b = pd.Categorical(list("bbcdjk") * N)
24-
25-
def time_concat(self):
26-
pd.concat([self.s, self.s])
27-
28-
def time_union(self):
29-
union_categoricals([self.a, self.b])
30-
31-
3217
class Constructor:
3318
def setup(self):
3419
N = 10 ** 5
@@ -77,6 +62,33 @@ def time_existing_series(self):
7762
pd.Categorical(self.series)
7863

7964

65+
class CategoricalOps:
66+
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
67+
param_names = ["op"]
68+
69+
def setup(self, op):
70+
N = 10 ** 5
71+
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
72+
73+
def time_categorical_op(self, op):
74+
getattr(self.cat, op)("b")
75+
76+
77+
class Concat:
78+
def setup(self):
79+
N = 10 ** 5
80+
self.s = pd.Series(list("aabbcd") * N).astype("category")
81+
82+
self.a = pd.Categorical(list("aabbcd") * N)
83+
self.b = pd.Categorical(list("bbcdjk") * N)
84+
85+
def time_concat(self):
86+
pd.concat([self.s, self.s])
87+
88+
def time_union(self):
89+
union_categoricals([self.a, self.b])
90+
91+
8092
class ValueCounts:
8193

8294
params = [True, False]

asv_bench/benchmarks/frame_methods.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -565,7 +565,7 @@ def setup(self):
565565

566566
def time_frame_get_dtype_counts(self):
567567
with warnings.catch_warnings(record=True):
568-
self.df.get_dtype_counts()
568+
self.df._data.get_dtype_counts()
569569

570570
def time_info(self):
571571
self.df.info()

asv_bench/benchmarks/index_object.py

+13
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
Float64Index,
88
Index,
99
IntervalIndex,
10+
MultiIndex,
1011
RangeIndex,
1112
Series,
1213
date_range,
@@ -111,6 +112,18 @@ def time_get_loc_dec(self):
111112
self.idx_dec.get_loc(100000)
112113

113114

115+
class IndexEquals:
116+
def setup(self):
117+
idx_large_fast = RangeIndex(100000)
118+
idx_small_slow = date_range(start="1/1/2012", periods=1)
119+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
120+
121+
self.idx_non_object = RangeIndex(1)
122+
123+
def time_non_object_equals_multiindex(self):
124+
self.idx_non_object.equals(self.mi_large_slow)
125+
126+
114127
class IndexAppend:
115128
def setup(self):
116129

asv_bench/benchmarks/multiindex_object.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import DataFrame, MultiIndex, date_range
5+
from pandas import DataFrame, MultiIndex, RangeIndex, date_range
66
import pandas.util.testing as tm
77

88

@@ -147,4 +147,16 @@ def time_categorical_level(self):
147147
self.df.set_index(["a", "b"])
148148

149149

150+
class Equals:
151+
def setup(self):
152+
idx_large_fast = RangeIndex(100000)
153+
idx_small_slow = date_range(start="1/1/2012", periods=1)
154+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
155+
156+
self.idx_non_object = RangeIndex(1)
157+
158+
def time_equals_non_object_index(self):
159+
self.mi_large_slow.equals(self.idx_non_object)
160+
161+
150162
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/stat_ops.py

+28-52
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,14 @@
77

88
class FrameOps:
99

10-
params = [ops, ["float", "int"], [0, 1], [True, False]]
11-
param_names = ["op", "dtype", "axis", "use_bottleneck"]
10+
params = [ops, ["float", "int"], [0, 1]]
11+
param_names = ["op", "dtype", "axis"]
1212

13-
def setup(self, op, dtype, axis, use_bottleneck):
13+
def setup(self, op, dtype, axis):
1414
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
15-
try:
16-
pd.options.compute.use_bottleneck = use_bottleneck
17-
except TypeError:
18-
from pandas.core import nanops
19-
20-
nanops._USE_BOTTLENECK = use_bottleneck
2115
self.df_func = getattr(df, op)
2216

23-
def time_op(self, op, dtype, axis, use_bottleneck):
17+
def time_op(self, op, dtype, axis):
2418
self.df_func(axis=axis)
2519

2620

@@ -46,20 +40,14 @@ def time_op(self, level, op):
4640

4741
class SeriesOps:
4842

49-
params = [ops, ["float", "int"], [True, False]]
50-
param_names = ["op", "dtype", "use_bottleneck"]
43+
params = [ops, ["float", "int"]]
44+
param_names = ["op", "dtype"]
5145

52-
def setup(self, op, dtype, use_bottleneck):
46+
def setup(self, op, dtype):
5347
s = pd.Series(np.random.randn(100000)).astype(dtype)
54-
try:
55-
pd.options.compute.use_bottleneck = use_bottleneck
56-
except TypeError:
57-
from pandas.core import nanops
58-
59-
nanops._USE_BOTTLENECK = use_bottleneck
6048
self.s_func = getattr(s, op)
6149

62-
def time_op(self, op, dtype, use_bottleneck):
50+
def time_op(self, op, dtype):
6351
self.s_func()
6452

6553

@@ -101,61 +89,49 @@ def time_average_old(self, constructor, pct):
10189

10290
class Correlation:
10391

104-
params = [["spearman", "kendall", "pearson"], [True, False]]
105-
param_names = ["method", "use_bottleneck"]
92+
params = [["spearman", "kendall", "pearson"]]
93+
param_names = ["method"]
10694

107-
def setup(self, method, use_bottleneck):
108-
try:
109-
pd.options.compute.use_bottleneck = use_bottleneck
110-
except TypeError:
111-
from pandas.core import nanops
95+
def setup(self, method):
96+
self.df = pd.DataFrame(np.random.randn(500, 15))
97+
self.df2 = pd.DataFrame(np.random.randn(500, 15))
98+
self.df_wide = pd.DataFrame(np.random.randn(500, 100))
99+
self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9)
100+
self.s = pd.Series(np.random.randn(500))
101+
self.s2 = pd.Series(np.random.randn(500))
112102

113-
nanops._USE_BOTTLENECK = use_bottleneck
114-
self.df = pd.DataFrame(np.random.randn(1000, 30))
115-
self.df2 = pd.DataFrame(np.random.randn(1000, 30))
116-
self.df_wide = pd.DataFrame(np.random.randn(1000, 200))
117-
self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9)
118-
self.s = pd.Series(np.random.randn(1000))
119-
self.s2 = pd.Series(np.random.randn(1000))
120-
121-
def time_corr(self, method, use_bottleneck):
103+
def time_corr(self, method):
122104
self.df.corr(method=method)
123105

124-
def time_corr_wide(self, method, use_bottleneck):
106+
def time_corr_wide(self, method):
125107
self.df_wide.corr(method=method)
126108

127-
def time_corr_wide_nans(self, method, use_bottleneck):
109+
def time_corr_wide_nans(self, method):
128110
self.df_wide_nans.corr(method=method)
129111

130-
def peakmem_corr_wide(self, method, use_bottleneck):
112+
def peakmem_corr_wide(self, method):
131113
self.df_wide.corr(method=method)
132114

133-
def time_corr_series(self, method, use_bottleneck):
115+
def time_corr_series(self, method):
134116
self.s.corr(self.s2, method=method)
135117

136-
def time_corrwith_cols(self, method, use_bottleneck):
118+
def time_corrwith_cols(self, method):
137119
self.df.corrwith(self.df2, method=method)
138120

139-
def time_corrwith_rows(self, method, use_bottleneck):
121+
def time_corrwith_rows(self, method):
140122
self.df.corrwith(self.df2, axis=1, method=method)
141123

142124

143125
class Covariance:
144126

145-
params = [[True, False]]
146-
param_names = ["use_bottleneck"]
147-
148-
def setup(self, use_bottleneck):
149-
try:
150-
pd.options.compute.use_bottleneck = use_bottleneck
151-
except TypeError:
152-
from pandas.core import nanops
127+
params = []
128+
param_names = []
153129

154-
nanops._USE_BOTTLENECK = use_bottleneck
130+
def setup(self):
155131
self.s = pd.Series(np.random.randn(100000))
156132
self.s2 = pd.Series(np.random.randn(100000))
157133

158-
def time_cov_series(self, use_bottleneck):
134+
def time_cov_series(self):
159135
self.s.cov(self.s2)
160136

161137

asv_bench/benchmarks/timeseries.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ class InferFreq:
113113
def setup(self, freq):
114114
if freq is None:
115115
self.idx = date_range(start="1/1/1700", freq="D", periods=10000)
116-
self.idx.freq = None
116+
self.idx._data._freq = None
117117
else:
118118
self.idx = date_range(start="1/1/1700", freq=freq, periods=10000)
119119

0 commit comments

Comments
 (0)