Skip to content

Commit 1dfd814

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 23bcfab + 8f6ec1e commit 1dfd814

File tree

351 files changed

+6772
-4154
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

351 files changed

+6772
-4154
lines changed

.github/workflows/pre-commit.yml

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
name: pre-commit  # workflow that runs the repository's pre-commit hooks in CI
2+
3+
on:
4+
pull_request:  # no filters given, so every pull request triggers the workflow
5+
push:
6+
branches: [master]  # pushes trigger the workflow only on master
7+
8+
jobs:
9+
pre-commit:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v2  # check out the repository before the later steps run
13+
- uses: actions/setup-python@v2  # no python-version is pinned here
14+
- uses: pre-commit/action@v2.0.0

.github/workflows/stale-pr.yml

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: "Stale PRs"  # scheduled workflow that labels inactive pull requests
2+
on:
3+
schedule:
4+
# * is a special character in YAML so you have to quote this string
5+
- cron: "0 */6 * * *"  # minute 0 of every sixth hour
6+
7+
jobs:
8+
stale:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- uses: actions/stale@v3
12+
with:
13+
repo-token: ${{ secrets.GITHUB_TOKEN }}
14+
stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity."  # "thirty days" matches days-before-stale below
15+
skip-stale-pr-message: true  # NOTE(review): presumably suppresses posting the message above - confirm against the actions/stale@v3 inputs
16+
stale-pr-label: "Stale"
17+
exempt-pr-labels: "Needs Review,Blocked,Needs Discussion"  # comma-separated label names
18+
days-before-stale: 30
19+
days-before-close: -1  # NOTE(review): presumably disables automatic closing - confirm against the actions/stale@v3 inputs
20+
remove-stale-when-updated: false
21+
debug-only: false

.pre-commit-config.yaml

+9-9
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
11
repos:
22
- repo: https://github.com/python/black
3-
rev: 19.10b0
3+
rev: 20.8b1
44
hooks:
55
- id: black
6-
language_version: python3
76
- repo: https://gitlab.com/pycqa/flake8
8-
rev: 3.7.7
7+
rev: 3.8.3
98
hooks:
109
- id: flake8
11-
language: python_venv
1210
additional_dependencies: [flake8-comprehensions>=3.1.0]
1311
- id: flake8
1412
name: flake8-pyx
15-
language: python_venv
1613
files: \.(pyx|pxd)$
1714
types:
1815
- file
1916
args: [--append-config=flake8/cython.cfg]
2017
- id: flake8
2118
name: flake8-pxd
22-
language: python_venv
2319
files: \.pxi\.in$
2420
types:
2521
- file
2622
args: [--append-config=flake8/cython-template.cfg]
27-
- repo: https://github.com/pre-commit/mirrors-isort
28-
rev: v4.3.21
23+
- repo: https://github.com/PyCQA/isort
24+
rev: 5.2.2
2925
hooks:
3026
- id: isort
31-
language: python_venv
3227
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
28+
- repo: https://github.com/asottile/pyupgrade
29+
rev: v2.7.2
30+
hooks:
31+
- id: pyupgrade
32+
args: [--py37-plus]

.travis.yml

+1-8
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ matrix:
4242

4343
- arch: arm64
4444
env:
45-
- JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
45+
- JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard and not arm_slow)"
4646

4747
- env:
4848
- JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
@@ -58,13 +58,6 @@ matrix:
5858
services:
5959
- mysql
6060
- postgresql
61-
allow_failures:
62-
- arch: arm64
63-
env:
64-
- JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
65-
- dist: bionic
66-
env:
67-
- JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
6861

6962

7063
before_install:

Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@ check:
3232
--included-file-extensions="py" \
3333
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
3434
pandas/
35+
36+
python3 scripts/validate_unwanted_patterns.py \
37+
--validation-type="private_import_across_module" \
38+
--included-file-extensions="py" \
39+
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ \
40+
pandas/

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
// followed by the pip installed packages).
4040
"matrix": {
4141
"numpy": [],
42-
"Cython": ["0.29.16"],
42+
"Cython": ["0.29.21"],
4343
"matplotlib": [],
4444
"sqlalchemy": [],
4545
"scipy": [],

asv_bench/benchmarks/arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def setup(self, op):
125125
arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
126126
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
127127
df = pd.concat(
128-
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True,
128+
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
129129
)
130130
# should already be the case, but just to be sure
131131
df._consolidate_inplace()

asv_bench/benchmarks/frame_methods.py

+40
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,46 @@ def time_to_html_mixed(self):
219219
self.df2.to_html()
220220

221221

222+
class ToNumpy:  # benchmarks DataFrame.to_numpy() and DataFrame.values on four frame layouts
223+
def setup(self):  # builds tall and wide float frames plus a mixed-dtype copy of each
224+
N = 10000  # length of the long axis
225+
M = 10  # length of the short axis
226+
self.df_tall = DataFrame(np.random.randn(N, M))  # N rows x M columns of random floats
227+
self.df_wide = DataFrame(np.random.randn(M, N))  # M rows x N columns of random floats
228+
self.df_mixed_tall = self.df_tall.copy()  # copy so the float-only frame stays unchanged
229+
self.df_mixed_tall["foo"] = "bar"  # add a constant string column
230+
self.df_mixed_tall[0] = period_range("2000", periods=N)  # overwrite column 0 with N periods
231+
self.df_mixed_tall[1] = range(N)  # overwrite column 1 with integers 0..N-1
232+
self.df_mixed_wide = self.df_wide.copy()  # copy so the float-only frame stays unchanged
233+
self.df_mixed_wide["foo"] = "bar"  # add a constant string column
234+
self.df_mixed_wide[0] = period_range("2000", periods=M)  # overwrite column 0 with M periods
235+
self.df_mixed_wide[1] = range(M)  # overwrite column 1 with integers 0..M-1
236+
237+
def time_to_numpy_tall(self):
238+
self.df_tall.to_numpy()  # result is discarded; only the call itself is exercised
239+
240+
def time_to_numpy_wide(self):
241+
self.df_wide.to_numpy()
242+
243+
def time_to_numpy_mixed_tall(self):
244+
self.df_mixed_tall.to_numpy()
245+
246+
def time_to_numpy_mixed_wide(self):
247+
self.df_mixed_wide.to_numpy()
248+
249+
def time_values_tall(self):
250+
self.df_tall.values  # bare attribute access; the value is intentionally unused
251+
252+
def time_values_wide(self):
253+
self.df_wide.values
254+
255+
def time_values_mixed_tall(self):
256+
self.df_mixed_tall.values
257+
258+
def time_values_mixed_wide(self):
259+
self.df_mixed_wide.values
260+
261+
222262
class Repr:
223263
def setup(self):
224264
nrows = 10000

asv_bench/benchmarks/groupby.py

+32-14
Original file line numberDiff line numberDiff line change
@@ -627,49 +627,63 @@ def time_first(self):
627627

628628

629629
class TransformEngine:
630-
def setup(self):
630+
631+
param_names = ["parallel"]
632+
params = [[True, False]]
633+
634+
def setup(self, parallel):
631635
N = 10 ** 3
632636
data = DataFrame(
633637
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
634638
columns=[0, 1],
635639
)
640+
self.parallel = parallel
636641
self.grouper = data.groupby(0)
637642

638-
def time_series_numba(self):
643+
def time_series_numba(self, parallel):
639644
def function(values, index):
640645
return values * 5
641646

642-
self.grouper[1].transform(function, engine="numba")
647+
self.grouper[1].transform(
648+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
649+
)
643650

644-
def time_series_cython(self):
651+
def time_series_cython(self, parallel):
645652
def function(values):
646653
return values * 5
647654

648655
self.grouper[1].transform(function, engine="cython")
649656

650-
def time_dataframe_numba(self):
657+
def time_dataframe_numba(self, parallel):
651658
def function(values, index):
652659
return values * 5
653660

654-
self.grouper.transform(function, engine="numba")
661+
self.grouper.transform(
662+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
663+
)
655664

656-
def time_dataframe_cython(self):
665+
def time_dataframe_cython(self, parallel):
657666
def function(values):
658667
return values * 5
659668

660669
self.grouper.transform(function, engine="cython")
661670

662671

663672
class AggEngine:
664-
def setup(self):
673+
674+
param_names = ["parallel"]
675+
params = [[True, False]]
676+
677+
def setup(self, parallel):
665678
N = 10 ** 3
666679
data = DataFrame(
667680
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
668681
columns=[0, 1],
669682
)
683+
self.parallel = parallel
670684
self.grouper = data.groupby(0)
671685

672-
def time_series_numba(self):
686+
def time_series_numba(self, parallel):
673687
def function(values, index):
674688
total = 0
675689
for i, value in enumerate(values):
@@ -679,9 +693,11 @@ def function(values, index):
679693
total += value * 2
680694
return total
681695

682-
self.grouper[1].agg(function, engine="numba")
696+
self.grouper[1].agg(
697+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
698+
)
683699

684-
def time_series_cython(self):
700+
def time_series_cython(self, parallel):
685701
def function(values):
686702
total = 0
687703
for i, value in enumerate(values):
@@ -693,7 +709,7 @@ def function(values):
693709

694710
self.grouper[1].agg(function, engine="cython")
695711

696-
def time_dataframe_numba(self):
712+
def time_dataframe_numba(self, parallel):
697713
def function(values, index):
698714
total = 0
699715
for i, value in enumerate(values):
@@ -703,9 +719,11 @@ def function(values, index):
703719
total += value * 2
704720
return total
705721

706-
self.grouper.agg(function, engine="numba")
722+
self.grouper.agg(
723+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
724+
)
707725

708-
def time_dataframe_cython(self):
726+
def time_dataframe_cython(self, parallel):
709727
def function(values):
710728
total = 0
711729
for i, value in enumerate(values):

asv_bench/benchmarks/strings.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,20 @@ class Construction:
1313
param_names = ["dtype"]
1414

1515
def setup(self, dtype):
16-
self.data = tm.rands_array(nchars=10 ** 5, size=10)
16+
self.series_arr = tm.rands_array(nchars=10, size=10 ** 5)
17+
self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()
1718

18-
def time_construction(self, dtype):
19-
Series(self.data, dtype=dtype)
19+
def time_series_construction(self, dtype):
20+
Series(self.series_arr, dtype=dtype)
2021

21-
def peakmem_construction(self, dtype):
22-
Series(self.data, dtype=dtype)
22+
def peakmem_series_construction(self, dtype):
23+
Series(self.series_arr, dtype=dtype)
24+
25+
def time_frame_construction(self, dtype):
26+
DataFrame(self.frame_arr, dtype=dtype)
27+
28+
def peakmem_frame_construction(self, dtype):
29+
DataFrame(self.frame_arr, dtype=dtype)
2330

2431

2532
class Methods:

asv_bench/benchmarks/timeseries.py

+23
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,29 @@ def time_lookup_and_cleanup(self):
263263
self.ts.index._cleanup()
264264

265265

266+
class ToDatetimeFromIntsFloats:  # benchmarks to_datetime() on integer and float inputs with unit="s" and unit="ns"
267+
def setup(self):  # builds one int64 Series and derives the float and scaled variants from it
268+
self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64")  # 604,800 consecutive integers (one week of per-second values)
269+
self.ts_sec_float = self.ts_sec.astype("float64")  # same values as float64
270+
271+
self.ts_nanosec = 1_000_000 * self.ts_sec  # NOTE(review): 1_000_000 x seconds gives microseconds (nanoseconds would need 1_000_000_000), yet these are parsed with unit="ns" below - confirm the scale is intended
272+
self.ts_nanosec_float = self.ts_nanosec.astype("float64")  # same scaled values as float64
273+
274+
# speed of int64 and float64 paths should be comparable
275+
276+
def time_nanosec_int64(self):
277+
to_datetime(self.ts_nanosec, unit="ns")  # result is discarded; only the conversion is exercised
278+
279+
def time_nanosec_float64(self):
280+
to_datetime(self.ts_nanosec_float, unit="ns")
281+
282+
def time_sec_int64(self):
283+
to_datetime(self.ts_sec, unit="s")
284+
285+
def time_sec_float64(self):
286+
to_datetime(self.ts_sec_float, unit="s")
287+
288+
266289
class ToDatetimeYYYYMMDD:
267290
def setup(self):
268291
rng = date_range(start="1/1/2000", periods=10000, freq="D")

ci/build39.sh

+1-9
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,7 @@
33

44
sudo apt-get install build-essential gcc xvfb
55
pip install --no-deps -U pip wheel setuptools
6-
pip install python-dateutil pytz pytest pytest-xdist hypothesis
7-
pip install cython --pre # https://github.com/cython/cython/issues/3395
8-
9-
git clone https://github.com/numpy/numpy
10-
cd numpy
11-
python setup.py build_ext --inplace
12-
python setup.py install
13-
cd ..
14-
rm -rf numpy
6+
pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis
157

168
python setup.py build_ext --inplace  # long options take two dashes; "-inplace" would be read as a bundle of short flags
179
python -m pip install --no-build-isolation -e .

ci/code_checks.sh

+11-3
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,19 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
116116
fi
117117
RET=$(($RET + $?)) ; echo $MSG "DONE"
118118

119-
MSG='Check for use of private module attribute access' ; echo $MSG
119+
MSG='Check for import of private attributes across modules' ; echo $MSG
120120
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
121+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122122
else
123-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
123+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124+
fi
125+
RET=$(($RET + $?)) ; echo $MSG "DONE"
126+
127+
MSG='Check for use of private functions across modules' ; echo $MSG
128+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
129+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
130+
else
131+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
124132
fi
125133
RET=$(($RET + $?)) ; echo $MSG "DONE"
126134

ci/deps/azure-37-32bit.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,6 @@ dependencies:
2121
# see comment above
2222
- pip
2323
- pip:
24-
- cython>=0.29.16
24+
- cython>=0.29.21
2525
- numpy>=1.16.5
2626
- pytest>=5.0.1

ci/deps/azure-37-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66
- python=3.7.*
77

88
# tools
9-
- cython>=0.29.16
9+
- cython>=0.29.21
1010
- pytest>=5.0.1
1111
- pytest-xdist>=1.21
1212
- pytest-asyncio

0 commit comments

Comments
 (0)