
Commit 039094c

Merge remote-tracking branch 'upstream/master' into io-parquet-multiindex

2 parents: c859a4f + 90dc9ae

642 files changed (+31500 -28803 lines)
@@ -0,0 +1,33 @@
+name: "Update pre-commit config"
+
+on:
+  schedule:
+    - cron: "0 7 * * 1"  # At 07:00 on each Monday.
+  workflow_dispatch:
+
+jobs:
+  update-pre-commit:
+    if: github.repository_owner == 'pandas-dev'
+    name: Autoupdate pre-commit config
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v2
+      - name: Cache multiple paths
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pre-commit
+            ~/.cache/pip
+          key: pre-commit-autoupdate-${{ runner.os }}-build
+      - name: Update pre-commit config packages
+        uses: technote-space/create-pr-action@v2
+        with:
+          GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }}
+          EXECUTE_COMMANDS: |
+            pip install pre-commit
+            pre-commit autoupdate || (exit 0);
+            pre-commit run -a || (exit 0);
+          COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config"
+          PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}"
+          PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config"
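The EXECUTE_COMMANDS block above boils down to three shell commands. A minimal Python sketch of the same flow, for illustration only (the action runs these as shell steps, not via subprocess):

    import subprocess

    # Equivalent of EXECUTE_COMMANDS: bump every pinned hook rev in
    # .pre-commit-config.yaml, then run all hooks once so any autofixes
    # land in the same PR. The "|| (exit 0)" in the workflow swallows
    # failures so a failing hook cannot abort the update; check=False
    # mirrors that behaviour here.
    subprocess.run(["pip", "install", "pre-commit"], check=True)
    subprocess.run(["pre-commit", "autoupdate"], check=False)
    subprocess.run(["pre-commit", "run", "-a"], check=False)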

.github/workflows/ci.yml (-6)

@@ -37,12 +37,6 @@ jobs:
         ci/code_checks.sh lint
       if: always()

-    - name: Dependencies consistency
-      run: |
-        source activate pandas-dev
-        ci/code_checks.sh dependencies
-      if: always()
-
     - name: Checks on imported code
       run: |
         source activate pandas-dev
.pre-commit-config.yaml (+119 -16)

@@ -9,40 +9,42 @@ repos:
     -   id: flake8
         additional_dependencies: [flake8-comprehensions>=3.1.0]
     -   id: flake8
-        name: flake8-pyx
-        files: \.(pyx|pxd)$
-        types:
-            - file
+        name: flake8 (cython)
+        types: [cython]
         args: [--append-config=flake8/cython.cfg]
     -   id: flake8
-        name: flake8-pxd
+        name: flake8 (cython template)
         files: \.pxi\.in$
-        types:
-            - file
+        types: [text]
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.6.0
+    rev: 5.6.4
     hooks:
     -   id: isort
-        exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
-        files: '.pxd$|.py$'
-        types: [file]
+        name: isort (python)
+    -   id: isort
+        name: isort (cython)
+        types: [cython]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.7.2
+    rev: v2.7.3
     hooks:
     -   id: pyupgrade
         args: [--py37-plus]
 -   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.6.0
+    rev: v1.7.0
     hooks:
     -   id: rst-backticks
+    -   id: rst-directive-colons
+        types: [text]
+    -   id: rst-inline-touching-normal
+        types: [text]
 -   repo: local
     hooks:
     -   id: pip_to_conda
         name: Generate pip dependency from conda
         description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
         language: python
-        entry: python -m scripts.generate_pip_deps_from_conda
+        entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
         additional_dependencies: [pyyaml]

@@ -54,12 +56,113 @@ repos:
         types: [rst]
         args: [--filename=*.rst]
         additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
+    -   id: non-standard-imports
+        name: Check for non-standard imports
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
+            from\ pandas\.core\.common\ import|
+            from\ pandas\.core\ import\ common|
+
+            # Check for imports from collections.abc instead of `from collections import abc`
+            from\ collections\.abc\ import
+
+    -   id: non-standard-numpy.random-related-imports
+        name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
+        language: pygrep
+        exclude: pandas/_testing.py
+        entry: |
+            (?x)
+            # Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
+            from\ numpy\ import\ random|
+            from\ numpy.random\ import
+        types: [python]
+    -   id: non-standard-imports-in-tests
+        name: Check for non-standard imports in test suite
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+            from\ pandas\._testing\ import|
+            from\ pandas\ import\ _testing\ as\ tm|
+
+            # No direct imports from conftest
+            conftest\ import|
+            import\ conftest
+        types: [python]
+        files: ^pandas/tests/
+    -   id: incorrect-code-directives
+        name: Check for incorrect code block or IPython directives
+        language: pygrep
+        entry: (\.\. code-block ::|\.\. ipython ::)
+        files: \.(py|pyx|rst)$
+    -   id: unwanted-patterns-strings-to-concatenate
+        name: Check for use of not concatenated strings
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-strings-with-wrong-placed-whitespace
+        name: Check for strings with wrong placed spaces
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-private-import-across-module
+        name: Check for import of private attributes across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-private-function-across-module
+        name: Check for use of private functions across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: inconsistent-namespace-usage
+        name: 'Check for inconsistent use of pandas namespace in tests'
+        entry: python scripts/check_for_inconsistent_pandas_namespace.py
+        language: python
+        types: [python]
+        files: ^pandas/tests/
+    -   id: FrameOrSeriesUnion
+        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
+        entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
+        language: pygrep
+        types: [python]
+        exclude: ^pandas/_typing\.py$
+    -   id: type-not-class
+        name: Check for use of foo.__class__ instead of type(foo)
+        entry: \.__class__
+        language: pygrep
+        files: \.(py|pyx)$
+    -   id: unwanted-typing
+        name: Check for use of comment-based annotation syntax and missing error codes
+        entry: |
+            (?x)
+            \#\ type:\ (?!ignore)|
+            \#\ type:\s?ignore(?!\[)
+        language: pygrep
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^
+            pandas/tests/io/excel/test_writers\.py|
+            pandas/tests/io/pytables/common\.py|
+            pandas/tests/io/pytables/test_store\.py$
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
     -   id: yesqa
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
+    rev: v3.3.0
     hooks:
     -   id: end-of-file-fixer
-        exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
+        exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
+    -   id: trailing-whitespace
+        exclude: \.(html|svg)$
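Several of the new local hooks are pygrep-based: the entry is a regular expression, and the leading (?x) enables verbose mode, which is why literal spaces are escaped as "\ ". A self-contained check of the unwanted-typing pattern (the sample lines are illustrative, not taken from the diff):

    import re

    # The "unwanted-typing" entry compiled the way pygrep would see it:
    # (?x)/re.VERBOSE ignores unescaped whitespace, hence the "\ " escapes.
    pattern = re.compile(
        r"""
        \#\ type:\ (?!ignore)|    # comment-based annotation, e.g. "# type: List[int]"
        \#\ type:\s?ignore(?!\[)  # "# type: ignore" missing an error code
        """,
        re.VERBOSE,
    )

    assert pattern.search("x = []  # type: List[int]")         # flagged
    assert pattern.search("import foo  # type: ignore")        # flagged
    assert not pattern.search("# type: ignore[attr-defined]")  # has error code: ok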

Makefile (+2 -2)

@@ -30,11 +30,11 @@ check:
        python3 scripts/validate_unwanted_patterns.py \
                --validation-type="private_function_across_module" \
                --included-file-extensions="py" \
-               --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+               --excluded-file-paths=pandas/tests,asv_bench/ \
                pandas/

        python3 scripts/validate_unwanted_patterns.py \
                --validation-type="private_import_across_module" \
                --included-file-extensions="py" \
-               --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
+               --excluded-file-paths=pandas/tests,asv_bench/,doc/
                pandas/

asv_bench/benchmarks/groupby.py (+20)

@@ -358,6 +358,26 @@ def time_category_size(self):
         self.draws.groupby(self.cats).size()


+class FillNA:
+    def setup(self):
+        N = 100
+        self.df = DataFrame(
+            {"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}
+        ).set_index("group")
+
+    def time_df_ffill(self):
+        self.df.groupby("group").fillna(method="ffill")
+
+    def time_df_bfill(self):
+        self.df.groupby("group").fillna(method="bfill")
+
+    def time_srs_ffill(self):
+        self.df.groupby("group")["value"].fillna(method="ffill")
+
+    def time_srs_bfill(self):
+        self.df.groupby("group")["value"].fillna(method="bfill")
+
+
 class GroupByMethods:

     param_names = ["dtype", "method", "application"]
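What the new FillNA benchmarks measure, on toy data: fillna on a GroupBy fills only from values within the same group, so missing values never leak across the group boundary. A hedged sketch using the same API the benchmark calls:

    import numpy as np
    import pandas as pd

    # Groupwise forward fill: each NaN is filled from an earlier row of
    # its own group only.
    df = pd.DataFrame(
        {"group": [1, 1, 2, 2], "value": [1.0, np.nan, 2.0, np.nan]}
    ).set_index("group")

    result = df.groupby("group").fillna(method="ffill")
    # group 1 -> [1.0, 1.0], group 2 -> [2.0, 2.0]; a leading NaN in a
    # group would stay NaN because no earlier in-group value exists.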

asv_bench/benchmarks/io/pickle.py (+6)

@@ -24,5 +24,11 @@ def time_read_pickle(self):
     def time_write_pickle(self):
         self.df.to_pickle(self.fname)

+    def peakmem_read_pickle(self):
+        read_pickle(self.fname)
+
+    def peakmem_write_pickle(self):
+        self.df.to_pickle(self.fname)
+

 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
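The two new methods duplicate the timing benchmarks' bodies on purpose: in asv, the method-name prefix selects the metric, so time_* measures wall-clock duration while peakmem_* records the process's peak memory while the same code runs. A minimal sketch of the convention (hypothetical class, not part of the diff):

    import pickle
    import tempfile

    class TinyPickleExample:
        # asv picks benchmarks up by prefix: time_ -> duration,
        # peakmem_ -> peak resident memory during the call.
        def setup(self):
            self.fname = tempfile.mkstemp(suffix=".pkl")[1]
            with open(self.fname, "wb") as f:
                pickle.dump(list(range(10_000)), f)

        def time_load(self):
            with open(self.fname, "rb") as f:
                pickle.load(f)

        def peakmem_load(self):  # same body, different metric
            with open(self.fname, "rb") as f:
                pickle.load(f)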

asv_bench/benchmarks/rolling.py (+9)

@@ -76,12 +76,21 @@ class ExpandingMethods:

     def setup(self, constructor, dtype, method):
         N = 10 ** 5
+        N_groupby = 100
         arr = (100 * np.random.random(N)).astype(dtype)
         self.expanding = getattr(pd, constructor)(arr).expanding()
+        self.expanding_groupby = (
+            pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
+            .groupby("B")
+            .expanding()
+        )

     def time_expanding(self, constructor, dtype, method):
         getattr(self.expanding, method)()

+    def time_expanding_groupby(self, constructor, dtype, method):
+        getattr(self.expanding_groupby, method)()
+

 class EWMMethods:
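The new time_expanding_groupby case exercises expanding windows computed per group rather than over the whole column. A quick hedged illustration on toy data:

    import pandas as pd

    df = pd.DataFrame({"A": [1.0, 2.0, 3.0, 4.0], "B": [0, 0, 1, 1]})

    # Plain expanding: one cumulative window over the whole column.
    print(df["A"].expanding().sum().tolist())
    # [1.0, 3.0, 6.0, 10.0]

    # Grouped expanding: the window restarts for each group of B.
    print(df.groupby("B").expanding().sum()["A"].tolist())
    # [1.0, 3.0, 3.0, 7.0]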

asv_bench/benchmarks/strings.py (+17 -1)

@@ -2,7 +2,7 @@

 import numpy as np

-from pandas import DataFrame, Series
+from pandas import Categorical, DataFrame, Series

 from .pandas_vb_common import tm

@@ -16,6 +16,10 @@ def setup(self, dtype):
         self.series_arr = tm.rands_array(nchars=10, size=10 ** 5)
         self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()

+        # GH37371. Testing construction of string series/frames from ExtensionArrays
+        self.series_cat_arr = Categorical(self.series_arr)
+        self.frame_cat_arr = Categorical(self.frame_arr)
+
     def time_series_construction(self, dtype):
         Series(self.series_arr, dtype=dtype)

@@ -28,6 +32,18 @@ def time_frame_construction(self, dtype):
     def peakmem_frame_construction(self, dtype):
         DataFrame(self.frame_arr, dtype=dtype)

+    def time_cat_series_construction(self, dtype):
+        Series(self.series_cat_arr, dtype=dtype)
+
+    def peakmem_cat_series_construction(self, dtype):
+        Series(self.series_cat_arr, dtype=dtype)
+
+    def time_cat_frame_construction(self, dtype):
+        DataFrame(self.frame_cat_arr, dtype=dtype)
+
+    def peakmem_cat_frame_construction(self, dtype):
+        DataFrame(self.frame_cat_arr, dtype=dtype)
+

 class Methods:
     def setup(self):
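The new *_cat_* benchmarks construct string Series/DataFrames from a Categorical, i.e. from an ExtensionArray instead of a plain ndarray (GH37371). A hedged toy example of the conversion being timed:

    from pandas import Categorical, Series

    # A Categorical stores integer codes plus the unique categories, so
    # repeated strings are not duplicated.
    cat = Categorical(["a", "b", "a", "c"])
    print(cat.codes)       # [0 1 0 2]
    print(cat.categories)  # Index(['a', 'b', 'c'], dtype='object')

    # Series(cat, dtype="str") materialises the string values again; this
    # cast is the path the benchmark times.
    print(Series(cat, dtype="str").tolist())  # ['a', 'b', 'a', 'c']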

asv_bench/benchmarks/timeseries.py (+13 -3)

@@ -3,7 +3,14 @@
 import dateutil
 import numpy as np

-from pandas import DataFrame, Series, date_range, period_range, to_datetime
+from pandas import (
+    DataFrame,
+    Series,
+    date_range,
+    period_range,
+    timedelta_range,
+    to_datetime,
+)

 from pandas.tseries.frequencies import infer_freq

@@ -121,12 +128,15 @@ def time_convert(self):

 class Iteration:

-    params = [date_range, period_range]
+    params = [date_range, period_range, timedelta_range]
     param_names = ["time_index"]

     def setup(self, time_index):
         N = 10 ** 6
-        self.idx = time_index(start="20140101", freq="T", periods=N)
+        if time_index is timedelta_range:
+            self.idx = time_index(start=0, freq="T", periods=N)
+        else:
+            self.idx = time_index(start="20140101", freq="T", periods=N)
         self.exit = 10000

     def time_iter(self, time_index):
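The setup branches because timedelta_range anchors at a duration rather than a calendar date, so the "20140101" start string used for date_range and period_range would not parse. For example:

    from pandas import timedelta_range

    # start=0 is interpreted as a zero timedelta; freq="T" is minutes.
    idx = timedelta_range(start=0, freq="T", periods=3)
    print(list(idx))
    # [Timedelta('0 days 00:00:00'), Timedelta('0 days 00:01:00'),
    #  Timedelta('0 days 00:02:00')]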

azure-pipelines.yml (+25)

@@ -26,3 +26,28 @@ jobs:
     parameters:
       name: Windows
       vmImage: vs2017-win2016
+
+- job: py37_32bit
+  pool:
+    vmImage: ubuntu-18.04
+
+  steps:
+    - script: |
+        docker pull quay.io/pypa/manylinux2014_i686
+        docker run -v $(pwd):/pandas quay.io/pypa/manylinux2014_i686 \
+          /bin/bash -xc "cd pandas && \
+          /opt/python/cp37-cp37m/bin/python -m venv ~/virtualenvs/pandas-dev && \
+          . ~/virtualenvs/pandas-dev/bin/activate && \
+          python -m pip install --no-deps -U pip wheel setuptools && \
+          pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines && \
+          python setup.py build_ext -q -i -j2 && \
+          python -m pip install --no-build-isolation -e . && \
+          pytest -m 'not slow and not network and not clipboard' pandas --junitxml=test-data.xml"
+      displayName: 'Run 32-bit manylinux2014 Docker Build / Tests'
+
+    - task: PublishTestResults@2
+      condition: succeededOrFailed()
+      inputs:
+        testResultsFiles: '**/test-*.xml'
+        failTaskOnFailedTests: true
+        testRunTitle: 'Publish test results for Python 3.7-32 bit full Linux'
