Skip to content

Commit 49826cb

Browse files
author
Oleh Kozynets
committed
Merge Master
2 parents fb8cae9 + 0755915 commit 49826cb

File tree

763 files changed

+37145
-33651
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

763 files changed

+37145
-33651
lines changed

.github/CODE_OF_CONDUCT.md

-1
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,3 @@ and the [Swift Code of Conduct][swift].
6060
[homepage]: https://www.contributor-covenant.org
6161
[version]: https://www.contributor-covenant.org/version/1/3/0/
6262
[swift]: https://swift.org/community/#code-of-conduct
63-
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: "Update pre-commit config"
2+
3+
on:
4+
schedule:
5+
- cron: "0 7 * * 1" # At 07:00 on each Monday.
6+
workflow_dispatch:
7+
8+
jobs:
9+
update-pre-commit:
10+
if: github.repository_owner == 'pandas-dev'
11+
name: Autoupdate pre-commit config
12+
runs-on: ubuntu-latest
13+
steps:
14+
- name: Set up Python
15+
uses: actions/setup-python@v2
16+
- name: Cache multiple paths
17+
uses: actions/cache@v2
18+
with:
19+
path: |
20+
~/.cache/pre-commit
21+
~/.cache/pip
22+
key: pre-commit-autoupdate-${{ runner.os }}-build
23+
- name: Update pre-commit config packages
24+
uses: technote-space/create-pr-action@v2
25+
with:
26+
GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }}
27+
EXECUTE_COMMANDS: |
28+
pip install pre-commit
29+
pre-commit autoupdate || (exit 0);
30+
pre-commit run -a || (exit 0);
31+
COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config"
32+
PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}"
33+
PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config"

.github/workflows/ci.yml

+1-7
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ jobs:
3737
ci/code_checks.sh lint
3838
if: always()
3939

40-
- name: Dependencies consistency
41-
run: |
42-
source activate pandas-dev
43-
ci/code_checks.sh dependencies
44-
if: always()
45-
4640
- name: Checks on imported code
4741
run: |
4842
source activate pandas-dev
@@ -125,7 +119,7 @@ jobs:
125119
# This can be removed when the ipython directive fails when there are errors,
126120
# including the `tee sphinx.log` in the previous step (https://github.com/ipython/ipython/issues/11547)
127121
- name: Check ipython directive errors
128-
run: "! grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
122+
run: "! grep -B10 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
129123

130124
- name: Install ssh key
131125
run: |

.pre-commit-config.yaml

+127-14
Original file line numberDiff line numberDiff line change
@@ -4,46 +4,159 @@ repos:
44
hooks:
55
- id: black
66
- repo: https://gitlab.com/pycqa/flake8
7-
rev: 3.8.3
7+
rev: 3.8.4
88
hooks:
99
- id: flake8
1010
additional_dependencies: [flake8-comprehensions>=3.1.0]
1111
- id: flake8
12-
name: flake8-pyx
13-
files: \.(pyx|pxd)$
14-
types:
15-
- file
12+
name: flake8 (cython)
13+
types: [cython]
1614
args: [--append-config=flake8/cython.cfg]
1715
- id: flake8
18-
name: flake8-pxd
16+
name: flake8 (cython template)
1917
files: \.pxi\.in$
20-
types:
21-
- file
18+
types: [text]
2219
args: [--append-config=flake8/cython-template.cfg]
2320
- repo: https://github.com/PyCQA/isort
24-
rev: 5.2.2
21+
rev: 5.6.4
2522
hooks:
2623
- id: isort
27-
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
24+
name: isort (python)
25+
- id: isort
26+
name: isort (cython)
27+
types: [cython]
2828
- repo: https://github.com/asottile/pyupgrade
29-
rev: v2.7.2
29+
rev: v2.7.3
3030
hooks:
3131
- id: pyupgrade
3232
args: [--py37-plus]
3333
- repo: https://github.com/pre-commit/pygrep-hooks
34-
rev: v1.6.0
34+
rev: v1.7.0
3535
hooks:
3636
- id: rst-backticks
37+
- id: rst-directive-colons
38+
types: [text]
39+
- id: rst-inline-touching-normal
40+
types: [text]
3741
- repo: local
3842
hooks:
3943
- id: pip_to_conda
4044
name: Generate pip dependency from conda
4145
description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
42-
language: system
43-
entry: python -m scripts.generate_pip_deps_from_conda
46+
language: python
47+
entry: python scripts/generate_pip_deps_from_conda.py
4448
files: ^(environment.yml|requirements-dev.txt)$
4549
pass_filenames: false
50+
additional_dependencies: [pyyaml]
51+
- id: flake8-rst
52+
name: flake8-rst
53+
description: Run flake8 on code snippets in docstrings or RST files
54+
language: python
55+
entry: flake8-rst
56+
types: [rst]
57+
args: [--filename=*.rst]
58+
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
59+
- id: non-standard-imports
60+
name: Check for non-standard imports
61+
language: pygrep
62+
entry: |
63+
(?x)
64+
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
65+
from\ pandas\.core\.common\ import|
66+
from\ pandas\.core\ import\ common|
67+
68+
# Check for imports from collections.abc instead of `from collections import abc`
69+
from\ collections\.abc\ import
70+
71+
- id: non-standard-numpy.random-related-imports
72+
name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
73+
language: pygrep
74+
exclude: pandas/_testing.py
75+
entry: |
76+
(?x)
77+
# Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
78+
from\ numpy\ import\ random|
79+
from\ numpy.random\ import
80+
types: [python]
81+
- id: non-standard-imports-in-tests
82+
name: Check for non-standard imports in test suite
83+
language: pygrep
84+
entry: |
85+
(?x)
86+
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
87+
from\ pandas\._testing\ import|
88+
from\ pandas\ import\ _testing\ as\ tm|
89+
90+
# No direct imports from conftest
91+
conftest\ import|
92+
import\ conftest
93+
types: [python]
94+
files: ^pandas/tests/
95+
- id: incorrect-code-directives
96+
name: Check for incorrect code block or IPython directives
97+
language: pygrep
98+
entry: (\.\. code-block ::|\.\. ipython ::)
99+
files: \.(py|pyx|rst)$
100+
- id: unwanted-patterns-strings-to-concatenate
101+
name: Check for use of not concatenated strings
102+
language: python
103+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
104+
files: \.(py|pyx|pxd|pxi)$
105+
- id: unwanted-patterns-strings-with-wrong-placed-whitespace
106+
name: Check for strings with wrong placed spaces
107+
language: python
108+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
109+
files: \.(py|pyx|pxd|pxi)$
110+
- id: unwanted-patterns-private-import-across-module
111+
name: Check for import of private attributes across modules
112+
language: python
113+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
114+
types: [python]
115+
exclude: ^(asv_bench|pandas/tests|doc)/
116+
- id: unwanted-patterns-private-function-across-module
117+
name: Check for use of private functions across modules
118+
language: python
119+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
120+
types: [python]
121+
exclude: ^(asv_bench|pandas/tests|doc)/
122+
- id: FrameOrSeriesUnion
123+
name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
124+
entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
125+
language: pygrep
126+
types: [python]
127+
exclude: ^pandas/_typing\.py$
128+
- id: type-not-class
129+
name: Check for use of foo.__class__ instead of type(foo)
130+
entry: \.__class__
131+
language: pygrep
132+
files: \.(py|pyx)$
133+
- id: unwanted-typing
134+
name: Check for use of comment-based annotation syntax and missing error codes
135+
entry: |
136+
(?x)
137+
\#\ type:\ (?!ignore)|
138+
\#\ type:\s?ignore(?!\[)
139+
language: pygrep
140+
types: [python]
141+
- id: no-os-remove
142+
name: Check code for instances of os.remove
143+
entry: os\.remove
144+
language: pygrep
145+
types: [python]
146+
files: ^pandas/tests/
147+
exclude: |
148+
(?x)^
149+
pandas/tests/io/excel/test_writers\.py|
150+
pandas/tests/io/pytables/common\.py|
151+
pandas/tests/io/pytables/test_store\.py$
46152
- repo: https://github.com/asottile/yesqa
47153
rev: v1.2.2
48154
hooks:
49155
- id: yesqa
156+
- repo: https://github.com/pre-commit/pre-commit-hooks
157+
rev: v3.3.0
158+
hooks:
159+
- id: end-of-file-fixer
160+
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
161+
- id: trailing-whitespace
162+
exclude: \.(html|svg)$

.travis.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ matrix:
4141
- JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
4242

4343
- env:
44-
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
45-
46-
- env:
47-
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"
44+
- JOB="3.8, slow" ENV_FILE="ci/deps/travis-38-slow.yaml" PATTERN="slow" SQL="1"
45+
services:
46+
- mysql
47+
- postgresql
4848

4949
- env:
5050
- JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"

AUTHORS.md

-1
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,3 @@ pandas is distributed under a 3-clause ("Simplified" or "New") BSD
5454
license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
5555
BSD-compatible licenses, are included. Their licenses follow the pandas
5656
license.
57-

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ check:
3030
python3 scripts/validate_unwanted_patterns.py \
3131
--validation-type="private_function_across_module" \
3232
--included-file-extensions="py" \
33-
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
33+
--excluded-file-paths=pandas/tests,asv_bench/ \
3434
pandas/
3535

3636
python3 scripts/validate_unwanted_patterns.py \
3737
--validation-type="private_import_across_module" \
3838
--included-file-extensions="py" \
39-
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
39+
--excluded-file-paths=pandas/tests,asv_bench/,doc/
4040
pandas/

asv_bench/benchmarks/dtypes.py

+57
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import string
2+
13
import numpy as np
24

5+
from pandas import DataFrame
6+
import pandas._testing as tm
37
from pandas.api.types import pandas_dtype
48

59
from .pandas_vb_common import (
@@ -62,4 +66,57 @@ def time_infer(self, dtype):
6266
lib.infer_dtype(self.data_dict[dtype], skipna=False)
6367

6468

69+
class SelectDtypes:
70+
71+
params = [
72+
tm.ALL_INT_DTYPES
73+
+ tm.ALL_EA_INT_DTYPES
74+
+ tm.FLOAT_DTYPES
75+
+ tm.COMPLEX_DTYPES
76+
+ tm.DATETIME64_DTYPES
77+
+ tm.TIMEDELTA64_DTYPES
78+
+ tm.BOOL_DTYPES
79+
]
80+
param_names = ["dtype"]
81+
82+
def setup(self, dtype):
83+
N, K = 5000, 50
84+
self.index = tm.makeStringIndex(N)
85+
self.columns = tm.makeStringIndex(K)
86+
87+
def create_df(data):
88+
return DataFrame(data, index=self.index, columns=self.columns)
89+
90+
self.df_int = create_df(np.random.randint(low=100, size=(N, K)))
91+
self.df_float = create_df(np.random.randn(N, K))
92+
self.df_bool = create_df(np.random.choice([True, False], size=(N, K)))
93+
self.df_string = create_df(
94+
np.random.choice(list(string.ascii_letters), size=(N, K))
95+
)
96+
97+
def time_select_dtype_int_include(self, dtype):
98+
self.df_int.select_dtypes(include=dtype)
99+
100+
def time_select_dtype_int_exclude(self, dtype):
101+
self.df_int.select_dtypes(exclude=dtype)
102+
103+
def time_select_dtype_float_include(self, dtype):
104+
self.df_float.select_dtypes(include=dtype)
105+
106+
def time_select_dtype_float_exclude(self, dtype):
107+
self.df_float.select_dtypes(exclude=dtype)
108+
109+
def time_select_dtype_bool_include(self, dtype):
110+
self.df_bool.select_dtypes(include=dtype)
111+
112+
def time_select_dtype_bool_exclude(self, dtype):
113+
self.df_bool.select_dtypes(exclude=dtype)
114+
115+
def time_select_dtype_string_include(self, dtype):
116+
self.df_string.select_dtypes(include=dtype)
117+
118+
def time_select_dtype_string_exclude(self, dtype):
119+
self.df_string.select_dtypes(exclude=dtype)
120+
121+
65122
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/groupby.py

+20
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,26 @@ def time_category_size(self):
358358
self.draws.groupby(self.cats).size()
359359

360360

361+
class FillNA:
362+
def setup(self):
363+
N = 100
364+
self.df = DataFrame(
365+
{"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}
366+
).set_index("group")
367+
368+
def time_df_ffill(self):
369+
self.df.groupby("group").fillna(method="ffill")
370+
371+
def time_df_bfill(self):
372+
self.df.groupby("group").fillna(method="bfill")
373+
374+
def time_srs_ffill(self):
375+
self.df.groupby("group")["value"].fillna(method="ffill")
376+
377+
def time_srs_bfill(self):
378+
self.df.groupby("group")["value"].fillna(method="bfill")
379+
380+
361381
class GroupByMethods:
362382

363383
param_names = ["dtype", "method", "application"]

asv_bench/benchmarks/io/pickle.py

+6
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,11 @@ def time_read_pickle(self):
2424
def time_write_pickle(self):
2525
self.df.to_pickle(self.fname)
2626

27+
def peakmem_read_pickle(self):
28+
read_pickle(self.fname)
29+
30+
def peakmem_write_pickle(self):
31+
self.df.to_pickle(self.fname)
32+
2733

2834
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/rolling.py

+9
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,21 @@ class ExpandingMethods:
7676

7777
def setup(self, constructor, dtype, method):
7878
N = 10 ** 5
79+
N_groupby = 100
7980
arr = (100 * np.random.random(N)).astype(dtype)
8081
self.expanding = getattr(pd, constructor)(arr).expanding()
82+
self.expanding_groupby = (
83+
pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
84+
.groupby("B")
85+
.expanding()
86+
)
8187

8288
def time_expanding(self, constructor, dtype, method):
8389
getattr(self.expanding, method)()
8490

91+
def time_expanding_groupby(self, constructor, dtype, method):
92+
getattr(self.expanding_groupby, method)()
93+
8594

8695
class EWMMethods:
8796

0 commit comments

Comments
 (0)