pandas-dev
diff --git a/‎.github/CODE_OF_CONDUCT.md
-1 b/‎.github/CODE_OF_CONDUCT.md
-1
diff --git a/‎.github/workflows/autoupdate-pre-commit-config.yml
+33 b/‎.github/workflows/autoupdate-pre-commit-config.yml
+33
diff --git a/‎.github/workflows/ci.yml
+1-7 b/‎.github/workflows/ci.yml
+1-7
diff --git a/‎.pre-commit-config.yaml
+127-14 b/‎.pre-commit-config.yaml
+127-14
diff --git a/‎.travis.yml
+4-4 b/‎.travis.yml
+4-4
diff --git a/‎AUTHORS.md
-1 b/‎AUTHORS.md
-1
diff --git a/‎Makefile
+2-2 b/‎Makefile
+2-2
diff --git a/‎asv_bench/benchmarks/dtypes.py
+57 b/‎asv_bench/benchmarks/dtypes.py
+57
diff --git a/‎asv_bench/benchmarks/groupby.py
+20 b/‎asv_bench/benchmarks/groupby.py
+20
diff --git a/‎asv_bench/benchmarks/io/pickle.py
+6 b/‎asv_bench/benchmarks/io/pickle.py
+6
diff --git a/‎asv_bench/benchmarks/rolling.py
+9 b/‎asv_bench/benchmarks/rolling.py
+9
@@ -60,4 +60,3 @@ and the [Swift Code of Conduct][swift].
 [homepage]: https://www.contributor-covenant.org
 [version]: https://www.contributor-covenant.org/version/1/3/0/
 [swift]: https://swift.org/community/#code-of-conduct
-
@@ -0,0 +1,33 @@
+name: "Update pre-commit config"
+
+on:
+  schedule:
+    - cron: "0 7 * * 1" # At 07:00 on each Monday.
+  workflow_dispatch:  # no inputs configured for this trigger
+
+jobs:
+  update-pre-commit:
+    if: github.repository_owner == 'pandas-dev'  # job is skipped when the repository owner is anyone else
+    name: Autoupdate pre-commit config
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v2
+      - name: Cache multiple paths
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pre-commit
+            ~/.cache/pip
+          key: pre-commit-autoupdate-${{ runner.os }}-build  # key varies only by runner OS
+      - name: Update pre-commit config packages
+        uses: technote-space/create-pr-action@v2
+        with:
+          GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }}  # read from the ACTION_TRIGGER_TOKEN repository secret
+          EXECUTE_COMMANDS: |  # each `|| (exit 0)` below turns a non-zero exit of that command into success
+            pip install pre-commit
+            pre-commit autoupdate || (exit 0);
+            pre-commit run -a || (exit 0);
+          COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config"
+          PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}"  # NOTE(review): ${PR_ID} is presumably expanded by the action - confirm
+          PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config"
@@ -37,12 +37,6 @@ jobs:
         ci/code_checks.sh lint
       if: always()
 
-    - name: Dependencies consistency
-      run: |
-        source activate pandas-dev
-        ci/code_checks.sh dependencies
-      if: always()
-
     - name: Checks on imported code
       run: |
         source activate pandas-dev
@@ -125,7 +119,7 @@ jobs:
     # This can be removed when the ipython directive fails when there are errors,
     # including the `tee sphinx.log` in te previous step (https://github.com/ipython/ipython/issues/11547)
     - name: Check ipython directive errors
-      run: "! grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
+      run: "! grep -B10 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
 
     - name: Install ssh key
       run: |
 
@@ -4,46 +4,159 @@ repos:
     hooks:
     -   id: black
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.8.3
+    rev: 3.8.4
     hooks:
     -   id: flake8
         additional_dependencies: [flake8-comprehensions>=3.1.0]
     -   id: flake8
-        name: flake8-pyx
-        files: \.(pyx|pxd)$
-        types:
-          - file
+        name: flake8 (cython)
+        types: [cython]
         args: [--append-config=flake8/cython.cfg]
     -   id: flake8
-        name: flake8-pxd
+        name: flake8 (cython template)
         files: \.pxi\.in$
-        types:
-          - file
+        types: [text]
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.2.2
+    rev: 5.6.4
     hooks:
     -   id: isort
-        exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
+        name: isort (python)
+    -   id: isort
+        name: isort (cython)
+        types: [cython]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.7.2
+    rev: v2.7.3
     hooks:
     -   id: pyupgrade
         args: [--py37-plus]
 -   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.6.0
+    rev: v1.7.0
     hooks:
       - id: rst-backticks
+      - id: rst-directive-colons
+        types: [text]
+      - id: rst-inline-touching-normal
+        types: [text]
 -   repo: local
     hooks:
     -   id: pip_to_conda
         name: Generate pip dependency from conda
         description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
-        language: system
-        entry: python -m scripts.generate_pip_deps_from_conda
+        language: python
+        entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
+        additional_dependencies: [pyyaml]
+    -   id: flake8-rst
+        name: flake8-rst
+        description: Run flake8 on code snippets in docstrings or RST files
+        language: python
+        entry: flake8-rst
+        types: [rst]
+        args: [--filename=*.rst]
+        additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
+    -   id: non-standard-imports
+        name: Check for non-standard imports
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
+            from\ pandas\.core\.common\ import|
+            from\ pandas\.core\ import\ common|
+
+            # Check for imports from collections.abc instead of `from collections import abc`
+            from\ collections\.abc\ import
+
+    -   id: non-standard-numpy.random-related-imports
+        name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
+        language: pygrep
+        exclude: ^pandas/_testing\.py$
+        entry: |
+            (?x)
+            # Flag `from numpy import random` and `from numpy.random import <method>`; use `np.random.<method>` instead
+            from\ numpy\ import\ random|
+            from\ numpy\.random\ import
+        types: [python]
+    -   id: non-standard-imports-in-tests
+        name: Check for non-standard imports in test suite
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+            from\ pandas\._testing\ import|
+            from\ pandas\ import\ _testing\ as\ tm|
+
+            # No direct imports from conftest
+            conftest\ import|
+            import\ conftest
+        types: [python]
+        files: ^pandas/tests/
+    -   id: incorrect-code-directives
+        name: Check for incorrect code block or IPython directives
+        language: pygrep
+        entry: (\.\. code-block ::|\.\. ipython ::)
+        files: \.(py|pyx|rst)$
+    -   id: unwanted-patterns-strings-to-concatenate
+        name: Check for use of not concatenated strings
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-strings-with-wrong-placed-whitespace
+        name: Check for strings with wrong placed spaces
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-private-import-across-module
+        name: Check for import of private attributes across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-private-function-across-module
+        name: Check for use of private functions across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: FrameOrSeriesUnion
+        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
+        entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
+        language: pygrep
+        types: [python]
+        exclude: ^pandas/_typing\.py$
+    -   id: type-not-class
+        name: Check for use of foo.__class__ instead of type(foo)
+        entry: \.__class__
+        language: pygrep
+        files: \.(py|pyx)$
+    -   id: unwanted-typing
+        name: Check for use of comment-based annotation syntax and missing error codes
+        entry: |
+            (?x)
+            \#\ type:\ (?!ignore)|
+            \#\ type:\s?ignore(?!\[)
+        language: pygrep
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^
+            pandas/tests/io/excel/test_writers\.py|
+            pandas/tests/io/pytables/common\.py|
+            pandas/tests/io/pytables/test_store\.py$
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
     -   id: yesqa
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.3.0
+    hooks:
+    -   id: end-of-file-fixer
+        exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
+    -   id: trailing-whitespace
+        exclude: \.(html|svg)$
@@ -41,10 +41,10 @@ matrix:
         - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
 
     - env:
-        - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
-
-    - env:
-        - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"
+      - JOB="3.8, slow" ENV_FILE="ci/deps/travis-38-slow.yaml" PATTERN="slow" SQL="1"
+      services:
+        - mysql
+        - postgresql
 
     - env:
         - JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
 
@@ -54,4 +54,3 @@ pandas is distributed under a 3-clause ("Simplified" or "New") BSD
 license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
 BSD-compatible licenses, are included. Their licenses follow the pandas
 license.
-
@@ -30,11 +30,11 @@ check:
 	python3 scripts/validate_unwanted_patterns.py \
 		--validation-type="private_function_across_module" \
 		--included-file-extensions="py" \
-		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+		--excluded-file-paths=pandas/tests,asv_bench/ \
 		pandas/
 
 	python3 scripts/validate_unwanted_patterns.py \
 		--validation-type="private_import_across_module" \
 		--included-file-extensions="py" \
-		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
+		--excluded-file-paths=pandas/tests,asv_bench/,doc/ \
 		pandas/
@@ -1,5 +1,9 @@
+import string
+
 import numpy as np
 
+from pandas import DataFrame
+import pandas._testing as tm
 from pandas.api.types import pandas_dtype
 
 from .pandas_vb_common import (
@@ -62,4 +66,57 @@ def time_infer(self, dtype):
         lib.infer_dtype(self.data_dict[dtype], skipna=False)
 
 
+class SelectDtypes:  # benchmark: DataFrame.select_dtypes(include=/exclude=) for each listed dtype
+
+    params = [  # one parameter axis ("dtype"): the concatenation of the dtype lists below
+        tm.ALL_INT_DTYPES
+        + tm.ALL_EA_INT_DTYPES
+        + tm.FLOAT_DTYPES
+        + tm.COMPLEX_DTYPES
+        + tm.DATETIME64_DTYPES
+        + tm.TIMEDELTA64_DTYPES
+        + tm.BOOL_DTYPES
+    ]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):  # dtype is not read here; the same four frames are built for every parameter value
+        N, K = 5000, 50  # number of rows, number of columns
+        self.index = tm.makeStringIndex(N)
+        self.columns = tm.makeStringIndex(K)
+
+        def create_df(data):  # wrap data in a DataFrame sharing self.index / self.columns
+            return DataFrame(data, index=self.index, columns=self.columns)
+
+        self.df_int = create_df(np.random.randint(low=100, size=(N, K)))
+        self.df_float = create_df(np.random.randn(N, K))
+        self.df_bool = create_df(np.random.choice([True, False], size=(N, K)))
+        self.df_string = create_df(
+            np.random.choice(list(string.ascii_letters), size=(N, K))
+        )
+
+    def time_select_dtype_int_include(self, dtype):  # integer frame, include=dtype
+        self.df_int.select_dtypes(include=dtype)
+
+    def time_select_dtype_int_exclude(self, dtype):  # integer frame, exclude=dtype
+        self.df_int.select_dtypes(exclude=dtype)
+
+    def time_select_dtype_float_include(self, dtype):  # float frame, include=dtype
+        self.df_float.select_dtypes(include=dtype)
+
+    def time_select_dtype_float_exclude(self, dtype):  # float frame, exclude=dtype
+        self.df_float.select_dtypes(exclude=dtype)
+
+    def time_select_dtype_bool_include(self, dtype):  # boolean frame, include=dtype
+        self.df_bool.select_dtypes(include=dtype)
+
+    def time_select_dtype_bool_exclude(self, dtype):  # boolean frame, exclude=dtype
+        self.df_bool.select_dtypes(exclude=dtype)
+
+    def time_select_dtype_string_include(self, dtype):  # single-letter string frame, include=dtype
+        self.df_string.select_dtypes(include=dtype)
+
+    def time_select_dtype_string_exclude(self, dtype):  # single-letter string frame, exclude=dtype
+        self.df_string.select_dtypes(exclude=dtype)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -358,6 +358,26 @@ def time_category_size(self):
         self.draws.groupby(self.cats).size()
 
 
+class FillNA:  # benchmark: groupby("group").fillna with ffill/bfill, on the frame and on the "value" column
+    def setup(self):
+        N = 100  # rows per group; the frame has 2 * N rows in total
+        self.df = DataFrame(
+            {"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}  # "value" alternates NaN and 1.0
+        ).set_index("group")  # "group" is moved into the index before any grouping
+
+    def time_df_ffill(self):  # whole frame, forward fill
+        self.df.groupby("group").fillna(method="ffill")
+
+    def time_df_bfill(self):  # whole frame, backward fill
+        self.df.groupby("group").fillna(method="bfill")
+
+    def time_srs_ffill(self):  # "value" column only, forward fill
+        self.df.groupby("group")["value"].fillna(method="ffill")
+
+    def time_srs_bfill(self):  # "value" column only, backward fill
+        self.df.groupby("group")["value"].fillna(method="bfill")
+
+
 class GroupByMethods:
 
     param_names = ["dtype", "method", "application"]
 
@@ -24,5 +24,11 @@ def time_read_pickle(self):
     def time_write_pickle(self):
         self.df.to_pickle(self.fname)
 
+    def peakmem_read_pickle(self):
+        read_pickle(self.fname)
+
+    def peakmem_write_pickle(self):
+        self.df.to_pickle(self.fname)
+
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -76,12 +76,21 @@ class ExpandingMethods:
 
     def setup(self, constructor, dtype, method):
         N = 10 ** 5
+        N_groupby = 100
         arr = (100 * np.random.random(N)).astype(dtype)
         self.expanding = getattr(pd, constructor)(arr).expanding()
+        self.expanding_groupby = (
+            pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
+            .groupby("B")
+            .expanding()
+        )
 
     def time_expanding(self, constructor, dtype, method):
         getattr(self.expanding, method)()
 
+    def time_expanding_groupby(self, constructor, dtype, method):
+        getattr(self.expanding_groupby, method)()
+
 
 class EWMMethods: