
Commit 039094c

Merge remote-tracking branch 'upstream/master' into io-parquet-multiindex

2 parents: c859a4f + 90dc9ae

642 files changed (+31500 -28803 lines)
@@ -0,0 +1,33 @@
+name: "Update pre-commit config"
+
+on:
+  schedule:
+    - cron: "0 7 * * 1"  # At 07:00 on each Monday.
+  workflow_dispatch:
+
+jobs:
+  update-pre-commit:
+    if: github.repository_owner == 'pandas-dev'
+    name: Autoupdate pre-commit config
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v2
+      - name: Cache multiple paths
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pre-commit
+            ~/.cache/pip
+          key: pre-commit-autoupdate-${{ runner.os }}-build
+      - name: Update pre-commit config packages
+        uses: technote-space/create-pr-action@v2
+        with:
+          GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }}
+          EXECUTE_COMMANDS: |
+            pip install pre-commit
+            pre-commit autoupdate || (exit 0);
+            pre-commit run -a || (exit 0);
+          COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config"
+          PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}"
+          PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config"
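The EXECUTE_COMMANDS block above boils down to three shell commands. A minimal Python sketch of the same flow, for illustration only (the action runs these as shell steps, not via subprocess):

    import subprocess

    # Equivalent of EXECUTE_COMMANDS: bump every pinned hook rev in
    # .pre-commit-config.yaml, then run all hooks once so any autofixes
    # land in the same PR. The "|| (exit 0)" in the workflow swallows
    # failures so a failing hook cannot abort the update; check=False
    # mirrors that behaviour here.
    subprocess.run(["pip", "install", "pre-commit"], check=True)
    subprocess.run(["pre-commit", "autoupdate"], check=False)
    subprocess.run(["pre-commit", "run", "-a"], check=False)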

.github/workflows/ci.yml (-6)

@@ -37,12 +37,6 @@ jobs:
         ci/code_checks.sh lint
       if: always()

-    - name: Dependencies consistency
-      run: |
-        source activate pandas-dev
-        ci/code_checks.sh dependencies
-      if: always()
-
     - name: Checks on imported code
       run: |
         source activate pandas-dev
.pre-commit-config.yaml (+119 -16)

@@ -9,40 +9,42 @@ repos:
     -   id: flake8
         additional_dependencies: [flake8-comprehensions>=3.1.0]
     -   id: flake8
-        name: flake8-pyx
-        files: \.(pyx|pxd)$
-        types:
-            - file
+        name: flake8 (cython)
+        types: [cython]
         args: [--append-config=flake8/cython.cfg]
     -   id: flake8
-        name: flake8-pxd
+        name: flake8 (cython template)
         files: \.pxi\.in$
-        types:
-            - file
+        types: [text]
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.6.0
+    rev: 5.6.4
     hooks:
     -   id: isort
-        exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
-        files: '.pxd$|.py$'
-        types: [file]
+        name: isort (python)
+    -   id: isort
+        name: isort (cython)
+        types: [cython]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.7.2
+    rev: v2.7.3
     hooks:
     -   id: pyupgrade
         args: [--py37-plus]
 -   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.6.0
+    rev: v1.7.0
     hooks:
     -   id: rst-backticks
+    -   id: rst-directive-colons
+        types: [text]
+    -   id: rst-inline-touching-normal
+        types: [text]
 -   repo: local
     hooks:
     -   id: pip_to_conda
         name: Generate pip dependency from conda
         description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
         language: python
-        entry: python -m scripts.generate_pip_deps_from_conda
+        entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
         additional_dependencies: [pyyaml]

@@ -54,12 +56,113 @@ repos:
         types: [rst]
         args: [--filename=*.rst]
         additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
+    -   id: non-standard-imports
+        name: Check for non-standard imports
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
+            from\ pandas\.core\.common\ import|
+            from\ pandas\.core\ import\ common|
+
+            # Check for imports from collections.abc instead of `from collections import abc`
+            from\ collections\.abc\ import
+
+    -   id: non-standard-numpy.random-related-imports
+        name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
+        language: pygrep
+        exclude: pandas/_testing.py
+        entry: |
+            (?x)
+            # Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
+            from\ numpy\ import\ random|
+            from\ numpy.random\ import
+        types: [python]
+    -   id: non-standard-imports-in-tests
+        name: Check for non-standard imports in test suite
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+            from\ pandas\._testing\ import|
+            from\ pandas\ import\ _testing\ as\ tm|
+
+            # No direct imports from conftest
+            conftest\ import|
+            import\ conftest
+        types: [python]
+        files: ^pandas/tests/
+    -   id: incorrect-code-directives
+        name: Check for incorrect code block or IPython directives
+        language: pygrep
+        entry: (\.\. code-block ::|\.\. ipython ::)
+        files: \.(py|pyx|rst)$
+    -   id: unwanted-patterns-strings-to-concatenate
+        name: Check for use of not concatenated strings
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-strings-with-wrong-placed-whitespace
+        name: Check for strings with wrong placed spaces
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-private-import-across-module
+        name: Check for import of private attributes across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-private-function-across-module
+        name: Check for use of private functions across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: inconsistent-namespace-usage
+        name: 'Check for inconsistent use of pandas namespace in tests'
+        entry: python scripts/check_for_inconsistent_pandas_namespace.py
+        language: python
+        types: [python]
+        files: ^pandas/tests/
+    -   id: FrameOrSeriesUnion
+        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
+        entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
+        language: pygrep
+        types: [python]
+        exclude: ^pandas/_typing\.py$
+    -   id: type-not-class
+        name: Check for use of foo.__class__ instead of type(foo)
+        entry: \.__class__
+        language: pygrep
+        files: \.(py|pyx)$
+    -   id: unwanted-typing
+        name: Check for use of comment-based annotation syntax and missing error codes
+        entry: |
+            (?x)
+            \#\ type:\ (?!ignore)|
+            \#\ type:\s?ignore(?!\[)
+        language: pygrep
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^
+            pandas/tests/io/excel/test_writers\.py|
+            pandas/tests/io/pytables/common\.py|
+            pandas/tests/io/pytables/test_store\.py$
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
     -   id: yesqa
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
+    rev: v3.3.0
     hooks:
     -   id: end-of-file-fixer
-        exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
+        exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
+    -   id: trailing-whitespace
+        exclude: \.(html|svg)$
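Several of the new local hooks are pygrep-based: the entry is a regular expression, and the leading (?x) enables verbose mode, which is why literal spaces are escaped as "\ ". A self-contained check of the unwanted-typing pattern (the sample lines are illustrative, not taken from the diff):

    import re

    # The "unwanted-typing" entry compiled the way pygrep would see it:
    # (?x)/re.VERBOSE ignores unescaped whitespace, hence the "\ " escapes.
    pattern = re.compile(
        r"""
        \#\ type:\ (?!ignore)|    # comment-based annotation, e.g. "# type: List[int]"
        \#\ type:\s?ignore(?!\[)  # "# type: ignore" missing an error code
        """,
        re.VERBOSE,
    )

    assert pattern.search("x = []  # type: List[int]")         # flagged
    assert pattern.search("import foo  # type: ignore")        # flagged
    assert not pattern.search("# type: ignore[attr-defined]")  # has error code: ok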

Makefile (+2 -2)

@@ -30,11 +30,11 @@ check:
        python3 scripts/validate_unwanted_patterns.py \
                --validation-type="private_function_across_module" \
                --included-file-extensions="py" \
-               --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+               --excluded-file-paths=pandas/tests,asv_bench/ \
                pandas/

        python3 scripts/validate_unwanted_patterns.py \
                --validation-type="private_import_across_module" \
                --included-file-extensions="py" \
-               --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
+               --excluded-file-paths=pandas/tests,asv_bench/,doc/
                pandas/

asv_bench/benchmarks/groupby.py (+20)

@@ -358,6 +358,26 @@ def time_category_size(self):
         self.draws.groupby(self.cats).size()


+class FillNA:
+    def setup(self):
+        N = 100
+        self.df = DataFrame(
+            {"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}
+        ).set_index("group")
+
+    def time_df_ffill(self):
+        self.df.groupby("group").fillna(method="ffill")
+
+    def time_df_bfill(self):
+        self.df.groupby("group").fillna(method="bfill")
+
+    def time_srs_ffill(self):
+        self.df.groupby("group")["value"].fillna(method="ffill")
+
+    def time_srs_bfill(self):
+        self.df.groupby("group")["value"].fillna(method="bfill")
+
+
 class GroupByMethods:

     param_names = ["dtype", "method", "application"]
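What the new FillNA benchmarks measure, on toy data: fillna on a GroupBy fills only from values within the same group, so missing values never leak across the group boundary. A hedged sketch using the same API the benchmark calls:

    import numpy as np
    import pandas as pd

    # Groupwise forward fill: each NaN is filled from an earlier row of
    # its own group only.
    df = pd.DataFrame(
        {"group": [1, 1, 2, 2], "value": [1.0, np.nan, 2.0, np.nan]}
    ).set_index("group")

    result = df.groupby("group").fillna(method="ffill")
    # group 1 -> [1.0, 1.0], group 2 -> [2.0, 2.0]; a leading NaN in a
    # group would stay NaN because no earlier in-group value exists.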

asv_bench/benchmarks/io/pickle.py (+6)

@@ -24,5 +24,11 @@ def time_read_pickle(self):
     def time_write_pickle(self):
         self.df.to_pickle(self.fname)

+    def peakmem_read_pickle(self):
+        read_pickle(self.fname)
+
+    def peakmem_write_pickle(self):
+        self.df.to_pickle(self.fname)
+

 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
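The two new methods duplicate the timing benchmarks' bodies on purpose: in asv, the method-name prefix selects the metric, so time_* measures wall-clock duration while peakmem_* records the process's peak memory while the same code runs. A minimal sketch of the convention (hypothetical class, not part of the diff):

    import pickle
    import tempfile

    class TinyPickleExample:
        # asv picks benchmarks up by prefix: time_ -> duration,
        # peakmem_ -> peak resident memory during the call.
        def setup(self):
            self.fname = tempfile.mkstemp(suffix=".pkl")[1]
            with open(self.fname, "wb") as f:
                pickle.dump(list(range(10_000)), f)

        def time_load(self):
            with open(self.fname, "rb") as f:
                pickle.load(f)

        def peakmem_load(self):  # same body, different metric
            with open(self.fname, "rb") as f:
                pickle.load(f)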

asv_bench/benchmarks/rolling.py (+9)

@@ -76,12 +76,21 @@ class ExpandingMethods:

     def setup(self, constructor, dtype, method):
         N = 10 ** 5
+        N_groupby = 100
         arr = (100 * np.random.random(N)).astype(dtype)
         self.expanding = getattr(pd, constructor)(arr).expanding()
+        self.expanding_groupby = (
+            pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
+            .groupby("B")
+            .expanding()
+        )

     def time_expanding(self, constructor, dtype, method):
         getattr(self.expanding, method)()

+    def time_expanding_groupby(self, constructor, dtype, method):
+        getattr(self.expanding_groupby, method)()
+

 class EWMMethods:
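The new time_expanding_groupby case exercises expanding windows computed per group rather than over the whole column. A quick hedged illustration on toy data:

    import pandas as pd

    df = pd.DataFrame({"A": [1.0, 2.0, 3.0, 4.0], "B": [0, 0, 1, 1]})

    # Plain expanding: one cumulative window over the whole column.
    print(df["A"].expanding().sum().tolist())
    # [1.0, 3.0, 6.0, 10.0]

    # Grouped expanding: the window restarts for each group of B.
    print(df.groupby("B").expanding().sum()["A"].tolist())
    # [1.0, 3.0, 3.0, 7.0]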

asv_bench/benchmarks/strings.py (+17 -1)

@@ -2,7 +2,7 @@

 import numpy as np

-from pandas import DataFrame, Series
+from pandas import Categorical, DataFrame, Series

 from .pandas_vb_common import tm

@@ -16,6 +16,10 @@ def setup(self, dtype):
         self.series_arr = tm.rands_array(nchars=10, size=10 ** 5)
         self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()

+        # GH37371. Testing construction of string series/frames from ExtensionArrays
+        self.series_cat_arr = Categorical(self.series_arr)
+        self.frame_cat_arr = Categorical(self.frame_arr)
+
     def time_series_construction(self, dtype):
         Series(self.series_arr, dtype=dtype)

@@ -28,6 +32,18 @@ def time_frame_construction(self, dtype):
     def peakmem_frame_construction(self, dtype):
         DataFrame(self.frame_arr, dtype=dtype)

+    def time_cat_series_construction(self, dtype):
+        Series(self.series_cat_arr, dtype=dtype)
+
+    def peakmem_cat_series_construction(self, dtype):
+        Series(self.series_cat_arr, dtype=dtype)
+
+    def time_cat_frame_construction(self, dtype):
+        DataFrame(self.frame_cat_arr, dtype=dtype)
+
+    def peakmem_cat_frame_construction(self, dtype):
+        DataFrame(self.frame_cat_arr, dtype=dtype)
+

 class Methods:
     def setup(self):
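The new *_cat_* benchmarks construct string Series/DataFrames from a Categorical, i.e. from an ExtensionArray instead of a plain ndarray (GH37371). A hedged toy example of the conversion being timed:

    from pandas import Categorical, Series

    # A Categorical stores integer codes plus the unique categories, so
    # repeated strings are not duplicated.
    cat = Categorical(["a", "b", "a", "c"])
    print(cat.codes)       # [0 1 0 2]
    print(cat.categories)  # Index(['a', 'b', 'c'], dtype='object')

    # Series(cat, dtype="str") materialises the string values again; this
    # cast is the path the benchmark times.
    print(Series(cat, dtype="str").tolist())  # ['a', 'b', 'a', 'c']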

asv_bench/benchmarks/timeseries.py (+13 -3)

@@ -3,7 +3,14 @@
 import dateutil
 import numpy as np

-from pandas import DataFrame, Series, date_range, period_range, to_datetime
+from pandas import (
+    DataFrame,
+    Series,
+    date_range,
+    period_range,
+    timedelta_range,
+    to_datetime,
+)

 from pandas.tseries.frequencies import infer_freq

@@ -121,12 +128,15 @@ def time_convert(self):

 class Iteration:

-    params = [date_range, period_range]
+    params = [date_range, period_range, timedelta_range]
     param_names = ["time_index"]

     def setup(self, time_index):
         N = 10 ** 6
-        self.idx = time_index(start="20140101", freq="T", periods=N)
+        if time_index is timedelta_range:
+            self.idx = time_index(start=0, freq="T", periods=N)
+        else:
+            self.idx = time_index(start="20140101", freq="T", periods=N)
         self.exit = 10000

     def time_iter(self, time_index):
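The setup branches because timedelta_range anchors at a duration rather than a calendar date, so the "20140101" start string used for date_range and period_range would not parse. For example:

    from pandas import timedelta_range

    # start=0 is interpreted as a zero timedelta; freq="T" is minutes.
    idx = timedelta_range(start=0, freq="T", periods=3)
    print(list(idx))
    # [Timedelta('0 days 00:00:00'), Timedelta('0 days 00:01:00'),
    #  Timedelta('0 days 00:02:00')]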

azure-pipelines.yml (+25)

@@ -26,3 +26,28 @@ jobs:
     parameters:
       name: Windows
       vmImage: vs2017-win2016
+
+- job: py37_32bit
+  pool:
+    vmImage: ubuntu-18.04
+
+  steps:
+    - script: |
+        docker pull quay.io/pypa/manylinux2014_i686
+        docker run -v $(pwd):/pandas quay.io/pypa/manylinux2014_i686 \
+          /bin/bash -xc "cd pandas && \
+          /opt/python/cp37-cp37m/bin/python -m venv ~/virtualenvs/pandas-dev && \
+          . ~/virtualenvs/pandas-dev/bin/activate && \
+          python -m pip install --no-deps -U pip wheel setuptools && \
+          pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines && \
+          python setup.py build_ext -q -i -j2 && \
+          python -m pip install --no-build-isolation -e . && \
+          pytest -m 'not slow and not network and not clipboard' pandas --junitxml=test-data.xml"
+      displayName: 'Run 32-bit manylinux2014 Docker Build / Tests'
+
+    - task: PublishTestResults@2
+      condition: succeededOrFailed()
+      inputs:
+        testResultsFiles: '**/test-*.xml'
+        failTaskOnFailedTests: true
+        testRunTitle: 'Publish test results for Python 3.7-32 bit full Linux'
