pandas-dev
diff --git a/‎.github/workflows/stale-pr.yml
+21 b/‎.github/workflows/stale-pr.yml
+21
diff --git a/‎.pre-commit-config.yaml
-12 b/‎.pre-commit-config.yaml
-12
diff --git a/‎Makefile
+13 b/‎Makefile
+13
diff --git a/‎asv_bench/asv.conf.json
+1-1 b/‎asv_bench/asv.conf.json
+1-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+32-14 b/‎asv_bench/benchmarks/groupby.py
+32-14
diff --git a/‎ci/build39.sh
+1-8 b/‎ci/build39.sh
+1-8
diff --git a/‎ci/code_checks.sh
+31-4 b/‎ci/code_checks.sh
+31-4
diff --git a/‎ci/deps/azure-37-locale_slow.yaml
+1-1 b/‎ci/deps/azure-37-locale_slow.yaml
+1-1
diff --git a/‎ci/deps/azure-37-minimum_versions.yaml
+1-1 b/‎ci/deps/azure-37-minimum_versions.yaml
+1-1
diff --git a/‎ci/deps/azure-windows-37.yaml
+1-1 b/‎ci/deps/azure-windows-37.yaml
+1-1
diff --git a/‎ci/deps/azure-windows-38.yaml
+1-1 b/‎ci/deps/azure-windows-38.yaml
+1-1
diff --git a/‎doc/source/development/contributing.rst
+1 b/‎doc/source/development/contributing.rst
+1
diff --git a/‎doc/source/development/contributing_docstring.rst
+5-5 b/‎doc/source/development/contributing_docstring.rst
+5-5
diff --git a/‎doc/source/development/extending.rst
-2 b/‎doc/source/development/extending.rst
-2
diff --git a/‎doc/source/getting_started/install.rst
+1-3 b/‎doc/source/getting_started/install.rst
+1-3
@@ -0,0 +1,21 @@
+name: "Stale PRs"
+on:
+  schedule:
+  # * is a special character in YAML so you have to quote this string
+  - cron: "0 */6 * * *"
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/stale@v3
+      with:
+        repo-token: ${{ secrets.GITHUB_TOKEN }}
+        stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity."
+        skip-stale-pr-message: false
+        stale-pr-label: "Stale"
+        exempt-pr-labels: "Needs Review,Blocked"
+        days-before-stale: 30
+        days-before-close: -1
+        remove-stale-when-updated: true
+        debug-only: true
@@ -30,15 +30,3 @@ repos:
     -   id: isort
         language: python_venv
         exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
--   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.730
-    hooks:
-     -  id: mypy
-        args:
-          # As long as a some files are excluded from check-untyped-defs
-          # we have to exclude it from the pre-commit hook as the configuration
-          # is based on modules but the hook runs on files.
-          - --no-check-untyped-defs
-          - --follow-imports
-          - skip
-        files: pandas/
@@ -25,3 +25,16 @@ doc:
 	cd doc; \
 	python make.py clean; \
 	python make.py html
+
+check:
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_function_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+		pandas/
+
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_import_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ \
+		pandas/
@@ -26,7 +26,7 @@
     // The Pythons you'd like to test against.  If not provided, defaults
     // to the current version of Python used to run `asv`.
     // "pythons": ["2.7", "3.4"],
-    "pythons": ["3.6"],
+    "pythons": ["3.8"],
 
     // The matrix of dependencies to test.  Each key is the name of a
     // package (in PyPI) and the values are version numbers.  An empty
 
@@ -627,49 +627,63 @@ def time_first(self):
 
 
 class TransformEngine:
-    def setup(self):
+
+    param_names = ["parallel"]
+    params = [[True, False]]
+
+    def setup(self, parallel):
         N = 10 ** 3
         data = DataFrame(
             {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
             columns=[0, 1],
         )
+        self.parallel = parallel
         self.grouper = data.groupby(0)
 
-    def time_series_numba(self):
+    def time_series_numba(self, parallel):
         def function(values, index):
             return values * 5
 
-        self.grouper[1].transform(function, engine="numba")
+        self.grouper[1].transform(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_series_cython(self):
+    def time_series_cython(self, parallel):
         def function(values):
             return values * 5
 
         self.grouper[1].transform(function, engine="cython")
 
-    def time_dataframe_numba(self):
+    def time_dataframe_numba(self, parallel):
         def function(values, index):
             return values * 5
 
-        self.grouper.transform(function, engine="numba")
+        self.grouper.transform(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_dataframe_cython(self):
+    def time_dataframe_cython(self, parallel):
         def function(values):
             return values * 5
 
         self.grouper.transform(function, engine="cython")
 
 
 class AggEngine:
-    def setup(self):
+
+    param_names = ["parallel"]
+    params = [[True, False]]
+
+    def setup(self, parallel):
         N = 10 ** 3
         data = DataFrame(
             {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
             columns=[0, 1],
         )
+        self.parallel = parallel
         self.grouper = data.groupby(0)
 
-    def time_series_numba(self):
+    def time_series_numba(self, parallel):
         def function(values, index):
             total = 0
             for i, value in enumerate(values):
@@ -679,9 +693,11 @@ def function(values, index):
                     total += value * 2
             return total
 
-        self.grouper[1].agg(function, engine="numba")
+        self.grouper[1].agg(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_series_cython(self):
+    def time_series_cython(self, parallel):
         def function(values):
             total = 0
             for i, value in enumerate(values):
@@ -693,7 +709,7 @@ def function(values):
 
         self.grouper[1].agg(function, engine="cython")
 
-    def time_dataframe_numba(self):
+    def time_dataframe_numba(self, parallel):
         def function(values, index):
             total = 0
             for i, value in enumerate(values):
@@ -703,9 +719,11 @@ def function(values, index):
                     total += value * 2
             return total
 
-        self.grouper.agg(function, engine="numba")
+        self.grouper.agg(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )
 
-    def time_dataframe_cython(self):
+    def time_dataframe_cython(self, parallel):
         def function(values):
             total = 0
             for i, value in enumerate(values):
 
@@ -3,16 +3,9 @@
 
 sudo apt-get install build-essential gcc xvfb
 pip install --no-deps -U pip wheel setuptools
-pip install python-dateutil pytz pytest pytest-xdist hypothesis
+pip install numpy python-dateutil pytz pytest pytest-xdist hypothesis
 pip install cython --pre # https://github.com/cython/cython/issues/3395
 
-git clone https://github.com/numpy/numpy
-cd numpy
-python setup.py build_ext --inplace
-python setup.py install
-cd ..
-rm -rf numpy
-
 python setup.py build_ext --inplace
 python -m pip install --no-build-isolation -e .
 
 
@@ -116,6 +116,22 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     fi
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for import of private attributes across modules' ; echo $MSG
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
+    else
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
+    fi
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for use of private functions across modules' ; echo $MSG
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
+    else
+        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
+    fi
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     echo "isort --version-number"
     isort --version-number
 
@@ -179,6 +195,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of builtin filter function' ; echo $MSG
+    invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # Check for the following code in testing: `np.testing` and `np.array_equal`
     MSG='Check for invalid testing' ; echo $MSG
     invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
@@ -226,15 +246,22 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include=*.{py,pyx} '!r}' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    # -------------------------------------------------------------------------
+    # Type annotations
+
     MSG='Check for use of comment-based annotation syntax' ; echo $MSG
     invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    # https://github.com/python/mypy/issues/7384
-    # MSG='Check for missing error codes with # type: ignore' ; echo $MSG
-    # invgrep -R --include="*.py" -P '# type: ignore(?!\[)' pandas
-    # RET=$(($RET + $?)) ; echo $MSG "DONE"
+    MSG='Check for missing error codes with # type: ignore' ; echo $MSG
+    invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG
+    invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    # -------------------------------------------------------------------------
     MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
     invgrep -R --include=*.{py,pyx} '\.__class__' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
@@ -18,7 +18,7 @@ dependencies:
   - lxml
   - matplotlib=3.0.0
   - numpy=1.16.*
-  - openpyxl=2.5.7
+  - openpyxl=2.6.0
   - python-dateutil
   - python-blosc
   - pytz=2017.3
 
@@ -19,7 +19,7 @@ dependencies:
   - numba=0.46.0
   - numexpr=2.6.8
   - numpy=1.16.5
-  - openpyxl=2.5.7
+  - openpyxl=2.6.0
   - pytables=3.4.4
   - python-dateutil=2.7.3
   - pytz=2017.3
 
@@ -8,7 +8,7 @@ dependencies:
   # tools
   - cython>=0.29.16
   - pytest>=5.0.1
-  - pytest-xdist>=1.21,<2.0.0 # GH 35737
+  - pytest-xdist>=1.21
   - hypothesis>=3.58.0
   - pytest-azurepipelines
 
 
@@ -8,7 +8,7 @@ dependencies:
   # tools
   - cython>=0.29.16
   - pytest>=5.0.1
-  - pytest-xdist>=1.21,<2.0.0 # GH 35737
+  - pytest-xdist>=1.21
   - hypothesis>=3.58.0
   - pytest-azurepipelines
 
 
@@ -204,6 +204,7 @@ You will need `Build Tools for Visual Studio 2017
 	You DO NOT need to install Visual Studio 2019.
 	You only need "Build Tools for Visual Studio 2019" found by
 	scrolling down to "All downloads" -> "Tools for Visual Studio 2019".
+	In the installer, select the "C++ build tools" workload.
 
 **Mac OS**
 
 
@@ -32,18 +32,18 @@ The next example gives an idea of what a docstring looks like:
         Parameters
         ----------
         num1 : int
-            First number to add
+            First number to add.
         num2 : int
-            Second number to add
+            Second number to add.
 
         Returns
         -------
         int
-            The sum of `num1` and `num2`
+            The sum of `num1` and `num2`.
 
         See Also
         --------
-        subtract : Subtract one integer from another
+        subtract : Subtract one integer from another.
 
         Examples
         --------
@@ -998,4 +998,4 @@ mapping function names to docstrings. Wherever possible, we prefer using
 
 See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
 ``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna``
-for the filled versions.
+for the filled versions.
@@ -73,8 +73,6 @@ applies only to certain dtypes.
 Extension types
 ---------------
 
-.. versionadded:: 0.23.0
-
 .. warning::
 
    The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs are new and
 
@@ -274,7 +274,7 @@ html5lib                  1.0.1              HTML parser for read_html (see :ref
 lxml                      4.3.0              HTML parser for read_html (see :ref:`note <optional_html>`)
 matplotlib                2.2.3              Visualization
 numba                     0.46.0             Alternative execution engine for rolling operations
-openpyxl                  2.5.7              Reading / writing for xlsx files
+openpyxl                  2.6.0              Reading / writing for xlsx files
 pandas-gbq                0.12.0             Google Big Query access
 psycopg2                  2.7                PostgreSQL engine for sqlalchemy
 pyarrow                   0.15.0             Parquet, ORC, and feather reading / writing
@@ -301,8 +301,6 @@ Optional dependencies for parsing HTML
 One of the following combinations of libraries is needed to use the
 top-level :func:`~pandas.read_html` function:
 
-.. versionchanged:: 0.23.0
-
 * `BeautifulSoup4`_ and `html5lib`_
 * `BeautifulSoup4`_ and `lxml`_
 * `BeautifulSoup4`_ and `html5lib`_ and `lxml`_