pandas-dev
diff --git a/‎.github/workflows/autoupdate-pre-commit-config.yml
+33 b/‎.github/workflows/autoupdate-pre-commit-config.yml
+33
diff --git a/‎.github/workflows/ci.yml
+2-8 b/‎.github/workflows/ci.yml
+2-8
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎.pre-commit-config.yaml
+112-8 b/‎.pre-commit-config.yaml
+112-8
diff --git a/‎.travis.yml
+1-6 b/‎.travis.yml
+1-6
diff --git a/‎Dockerfile
+9-8 b/‎Dockerfile
+9-8
diff --git a/‎Makefile
+3-3 b/‎Makefile
+3-3
diff --git a/‎README.md
+22-22 b/‎README.md
+22-22
diff --git a/‎asv_bench/benchmarks/algorithms.py
+12 b/‎asv_bench/benchmarks/algorithms.py
+12
@@ -0,0 +1,33 @@
+name: "Update pre-commit config"
+
+on:
+  schedule:
+    - cron: "0 7 * * 1" # At 07:00 on each Monday.
+  workflow_dispatch:
+
+jobs:
+  update-pre-commit:
+    if: github.repository_owner == 'pandas-dev'
+    name: Autoupdate pre-commit config
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v2
+      - name: Cache multiple paths
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pre-commit
+            ~/.cache/pip
+          key: pre-commit-autoupdate-${{ runner.os }}-build
+      - name: Update pre-commit config packages
+        uses: technote-space/create-pr-action@v2
+        with:
+          GITHUB_TOKEN: ${{ secrets.ACTION_TRIGGER_TOKEN }}
+          EXECUTE_COMMANDS: |
+            pip install pre-commit
+            pre-commit autoupdate || (exit 0);
+            pre-commit run -a || (exit 0);
+          COMMIT_MESSAGE: "⬆️ UPGRADE: Autoupdate pre-commit config"
+          PR_BRANCH_NAME: "pre-commit-config-update-${PR_ID}"
+          PR_TITLE: "⬆️ UPGRADE: Autoupdate pre-commit config"
@@ -18,7 +18,7 @@ jobs:
     steps:
 
     - name: Setting conda path
-      run: echo "::add-path::${HOME}/miniconda3/bin"
+      run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
 
     - name: Checkout
       uses: actions/checkout@v1
@@ -37,12 +37,6 @@ jobs:
         ci/code_checks.sh lint
       if: always()
 
-    - name: Dependencies consistency
-      run: |
-        source activate pandas-dev
-        ci/code_checks.sh dependencies
-      if: always()
-
     - name: Checks on imported code
       run: |
         source activate pandas-dev
@@ -104,7 +98,7 @@ jobs:
     steps:
 
     - name: Setting conda path
-      run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
+      run: echo "${HOME}/miniconda3/bin" >> $GITHUB_PATH
 
     - name: Checkout
       uses: actions/checkout@v1
 
@@ -12,6 +12,7 @@
 *.log
 *.swp
 *.pdb
+*.zip
 .project
 .pydevproject
 .settings
 
@@ -15,33 +15,36 @@ repos:
     -   id: flake8
         name: flake8 (cython template)
         files: \.pxi\.in$
-        types:
-          - file
+        types: [text]
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.6.3
+    rev: 5.6.4
     hooks:
     -   id: isort
         name: isort (python)
     -   id: isort
         name: isort (cython)
         types: [cython]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.7.2
+    rev: v2.7.4
     hooks:
     -   id: pyupgrade
         args: [--py37-plus]
 -   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.6.0
+    rev: v1.7.0
     hooks:
       - id: rst-backticks
+      - id: rst-directive-colons
+        types: [text]
+      - id: rst-inline-touching-normal
+        types: [text]
 -   repo: local
     hooks:
     -   id: pip_to_conda
         name: Generate pip dependency from conda
         description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
         language: python
-        entry: python -m scripts.generate_pip_deps_from_conda
+        entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
         additional_dependencies: [pyyaml]
@@ -53,12 +56,113 @@ repos:
         types: [rst]
         args: [--filename=*.rst]
         additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
+    -   id: non-standard-imports
+        name: Check for non-standard imports
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
+            from\ pandas\.core\.common\ import|
+            from\ pandas\.core\ import\ common|
+
+            # Check for imports from collections.abc instead of `from collections import abc`
+            from\ collections\.abc\ import
+
+    -   id: non-standard-numpy.random-related-imports
+        name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
+        language: pygrep
+        exclude: ^pandas/_testing\.py$
+        entry: |
+            (?x)
+            # Flag `from numpy import random` and `from numpy.random import <method>`; use `np.random.<method>` instead
+            from\ numpy\ import\ random|
+            from\ numpy\.random\ import
+        types: [python]
+    -   id: non-standard-imports-in-tests
+        name: Check for non-standard imports in test suite
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+            from\ pandas\._testing\ import|
+            from\ pandas\ import\ _testing\ as\ tm|
+
+            # No direct imports from conftest
+            conftest\ import|
+            import\ conftest
+        types: [python]
+        files: ^pandas/tests/
+    -   id: incorrect-code-directives
+        name: Check for incorrect code block or IPython directives
+        language: pygrep
+        entry: (\.\. code-block ::|\.\. ipython ::)
+        files: \.(py|pyx|rst)$
+    -   id: unwanted-patterns-strings-to-concatenate
+        name: Check for use of not concatenated strings
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-strings-with-wrong-placed-whitespace
+        name: Check for strings with wrong placed spaces
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
+        files: \.(py|pyx|pxd|pxi)$
+    -   id: unwanted-patterns-private-import-across-module
+        name: Check for import of private attributes across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-private-function-across-module
+        name: Check for use of private functions across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: inconsistent-namespace-usage
+        name: 'Check for inconsistent use of pandas namespace in tests'
+        entry: python scripts/check_for_inconsistent_pandas_namespace.py
+        language: python
+        types: [python]
+        files: ^pandas/tests/
+    -   id: FrameOrSeriesUnion
+        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
+        entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
+        language: pygrep
+        types: [python]
+        exclude: ^pandas/_typing\.py$
+    -   id: type-not-class
+        name: Check for use of foo.__class__ instead of type(foo)
+        entry: \.__class__
+        language: pygrep
+        files: \.(py|pyx)$
+    -   id: unwanted-typing
+        name: Check for use of comment-based annotation syntax and missing error codes
+        entry: |
+            (?x)
+            \#\ type:\ (?!ignore)|
+            \#\ type:\s?ignore(?!\[)
+        language: pygrep
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^(
+            pandas/tests/io/excel/test_writers\.py|
+            pandas/tests/io/pytables/common\.py|
+            pandas/tests/io/pytables/test_store\.py)$
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
     -   id: yesqa
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
+    rev: v3.3.0
     hooks:
     -   id: end-of-file-fixer
-        exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
+        exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
+    -   id: trailing-whitespace
+        exclude: \.(html|svg)$
@@ -35,11 +35,6 @@ matrix:
   fast_finish: true
 
   include:
-    - dist: bionic
-      python: 3.9-dev
-      env:
-        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
-
     - env:
       - JOB="3.8, slow" ENV_FILE="ci/deps/travis-38-slow.yaml" PATTERN="slow" SQL="1"
       services:
@@ -94,7 +89,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - if [ "$JOB" != "3.9-dev" ]; then source activate pandas-dev; fi
+  - source activate pandas-dev
   - ci/run_tests.sh
 
 after_script:
 
@@ -1,4 +1,4 @@
-FROM continuumio/miniconda3
+FROM quay.io/condaforge/miniforge3
 
 # if you forked pandas, you can pass in your own GitHub username to use your fork
 # i.e. gh_username=myname
@@ -15,10 +15,6 @@ RUN apt-get update \
     # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
     && apt-get -y install git iproute2 procps iproute2 lsb-release \
     #
-    # Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
-    # needed to build pandas C extensions
-    && apt-get -y install build-essential \
-    #
     # cleanup
     && apt-get autoremove -y \
     && apt-get clean -y \
@@ -39,9 +35,14 @@ RUN mkdir "$pandas_home" \
 # we just update the base/root one from the 'environment.yml' file instead of creating a new one.
 #
 # Set up environment
-RUN conda env update -n base -f "$pandas_home/environment.yml"
+RUN conda install -y mamba
+RUN mamba env update -n base -f "$pandas_home/environment.yml"
 
 # Build C extensions and pandas
-RUN cd "$pandas_home" \
-    && python setup.py build_ext --inplace -j 4 \
+SHELL ["/bin/bash", "-c"]
+RUN . /opt/conda/etc/profile.d/conda.sh \
+    && conda activate base \
+    && cd "$pandas_home" \
+    && export \
+    && python setup.py build_ext -j 4 \
     && python -m pip install -e .
@@ -9,7 +9,7 @@ clean_pyc:
 	-find . -name '*.py[co]' -exec rm {} \;
 
 build: clean_pyc
-	python setup.py build_ext --inplace
+	python setup.py build_ext
 
 lint-diff:
 	git diff upstream/master --name-only -- "*.py" | xargs flake8
@@ -30,11 +30,11 @@ check:
 	python3 scripts/validate_unwanted_patterns.py \
 		--validation-type="private_function_across_module" \
 		--included-file-extensions="py" \
-		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+		--excluded-file-paths=pandas/tests,asv_bench/ \
 		pandas/
 
 	python3 scripts/validate_unwanted_patterns.py \
 		--validation-type="private_import_across_module" \
 		--included-file-extensions="py" \
-		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
+		--excluded-file-paths=pandas/tests,asv_bench/,doc/ \
 		pandas/
@@ -60,27 +60,27 @@ Here are just a few of the things that pandas does well:
     and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
   - [**Time series**][timeseries]-specific functionality: date range
     generation and frequency conversion, moving window statistics,
-    date shifting and lagging.
-
-
-   [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data
-   [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion
-   [alignment]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures
-   [groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine
-   [conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe
-   [slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges
-   [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix
-   [subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing
-   [merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging
-   [joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index
-   [reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables
-   [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations
-   [mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex
-   [flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files
-   [excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files
-   [db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries
-   [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables
-   [timeseries]: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality
+    date shifting and lagging
+
+
+   [missing-data]: https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html
+   [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#column-selection-addition-deletion
+   [alignment]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html?highlight=alignment#intro-to-data-structures
+   [groupby]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#group-by-split-apply-combine
+   [conversion]: https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#dataframe
+   [slicing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#slicing-ranges
+   [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced
+   [subsetting]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#boolean-indexing
+   [merging]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#database-style-dataframe-or-named-series-joining-merging
+   [joining]: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html#joining-on-index
+   [reshape]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html
+   [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html
+   [mi]: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#hierarchical-indexing-multiindex
+   [flat-files]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#csv-text-files
+   [excel]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#excel-files
+   [db]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#sql-queries
+   [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#hdf5-pytables
+   [timeseries]: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-series-date-functionality
 
 ## Where to get it
 The source code is currently hosted on GitHub at:
@@ -154,7 +154,7 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
 Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).
 
 ## Discussion and Development
-Most development discussions take place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
+Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
 
 ## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
 
 
@@ -5,6 +5,7 @@
 from pandas._libs import lib
 
 import pandas as pd
+from pandas.core.algorithms import make_duplicates_of_left_unique_in_right
 
 from .pandas_vb_common import tm
 
@@ -174,4 +175,15 @@ def time_argsort(self, N):
         self.array.argsort()
 
 
+class RemoveDuplicates:
+    def setup(self):
+        N = 10 ** 5
+        na = np.arange(int(N / 2))
+        self.left = np.concatenate([na[: int(N / 4)], na[: int(N / 4)]])
+        self.right = np.concatenate([na, na])
+
+    def time_make_duplicates_of_left_unique_in_right(self):
+        make_duplicates_of_left_unique_in_right(self.left, self.right)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip