pandas-dev
diff --git a/‎.github/ISSUE_TEMPLATE/documentation_improvement.md
+1-1 b/‎.github/ISSUE_TEMPLATE/documentation_improvement.md
+1-1
diff --git a/‎.github/workflows/comment_bot.yml
+1-1 b/‎.github/workflows/comment_bot.yml
+1-1
diff --git a/‎.pre-commit-config.yaml
+42-21 b/‎.pre-commit-config.yaml
+42-21
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎asv_bench/benchmarks/categoricals.py
+18-1 b/‎asv_bench/benchmarks/categoricals.py
+18-1
diff --git a/‎asv_bench/benchmarks/hash_functions.py
+9 b/‎asv_bench/benchmarks/hash_functions.py
+9
diff --git a/‎asv_bench/benchmarks/indexing.py
+14 b/‎asv_bench/benchmarks/indexing.py
+14
diff --git a/‎asv_bench/benchmarks/io/csv.py
+48 b/‎asv_bench/benchmarks/io/csv.py
+48
diff --git a/‎asv_bench/benchmarks/period.py
+1-1 b/‎asv_bench/benchmarks/period.py
+1-1
diff --git a/‎asv_bench/benchmarks/reshape.py
+2-1 b/‎asv_bench/benchmarks/reshape.py
+2-1
diff --git a/‎asv_bench/benchmarks/rolling.py
+23 b/‎asv_bench/benchmarks/rolling.py
+23
@@ -9,7 +9,7 @@ labels: "Docs, Needs Triage"
 
 #### Location of the documentation
 
-[this should provide the location of the documentation, e.g. "pandas.read_csv" or the URL of the documentation, e.g. "https://dev.pandas.io/docs/reference/api/pandas.read_csv.html"]
+[this should provide the location of the documentation, e.g. "pandas.read_csv" or the URL of the documentation, e.g. "https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html"]
 
 **Note**: You can check the latest versions of the docs on `master` [here](https://pandas.pydata.org/docs/dev/).
 
 
@@ -29,7 +29,7 @@ jobs:
       - name: Install-pre-commit
         run: python -m pip install --upgrade pre-commit
       - name: Run pre-commit
-        run: pre-commit run --all-files || (exit 0)
+        run: pre-commit run --from-ref=origin/master --to-ref=HEAD || (exit 0)
       - name: Commit results
         run: |
           git config user.name "$(git log -1 --pretty=format:%an)"
 
@@ -24,12 +24,12 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.7.4
+    rev: v2.10.0
     hooks:
     -   id: pyupgrade
-        args: [--py37-plus]
+        args: [--py37-plus, --keep-runtime-typing]
 -   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.7.0
+    rev: v1.7.1
     hooks:
       - id: rst-backticks
       - id: rst-directive-colons
@@ -60,11 +60,11 @@ repos:
         entry: |
             (?x)
             # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
-            from\ pandas\.core\.common\ import|
-            from\ pandas\.core\ import\ common|
+            from\ pandas\.core\.common\ import
+            |from\ pandas\.core\ import\ common
 
             # Check for imports from collections.abc instead of `from collections import abc`
-            from\ collections\.abc\ import
+            |from\ collections\.abc\ import
 
     -   id: non-standard-numpy.random-related-imports
         name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
@@ -73,24 +73,24 @@ repos:
         entry: |
             (?x)
             # Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
-            from\ numpy\ import\ random|
-            from\ numpy.random\ import
+            from\ numpy\ import\ random
+            |from\ numpy.random\ import
         types: [python]
     -   id: non-standard-imports-in-tests
         name: Check for non-standard imports in test suite
         language: pygrep
         entry: |
             (?x)
             # Check for imports from pandas._testing instead of `import pandas._testing as tm`
-            from\ pandas\._testing\ import|
-            from\ pandas\ import\ _testing\ as\ tm|
+            from\ pandas\._testing\ import
+            |from\ pandas\ import\ _testing\ as\ tm
 
             # No direct imports from conftest
-            conftest\ import|
-            import\ conftest
+            |conftest\ import
+            |import\ conftest
 
             # Check for use of pandas.testing instead of tm
-            pd\.testing\.
+            |pd\.testing\.
         types: [python]
         files: ^pandas/tests/
     -   id: incorrect-code-directives
@@ -127,6 +127,12 @@ repos:
         types: [python]
         files: ^pandas/tests/
         exclude: ^pandas/tests/extension/
+    -   id: unwanted-patterns-pytest-xfail
+        name: Check for use of pytest.xfail
+        entry: pytest\.xfail
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
     -   id: inconsistent-namespace-usage
         name: 'Check for inconsistent use of pandas namespace in tests'
         entry: python scripts/check_for_inconsistent_pandas_namespace.py
@@ -135,7 +141,7 @@ repos:
         files: ^pandas/tests/
     -   id: FrameOrSeriesUnion
         name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
-        entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
+        entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\]
         language: pygrep
         types: [python]
         exclude: ^pandas/_typing\.py$
@@ -148,9 +154,8 @@ repos:
         name: Check for outdated annotation syntax and missing error codes
         entry: |
             (?x)
-            \#\ type:\ (?!ignore)|
-            \#\ type:\s?ignore(?!\[)|
-            \)\ ->\ \"
+            \#\ type:\ (?!ignore)
+            |\#\ type:\s?ignore(?!\[)
         language: pygrep
         types: [python]
     -   id: np-bool
@@ -166,9 +171,15 @@ repos:
         files: ^pandas/tests/
         exclude: |
             (?x)^
-            pandas/tests/io/excel/test_writers\.py|
-            pandas/tests/io/pytables/common\.py|
-            pandas/tests/io/pytables/test_store\.py$
+            pandas/tests/io/excel/test_writers\.py
+            |pandas/tests/io/pytables/common\.py
+            |pandas/tests/io/pytables/test_store\.py$
+    -   id: no-pandas-api-types
+        name: Check code for instances of pd.api.types
+        entry: (pd|pandas)\.api\.types\.
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
@@ -184,4 +195,14 @@ repos:
     hooks:
     -   id: codespell
         types_or: [python, rst, markdown]
-        files: ^pandas/core/
+        files: ^pandas/
+        exclude: ^pandas/tests/
+-   repo: https://github.com/MarcoGorelli/no-string-hints
+    rev: v0.1.7
+    hooks:
+    -   id: no-string-hints
+-   repo: https://github.com/MarcoGorelli/abs-imports
+    rev: v0.1.2
+    hooks:
+    -   id: abs-imports
+        files: ^pandas/
@@ -1,5 +1,5 @@
 <div align="center">
-  <img src="https://dev.pandas.io/static/img/pandas.svg"><br>
+  <img src="https://pandas.pydata.org/static/img/pandas.svg"><br>
 </div>
 
 -----------------
 
@@ -118,12 +118,29 @@ def setup(self):
         self.a = pd.Categorical(list("aabbcd") * N)
         self.b = pd.Categorical(list("bbcdjk") * N)
 
+        self.idx_a = pd.CategoricalIndex(range(N), range(N))
+        self.idx_b = pd.CategoricalIndex(range(N + 1), range(N + 1))
+        self.df_a = pd.DataFrame(range(N), columns=["a"], index=self.idx_a)
+        self.df_b = pd.DataFrame(range(N + 1), columns=["a"], index=self.idx_b)
+
     def time_concat(self):
         pd.concat([self.s, self.s])
 
     def time_union(self):
         union_categoricals([self.a, self.b])
 
+    def time_append_overlapping_index(self):
+        self.idx_a.append(self.idx_a)
+
+    def time_append_non_overlapping_index(self):
+        self.idx_a.append(self.idx_b)
+
+    def time_concat_overlapping_index(self):
+        pd.concat([self.df_a, self.df_a])
+
+    def time_concat_non_overlapping_index(self):
+        pd.concat([self.df_a, self.df_b])
+
 
 class ValueCounts:
 
@@ -306,7 +323,7 @@ def time_get_loc(self):
         self.index.get_loc(self.category)
 
     def time_shallow_copy(self):
-        self.index._shallow_copy()
+        self.index._view()
 
     def time_align(self):
         pd.DataFrame({"a": self.series, "b": self.series[:500]})
 
@@ -25,6 +25,15 @@ def time_isin_outside(self, dtype, exponent):
         self.s.isin(self.values_outside)
 
 
+class UniqueForLargePyObjectInts:
+    def setup(self):
+        # object-dtype array holding the Python ints 0, 2**32, 2 * 2**32, ...
+        self.arr = np.array([x << 32 for x in range(5000)], dtype=np.object_)
+
+    def time_unique(self):
+        pd.unique(self.arr)
+
+
 class IsinWithRandomFloat:
     params = [
         [np.float64, np.object],
 
@@ -243,6 +243,20 @@ def time_loc_list(self, monotonic):
         monotonic.loc[80000:]
 
 
+class DatetimeIndexIndexing:
+    """Time indexer lookups between DatetimeIndexes that differ only in tz."""
+
+    def setup(self):
+        self.dti = date_range("2016-01-01", periods=10000, tz="US/Pacific")
+        self.dti2 = self.dti.tz_convert("UTC")
+
+    def time_get_indexer_mismatched_tz(self):
+        # reached via e.g.
+        #  ser = Series(range(len(dti)), index=dti)
+        #  ser[dti2]
+        self.dti.get_indexer(self.dti2)
+
+
 class CategoricalIndexIndexing:
 
     params = ["monotonic_incr", "monotonic_decr", "non_monotonic"]
 
@@ -76,6 +76,54 @@ def time_frame(self, obs):
         self.data.to_csv(self.fname)
 
 
+class ToCSVIndexes(BaseIO):
+    """Time ``to_csv`` with a default index and with a 3-level MultiIndex."""
+
+    fname = "__test__.csv"
+
+    @staticmethod
+    def _create_df(rows, cols):
+        """
+        Build a frame of three integer would-be index columns followed by
+        ``cols`` columns of uniform random floats.
+        """
+        columns = {
+            "index1": np.random.randint(0, rows, rows),
+            "index2": np.full(rows, 1, dtype=int),
+            "index3": np.full(rows, 1, dtype=int),
+        }
+        for i in range(cols):
+            columns[f"col{i}"] = np.random.uniform(0, 100000.0, rows)
+        return DataFrame(columns)
+
+    def setup(self):
+        ROWS = 100000
+        COLS = 5
+        # The .head() benchmarks start from a frame with this many times more
+        # rows, which is then cut back to ROWS.
+        HEAD_ROW_MULTIPLIER = 10
+        index_cols = ["index1", "index2", "index3"]
+
+        self.df_standard_index = self._create_df(ROWS, COLS)
+
+        # Index set on the oversized frame first, then truncated.
+        oversized = self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
+        self.df_custom_index_then_head = oversized.set_index(index_cols).head(ROWS)
+
+        # Truncated first, so the index is only ever set on ROWS rows.
+        oversized = self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
+        self.df_head_then_custom_index = oversized.head(ROWS).set_index(index_cols)
+
+    def time_standard_index(self):
+        self.df_standard_index.to_csv(self.fname)
+
+    def time_multiindex(self):
+        self.df_head_then_custom_index.to_csv(self.fname)
+
+    def time_head_of_multiindex(self):
+        self.df_custom_index_then_head.to_csv(self.fname)
+
+
 class StringIORewind:
     def data(self, stringio_object):
         stringio_object.seek(0)
 
@@ -86,7 +86,7 @@ def time_get_loc(self):
         self.index.get_loc(self.period)
 
     def time_shallow_copy(self):
-        self.index._shallow_copy()
+        self.index._view()
 
     def time_series_loc(self):
         self.series.loc[self.period]
 
@@ -5,6 +5,7 @@
 
 import pandas as pd
 from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
+from pandas.api.types import CategoricalDtype
 
 
 class Melt:
@@ -196,7 +197,7 @@ def setup(self):
         categories = list(string.ascii_letters[:12])
         s = pd.Series(
             np.random.choice(categories, size=1000000),
-            dtype=pd.api.types.CategoricalDtype(categories),
+            dtype=CategoricalDtype(categories),
         )
         self.s = s
 
 
@@ -140,8 +140,11 @@ class Pairwise:
 
     def setup(self, window, method, pairwise):
         N = 10 ** 4
+        n_groups = 20
+        groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
         arr = np.random.random(N)
         self.df = pd.DataFrame(arr)
+        self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
 
     def time_pairwise(self, window, method, pairwise):
         if window is None:
@@ -150,6 +153,13 @@ def time_pairwise(self, window, method, pairwise):
             r = self.df.rolling(window=window)
         getattr(r, method)(self.df, pairwise=pairwise)
 
+    def time_groupby(self, window, method, pairwise):
+        if window is None:
+            r = self.df_group.expanding()
+        else:
+            r = self.df_group.rolling(window=window)
+        getattr(r, method)(self.df, pairwise=pairwise)
+
 
 class Quantile:
     params = (
@@ -245,6 +255,19 @@ def time_rolling_multiindex_creation(self):
 
 class GroupbyEWM:
 
+    params = ["var", "std", "cov", "corr"]
+    param_names = ["method"]
+
+    def setup(self, method):
+        df = pd.DataFrame({"A": range(50), "B": range(50)})
+        self.gb_ewm = df.groupby("A").ewm(com=1.0)
+
+    def time_groupby_method(self, method):
+        getattr(self.gb_ewm, method)()
+
+
+class GroupbyEWMEngine:
+
     params = ["cython", "numba"]
     param_names = ["engine"]