simonjayhawkins
diff --git a/‎.circleci/config.yml
+21 b/‎.circleci/config.yml
+21
diff --git a/‎.github/PULL_REQUEST_TEMPLATE.md
+1-1 b/‎.github/PULL_REQUEST_TEMPLATE.md
+1-1
diff --git a/‎.github/workflows/ci.yml
+1 b/‎.github/workflows/ci.yml
+1
diff --git a/‎.github/workflows/posix.yml
+1 b/‎.github/workflows/posix.yml
+1
diff --git a/‎.github/workflows/python-dev.yml
+1 b/‎.github/workflows/python-dev.yml
+1
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎asv_bench/benchmarks/algorithms.py
+6-6 b/‎asv_bench/benchmarks/algorithms.py
+6-6
diff --git a/‎asv_bench/benchmarks/groupby.py
+11-1 b/‎asv_bench/benchmarks/groupby.py
+11-1
diff --git a/‎asv_bench/benchmarks/indexing_engines.py
+2-2 b/‎asv_bench/benchmarks/indexing_engines.py
+2-2
diff --git a/‎asv_bench/benchmarks/inference.py
+5 b/‎asv_bench/benchmarks/inference.py
+5
diff --git a/‎asv_bench/benchmarks/rolling.py
+27 b/‎asv_bench/benchmarks/rolling.py
+27
diff --git a/‎asv_bench/benchmarks/series_methods.py
+13 b/‎asv_bench/benchmarks/series_methods.py
+13
diff --git a/‎asv_bench/benchmarks/sparse.py
+43 b/‎asv_bench/benchmarks/sparse.py
+43
diff --git a/‎azure-pipelines.yml
+1 b/‎azure-pipelines.yml
+1
diff --git a/‎ci/azure/posix.yml
+7-1 b/‎ci/azure/posix.yml
+7-1
diff --git a/‎ci/azure/windows.yml
+19-2 b/‎ci/azure/windows.yml
+19-2
diff --git a/‎ci/run_tests.sh
+1-1 b/‎ci/run_tests.sh
+1-1
diff --git a/‎doc/source/development/contributing.rst
+6-1 b/‎doc/source/development/contributing.rst
+6-1
diff --git a/‎doc/source/development/contributing_environment.rst
-1 b/‎doc/source/development/contributing_environment.rst
-1
diff --git a/‎doc/source/ecosystem.rst
+14 b/‎doc/source/ecosystem.rst
+14
diff --git a/‎doc/source/reference/style.rst
+1 b/‎doc/source/reference/style.rst
+1
@@ -0,0 +1,21 @@
+version: 2.1
+
+jobs:
+  test-arm:
+    machine:
+      image: ubuntu-2004:202101-01
+    resource_class: arm.medium  # Arm resource class; pairs with the arm64 ENV_FILE
+    environment:
+      ENV_FILE: ci/deps/circle-38-arm64.yaml
+      PYTEST_WORKERS: auto
+      PATTERN: "not slow and not network and not clipboard and not arm_slow"
+      PYTEST_TARGET: "pandas"  # test path that ci/run_tests.sh hands to pytest
+    steps:
+      - checkout
+      - run: ci/setup_env.sh
+      - run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
+
+workflows:
+  test:
+    jobs:
+      - test-arm
@@ -1,4 +1,4 @@
 - [ ] closes #xxxx
 - [ ] tests added / passed
-- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing.html#code-standards) for how to run them
+- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#pre-commit) for how to run them
 - [ ] whatsnew entry
@@ -168,6 +168,7 @@ jobs:
         PANDAS_DATA_MANAGER: array
         PATTERN: ${{ matrix.pattern }}
         PYTEST_WORKERS: "auto"
+        PYTEST_TARGET: pandas
       run: |
         source activate pandas-dev
         ci/run_tests.sh
 
@@ -44,6 +44,7 @@ jobs:
       LC_ALL: ${{ matrix.settings[4] }}
       PANDAS_TESTING_MODE: ${{ matrix.settings[5] }}
       TEST_ARGS: ${{ matrix.settings[6] }}
+      PYTEST_TARGET:  pandas
     concurrency:
       group: ${{ github.ref }}-${{ matrix.settings[0] }}
       cancel-in-progress: ${{github.event_name == 'pull_request'}}
 
@@ -17,6 +17,7 @@ env:
   PANDAS_CI: 1
   PATTERN: "not slow and not network and not clipboard"
   COVERAGE: true
+  PYTEST_TARGET:  pandas
 
 jobs:
   build:
 
@@ -12,7 +12,7 @@
 [![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/master/LICENSE)
 [![Azure Build Status](https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master)](https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master)
 [![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master)](https://codecov.io/gh/pandas-dev/pandas)
-[![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org)
+[![Downloads](https://static.pepy.tech/personalized-badge/pandas?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads%20per%20month)](https://pepy.tech/project/pandas)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 
@@ -44,9 +44,9 @@ def setup(self, unique, sort, dtype):
                 raise NotImplementedError
 
         data = {
-            "int": pd.Int64Index(np.arange(N)),
-            "uint": pd.UInt64Index(np.arange(N)),
-            "float": pd.Float64Index(np.random.randn(N)),
+            "int": pd.Index(np.arange(N), dtype="int64"),
+            "uint": pd.Index(np.arange(N), dtype="uint64"),
+            "float": pd.Index(np.random.randn(N), dtype="float64"),
             "object": string_index,
             "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
             "datetime64[ns, tz]": pd.date_range(
@@ -76,9 +76,9 @@ class Duplicated:
     def setup(self, unique, keep, dtype):
         N = 10 ** 5
         data = {
-            "int": pd.Int64Index(np.arange(N)),
-            "uint": pd.UInt64Index(np.arange(N)),
-            "float": pd.Float64Index(np.random.randn(N)),
+            "int": pd.Index(np.arange(N), dtype="int64"),
+            "uint": pd.Index(np.arange(N), dtype="uint64"),
+            "float": pd.Index(np.random.randn(N), dtype="float64"),
             "string": tm.makeStringIndex(N),
             "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
             "datetime64[ns, tz]": pd.date_range(
 
@@ -454,6 +454,16 @@ def setup(self, dtype, method, application, ncols):
             # DataFrameGroupBy doesn't have these methods
             raise NotImplementedError
 
+        if application == "transformation" and method in [
+            "head",
+            "tail",
+            "unique",
+            "value_counts",
+            "size",
+        ]:
+            # not benchmarked as transformations; NOTE(review): confirm why
+            raise NotImplementedError
+
         ngroups = 1000
         size = ngroups * 2
         rng = np.arange(ngroups).reshape(-1, 1)
@@ -480,7 +490,7 @@ def setup(self, dtype, method, application, ncols):
         if len(cols) == 1:
             cols = cols[0]
 
-        if application == "transform":
+        if application == "transformation":
             if method == "describe":
                 raise NotImplementedError
 
 
@@ -48,7 +48,7 @@ def setup(self, engine_and_dtype, index_type):
             "non_monotonic": np.array([1, 2, 3] * N, dtype=dtype),
         }[index_type]
 
-        self.data = engine(lambda: arr, len(arr))
+        self.data = engine(arr)
         # code belows avoids populating the mapping etc. while timing.
         self.data.get_loc(2)
 
@@ -70,7 +70,7 @@ def setup(self, index_type):
             "non_monotonic": np.array(list("abc") * N, dtype=object),
         }[index_type]
 
-        self.data = libindex.ObjectEngine(lambda: arr, len(arr))
+        self.data = libindex.ObjectEngine(arr)
         # code belows avoids populating the mapping etc. while timing.
         self.data.get_loc("b")
 
 
@@ -173,6 +173,7 @@ def setup(self):
         self.strings_tz_space = [
             x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng
         ]
+        self.strings_zero_tz = [x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng]
 
     def time_iso8601(self):
         to_datetime(self.strings)
@@ -189,6 +190,10 @@ def time_iso8601_format_no_sep(self):
     def time_iso8601_tz_spaceformat(self):
         to_datetime(self.strings_tz_space)
 
+    def time_iso8601_infer_zero_tz_fromat(self):
+        # GH 41047; NOTE(review): "fromat" in the name looks like a typo of "format"
+        to_datetime(self.strings_zero_tz, infer_datetime_format=True)
+
 
 class ToDatetimeNONISO8601:
     def setup(self):
 
@@ -180,6 +180,33 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
         self.roll.quantile(percentile, interpolation=interpolation)
 
 
+class Rank:  # asv benchmark for Rolling.rank on Series and DataFrame
+    params = (
+        ["DataFrame", "Series"],
+        [10, 1000],
+        ["int", "float"],
+        [True, False],
+        [True, False],
+        ["min", "max", "average"],
+    )
+    param_names = [
+        "constructor",
+        "window",
+        "dtype",
+        "percentile",  # boolean flag; forwarded to rank() as ``pct``
+        "ascending",
+        "method",
+    ]
+
+    def setup(self, constructor, window, dtype, percentile, ascending, method):
+        N = 10 ** 5
+        arr = np.random.random(N).astype(dtype)  # NOTE(review): "int" -> all zeros?
+        self.roll = getattr(pd, constructor)(arr).rolling(window)
+
+    def time_rank(self, constructor, window, dtype, percentile, ascending, method):
+        self.roll.rank(pct=percentile, ascending=ascending, method=method)
+
+
 class PeakMemFixedWindowMinMax:
 
     params = ["min", "max"]
 
@@ -27,6 +27,19 @@ def time_constructor(self, data):
         Series(data=self.data, index=self.idx)
 
 
+class ToFrame:  # asv benchmark for Series.to_frame across dtypes and names
+    params = [["int64", "datetime64[ns]", "category", "Int64"], [None, "foo"]]
+    param_names = ["dtype", "name"]
+
+    def setup(self, dtype, name):  # ``name`` is not used while building the Series
+        arr = np.arange(10 ** 5)
+        ser = Series(arr, dtype=dtype)
+        self.ser = ser  # reused by time_to_frame
+
+    def time_to_frame(self, dtype, name):
+        self.ser.to_frame(name)
+
+
 class NSort:
 
     params = ["first", "last", "all"]
 
@@ -91,6 +91,20 @@ def time_sparse_series_to_coo_single_level(self, sort_labels):
         self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
 
 
+class ToCooFrame:  # asv benchmark for DataFrame.sparse.to_coo
+    def setup(self):
+        N = 10000
+        k = 10
+        arr = np.full((N, k), np.nan)  # start from an all-NaN (N, k) array
+        arr[0, 0] = 3.0  # only three cells are given non-NaN values
+        arr[12, 7] = -1.0
+        arr[0, 9] = 11.2
+        self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
+
+    def time_to_coo(self):
+        self.df.sparse.to_coo()
+
+
 class Arithmetic:
 
     params = ([0.1, 0.01], [0, np.nan])
@@ -152,4 +166,33 @@ def time_division(self, fill_value):
         self.arr1 / self.arr2
 
 
+class MinMax:  # asv benchmark for SparseArray.min / SparseArray.max
+
+    params = (["min", "max"], [0.0, np.nan])
+    param_names = ["func", "fill_value"]
+
+    def setup(self, func, fill_value):
+        N = 1_000_000
+        arr = make_array(N, 1e-5, fill_value, np.float64)  # helper not shown here
+        self.sp_arr = SparseArray(arr, fill_value=fill_value)
+
+    def time_min_max(self, func, fill_value):
+        getattr(self.sp_arr, func)()  # look up the reduction named by ``func``
+
+
+class Take:  # asv benchmark for SparseArray.take
+
+    params = ([np.array([0]), np.arange(100_000), np.full(100_000, -1)], [True, False])
+    param_names = ["indices", "allow_fill"]
+
+    def setup(self, indices, allow_fill):  # neither parameter is used in setup
+        N = 1_000_000
+        fill_value = 0.0
+        arr = make_array(N, 1e-5, fill_value, np.float64)  # helper not shown here
+        self.sp_arr = SparseArray(arr, fill_value=fill_value)
+
+    def time_take(self, indices, allow_fill):
+        self.sp_arr.take(indices, allow_fill=allow_fill)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -17,6 +17,7 @@ pr:
 
 variables:
   PYTEST_WORKERS: auto
+  PYTEST_TARGET:  pandas
 
 jobs:
 # Mac and Linux use the same template
 
@@ -9,10 +9,16 @@ jobs:
   strategy:
     matrix:
       ${{ if eq(parameters.name, 'macOS') }}:
-        py38_macos:
+        py38_macos_1:
           ENV_FILE: ci/deps/azure-macos-38.yaml
           CONDA_PY: "38"
           PATTERN: "not slow and not network"
+          PYTEST_TARGET: "pandas/tests/[a-h]*"
+        py38_macos_2:
+          ENV_FILE: ci/deps/azure-macos-38.yaml
+          CONDA_PY: "38"
+          PATTERN: "not slow and not network"
+          PYTEST_TARGET: "pandas/tests/[i-z]*"
 
   steps:
     - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
 
@@ -8,17 +8,33 @@ jobs:
     vmImage: ${{ parameters.vmImage }}
   strategy:
     matrix:
-      py38_np18:
+      py38_np18_1:
         ENV_FILE: ci/deps/azure-windows-38.yaml
         CONDA_PY: "38"
         PATTERN: "not slow and not network"
         PYTEST_WORKERS: 2  # GH-42236
+        PYTEST_TARGET: "pandas/tests/[a-h]*"
 
-      py39:
+      py38_np18_2:
+        ENV_FILE: ci/deps/azure-windows-38.yaml
+        CONDA_PY: "38"
+        PATTERN: "not slow and not network"
+        PYTEST_WORKERS: 2  # GH-42236
+        PYTEST_TARGET: "pandas/tests/[i-z]*"
+
+      py39_1:
+        ENV_FILE: ci/deps/azure-windows-39.yaml
+        CONDA_PY: "39"
+        PATTERN: "not slow and not network and not high_memory"
+        PYTEST_WORKERS: 2  # GH-42236
+        PYTEST_TARGET: "pandas/tests/[a-h]*"
+
+      py39_2:
         ENV_FILE: ci/deps/azure-windows-39.yaml
         CONDA_PY: "39"
         PATTERN: "not slow and not network and not high_memory"
         PYTEST_WORKERS: 2  # GH-42236
+        PYTEST_TARGET: "pandas/tests/[i-z]*"
 
   steps:
     - powershell: |
@@ -39,6 +55,7 @@ jobs:
       displayName: 'Build'
     - bash: |
         source activate pandas-dev
+        wmic.exe cpu get caption, deviceid, name, numberofcores, maxclockspeed
         ci/run_tests.sh
       displayName: 'Test'
     - task: PublishTestResults@2
 
@@ -19,7 +19,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
     XVFB="xvfb-run "
 fi
 
-PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas"
+PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET"
 
 if [[ $(uname) != "Linux"  && $(uname) != "Darwin" ]]; then
     # GH#37455 windows py38 build appears to be running out of memory
 
@@ -331,7 +331,12 @@ can comment::
 
     @github-actions pre-commit
 
-on that pull request. This will trigger a workflow which will autofix formatting errors.
+on that pull request. This will trigger a workflow which will autofix formatting
+errors.
+
+To automatically fix formatting errors on each commit you make, you can
+set up pre-commit yourself. First, create a Python :ref:`environment
+<contributing_environment>` and then set up :ref:`pre-commit <contributing.pre-commit>`.
 
 Delete your merged branch (optional)
 ------------------------------------
 
@@ -133,7 +133,6 @@ compiler installation instructions.
 
 Let us know if you have any difficulties by opening an issue or reaching out on `Gitter <https://gitter.im/pydata/pandas/>`_.
 
-
 Creating a Python environment
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
@@ -575,3 +575,17 @@ Library            Accessor     Classes                              Description
 .. _composeml: https://github.com/alteryx/compose
 .. _datatest: https://datatest.readthedocs.io/
 .. _woodwork: https://github.com/alteryx/woodwork
+
+Development tools
+----------------------------
+
+`pandas-stubs <https://github.com/VirtusLab/pandas-stubs>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of the pandas API.
+
+Learn more by reading through these issues `14468 <https://github.com/pandas-dev/pandas/issues/14468>`_,
+`26766 <https://github.com/pandas-dev/pandas/issues/26766>`_, `28142 <https://github.com/pandas-dev/pandas/issues/28142>`_.
+
+See installation and usage instructions on the `GitHub page <https://github.com/VirtusLab/pandas-stubs>`__.
@@ -39,6 +39,7 @@ Style application
    Styler.apply_index
    Styler.applymap_index
    Styler.format
+   Styler.format_index
    Styler.hide_index
    Styler.hide_columns
    Styler.set_td_classes