debnathshoham
diff --git a/‎.circleci/config.yml
+21 b/‎.circleci/config.yml
+21
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+11-1 b/‎asv_bench/benchmarks/groupby.py
+11-1
diff --git a/‎asv_bench/benchmarks/inference.py
+5 b/‎asv_bench/benchmarks/inference.py
+5
diff --git a/‎asv_bench/benchmarks/rolling.py
+27 b/‎asv_bench/benchmarks/rolling.py
+27
diff --git a/‎asv_bench/benchmarks/series_methods.py
+13 b/‎asv_bench/benchmarks/series_methods.py
+13
diff --git a/‎asv_bench/benchmarks/sparse.py
+28 b/‎asv_bench/benchmarks/sparse.py
+28
diff --git a/‎ci/azure/posix.yml
+7-1 b/‎ci/azure/posix.yml
+7-1
diff --git a/‎ci/azure/windows.yml
+4-4 b/‎ci/azure/windows.yml
+4-4
diff --git a/‎doc/source/reference/style.rst
+1 b/‎doc/source/reference/style.rst
+1
diff --git a/‎doc/source/reference/window.rst
+2 b/‎doc/source/reference/window.rst
+2
diff --git a/‎doc/source/user_guide/10min.rst
+8 b/‎doc/source/user_guide/10min.rst
+8
diff --git a/‎doc/source/user_guide/style.ipynb
+48-7 b/‎doc/source/user_guide/style.ipynb
+48-7
diff --git a/‎doc/source/whatsnew/index.rst
+1 b/‎doc/source/whatsnew/index.rst
+1
@@ -0,0 +1,21 @@
+version: 2.1
+
+jobs:
+  test-arm:
+    machine:
+      image: ubuntu-2004:202101-01
+    resource_class: arm.medium
+    environment:
+      ENV_FILE: ci/deps/circle-38-arm64.yaml
+      PYTEST_WORKERS: auto
+      PATTERN: "not slow and not network and not clipboard and not arm_slow"
+      PYTEST_TARGET: "pandas"
+    steps:
+      - checkout
+      - run: ci/setup_env.sh
+      - run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
+
+workflows:
+  test:
+    jobs:
+      - test-arm
@@ -12,7 +12,7 @@
 [![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/master/LICENSE)
 [![Azure Build Status](https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master)](https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master)
 [![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master)](https://codecov.io/gh/pandas-dev/pandas)
-[![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org)
+[![Downloads](https://static.pepy.tech/personalized-badge/pandas?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads%20per%20month)](https://pepy.tech/project/pandas)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 
@@ -454,6 +454,16 @@ def setup(self, dtype, method, application, ncols):
             # DataFrameGroupBy doesn't have these methods
             raise NotImplementedError
 
+        if application == "transformation" and method in [
+            "head",
+            "tail",
+            "unique",
+            "value_counts",
+            "size",
+        ]:
+            # DataFrameGroupBy doesn't have these methods
+            raise NotImplementedError
+
         ngroups = 1000
         size = ngroups * 2
         rng = np.arange(ngroups).reshape(-1, 1)
@@ -480,7 +490,7 @@ def setup(self, dtype, method, application, ncols):
         if len(cols) == 1:
             cols = cols[0]
 
-        if application == "transform":
+        if application == "transformation":
             if method == "describe":
                 raise NotImplementedError
 
 
@@ -173,6 +173,7 @@ def setup(self):
         self.strings_tz_space = [
             x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng
         ]
+        self.strings_zero_tz = [x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng]
 
     def time_iso8601(self):
         to_datetime(self.strings)
@@ -189,6 +190,10 @@ def time_iso8601_format_no_sep(self):
     def time_iso8601_tz_spaceformat(self):
         to_datetime(self.strings_tz_space)
 
+    def time_iso8601_infer_zero_tz_fromat(self):
+        # GH 41047
+        to_datetime(self.strings_zero_tz, infer_datetime_format=True)
+
 
 class ToDatetimeNONISO8601:
     def setup(self):
 
@@ -180,6 +180,33 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
         self.roll.quantile(percentile, interpolation=interpolation)
 
 
+class Rank:
+    params = (
+        ["DataFrame", "Series"],
+        [10, 1000],
+        ["int", "float"],
+        [True, False],
+        [True, False],
+        ["min", "max", "average"],
+    )
+    param_names = [
+        "constructor",
+        "window",
+        "dtype",
+        "percentile",
+        "ascending",
+        "method",
+    ]
+
+    def setup(self, constructor, window, dtype, percentile, ascending, method):
+        N = 10 ** 5
+        arr = np.random.random(N).astype(dtype)
+        self.roll = getattr(pd, constructor)(arr).rolling(window)
+
+    def time_rank(self, constructor, window, dtype, percentile, ascending, method):
+        self.roll.rank(pct=percentile, ascending=ascending, method=method)
+
+
 class PeakMemFixedWindowMinMax:
 
     params = ["min", "max"]
 
@@ -27,6 +27,19 @@ def time_constructor(self, data):
         Series(data=self.data, index=self.idx)
 
 
+class ToFrame:
+    params = [["int64", "datetime64[ns]", "category", "Int64"], [None, "foo"]]
+    param_names = ["dtype", "name"]
+
+    def setup(self, dtype, name):
+        arr = np.arange(10 ** 5)
+        ser = Series(arr, dtype=dtype)
+        self.ser = ser
+
+    def time_to_frame(self, dtype, name):
+        self.ser.to_frame(name)
+
+
 class NSort:
 
     params = ["first", "last", "all"]
 
@@ -91,6 +91,20 @@ def time_sparse_series_to_coo_single_level(self, sort_labels):
         self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
 
 
+class ToCooFrame:
+    def setup(self):
+        N = 10000
+        k = 10
+        arr = np.full((N, k), np.nan)
+        arr[0, 0] = 3.0
+        arr[12, 7] = -1.0
+        arr[0, 9] = 11.2
+        self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
+
+    def time_to_coo(self):
+        self.df.sparse.to_coo()
+
+
 class Arithmetic:
 
     params = ([0.1, 0.01], [0, np.nan])
@@ -152,4 +166,18 @@ def time_division(self, fill_value):
         self.arr1 / self.arr2
 
 
+class MinMax:
+
+    params = (["min", "max"], [0.0, np.nan])
+    param_names = ["func", "fill_value"]
+
+    def setup(self, func, fill_value):
+        N = 1_000_000
+        arr = make_array(N, 1e-5, fill_value, np.float64)
+        self.sp_arr = SparseArray(arr, fill_value=fill_value)
+
+    def time_min_max(self, func, fill_value):
+        getattr(self.sp_arr, func)()
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -9,10 +9,16 @@ jobs:
   strategy:
     matrix:
       ${{ if eq(parameters.name, 'macOS') }}:
-        py38_macos:
+        py38_macos_1:
           ENV_FILE: ci/deps/azure-macos-38.yaml
           CONDA_PY: "38"
           PATTERN: "not slow and not network"
+          PYTEST_TARGET: "pandas/tests/[a-h]*"
+        py38_macos_2:
+          ENV_FILE: ci/deps/azure-macos-38.yaml
+          CONDA_PY: "38"
+          PATTERN: "not slow and not network"
+          PYTEST_TARGET: "pandas/tests/[i-z]*"
 
   steps:
     - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
 
@@ -13,28 +13,28 @@ jobs:
         CONDA_PY: "38"
         PATTERN: "not slow and not network"
         PYTEST_WORKERS: 2  # GH-42236
-        PYTEST_TARGET: "pandas/tests/[a-i]*"
+        PYTEST_TARGET: "pandas/tests/[a-h]*"
 
       py38_np18_2:
         ENV_FILE: ci/deps/azure-windows-38.yaml
         CONDA_PY: "38"
         PATTERN: "not slow and not network"
         PYTEST_WORKERS: 2  # GH-42236
-        PYTEST_TARGET: "pandas/tests/[j-z]*"
+        PYTEST_TARGET: "pandas/tests/[i-z]*"
 
       py39_1:
         ENV_FILE: ci/deps/azure-windows-39.yaml
         CONDA_PY: "39"
         PATTERN: "not slow and not network and not high_memory"
         PYTEST_WORKERS: 2  # GH-42236
-        PYTEST_TARGET: "pandas/tests/[a-i]*"
+        PYTEST_TARGET: "pandas/tests/[a-h]*"
 
       py39_2:
         ENV_FILE: ci/deps/azure-windows-39.yaml
         CONDA_PY: "39"
         PATTERN: "not slow and not network and not high_memory"
         PYTEST_WORKERS: 2  # GH-42236
-        PYTEST_TARGET: "pandas/tests/[j-z]*"
+        PYTEST_TARGET: "pandas/tests/[i-z]*"
 
   steps:
     - powershell: |
 
@@ -39,6 +39,7 @@ Style application
    Styler.apply_index
    Styler.applymap_index
    Styler.format
+   Styler.format_index
    Styler.hide_index
    Styler.hide_columns
    Styler.set_td_classes
 
@@ -35,6 +35,7 @@ Rolling window functions
    Rolling.aggregate
    Rolling.quantile
    Rolling.sem
+   Rolling.rank
 
 .. _api.functions_window:
 
@@ -75,6 +76,7 @@ Expanding window functions
    Expanding.aggregate
    Expanding.quantile
    Expanding.sem
+   Expanding.rank
 
 .. _api.functions_ewm:
 
 
@@ -733,6 +733,14 @@ The :meth:`~plt.close` method is used to `close <https://matplotlib.org/3.1.1/ap
    @savefig series_plot_basic.png
    ts.plot();
 
+If running under Jupyter Notebook, the plot will appear when ``ts.plot()`` is called. Otherwise use
+`matplotlib.pyplot.show <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.show.html>`__ to show it or
+`matplotlib.pyplot.savefig <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.savefig.html>`__ to write it to a file.
+
+.. ipython:: python
+
+   plt.show();
+
 On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all
 of the columns with labels:
 
 
@@ -150,15 +150,14 @@
     "\n",
     "### Formatting Values\n",
     "\n",
-    "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value. To control the display value, the text is printed in each cell, and we can use the [.format()][formatfunc] method to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table or for individual columns. \n",
+    "Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value, in both data values and index or column headers. To control the display value, the text is printed in each cell as a string, and we can use the [.format()][formatfunc] and [.format_index()][formatfuncindex] methods to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table, or index, or for individual columns, or MultiIndex levels. \n",
     "\n",
-    "Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n",
-    "\n",
-    "Here is an example of using the multiple options to control the formatting generally and with specific column formatters.\n",
+    "Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):` \n",
     "\n",
     "[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n",
     "[format]: https://docs.python.org/3/library/string.html#format-specification-mini-language\n",
-    "[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst"
+    "[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst\n",
+    "[formatfuncindex]: ../reference/api/pandas.io.formats.style.Styler.format_index.rst"
    ]
   },
   {
@@ -173,6 +172,49 @@
     "                          })"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Using Styler to manipulate the display is a useful feature because maintaining the indexing and data values for other purposes gives greater control. You do not have to overwrite your DataFrame to display it how you like. Here is an example of using the formatting functions whilst still relying on the underlying data for indexing and calculations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n",
+    "                          index=pd.date_range(start=\"2021-01-01\", periods=10),\n",
+    "                          columns=[\"Tokyo\", \"Beijing\"])\n",
+    "\n",
+    "def rain_condition(v): \n",
+    "    if v < 1.75:\n",
+    "        return \"Dry\"\n",
+    "    elif v < 2.75:\n",
+    "        return \"Rain\"\n",
+    "    return \"Heavy Rain\"\n",
+    "\n",
+    "def make_pretty(styler):\n",
+    "    styler.set_caption(\"Weather Conditions\")\n",
+    "    styler.format(rain_condition)\n",
+    "    styler.format_index(lambda v: v.strftime(\"%A\"))\n",
+    "    styler.background_gradient(axis=None, vmin=1, vmax=5, cmap=\"YlGnBu\")\n",
+    "    return styler\n",
+    "\n",
+    "weather_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "weather_df.loc[\"2021-01-04\":\"2021-01-08\"].style.pipe(make_pretty)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -187,7 +229,7 @@
     "\n",
     "Hiding does not change the integer arrangement of CSS classes, e.g. hiding the first two columns of a DataFrame means the column class indexing will start at `col2`, since `col0` and `col1` are simply ignored.\n",
     "\n",
-    "We can update our `Styler` object to hide some data and format the values.\n",
+    "We can update our `Styler` object from before to hide some data and format the values.\n",
     "\n",
     "[hideidx]: ../reference/api/pandas.io.formats.style.Styler.hide_index.rst\n",
     "[hidecols]: ../reference/api/pandas.io.formats.style.Styler.hide_columns.rst"
@@ -1974,7 +2016,6 @@
   }
  ],
  "metadata": {
-  "celltoolbar": "Edit Metadata",
   "kernelspec": {
    "display_name": "Python 3",
    "language": "python",
 
@@ -24,6 +24,7 @@ Version 1.3
 .. toctree::
    :maxdepth: 2
 
+   v1.3.4
    v1.3.3
    v1.3.2
    v1.3.1