pandas-dev
diff --git a/‎.pre-commit-config.yaml
+2-2 b/‎.pre-commit-config.yaml
+2-2
diff --git a/‎LICENSE
+1-1 b/‎LICENSE
+1-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+28 b/‎asv_bench/benchmarks/groupby.py
+28
diff --git a/‎doc/source/_static/style/hq_ax1.png
5.95 KB b/‎doc/source/_static/style/hq_ax1.png
5.95 KB
diff --git a/‎doc/source/_static/style/hq_axNone.png
5.96 KB b/‎doc/source/_static/style/hq_axNone.png
5.96 KB
diff --git a/‎doc/source/_static/style/hq_props.png
6.09 KB b/‎doc/source/_static/style/hq_props.png
6.09 KB
diff --git a/‎doc/source/development/roadmap.rst
+2-2 b/‎doc/source/development/roadmap.rst
+2-2
diff --git a/‎doc/source/getting_started/install.rst
+15 b/‎doc/source/getting_started/install.rst
+15
diff --git a/‎doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+1-1 b/‎doc/source/getting_started/intro_tutorials/01_table_oriented.rst
+1-1
diff --git a/‎doc/source/user_guide/io.rst
+5 b/‎doc/source/user_guide/io.rst
+5
diff --git a/‎doc/source/user_guide/style.ipynb
+61-16 b/‎doc/source/user_guide/style.ipynb
+61-16
diff --git a/‎doc/source/user_guide/visualization.rst
+26-6 b/‎doc/source/user_guide/visualization.rst
+26-6
@@ -35,7 +35,7 @@ repos:
         exclude: ^pandas/_libs/src/(klib|headers)/
         args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.0
+    rev: 3.9.1
     hooks:
     -   id: flake8
         additional_dependencies:
@@ -75,7 +75,7 @@ repos:
     hooks:
     -   id: yesqa
         additional_dependencies:
-            - flake8==3.9.0
+            - flake8==3.9.1
             - flake8-comprehensions==3.1.0
             - flake8-bugbear==21.3.2
             - pandas-dev-flaker==0.2.0
 
@@ -3,7 +3,7 @@ BSD 3-Clause License
 Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.
 
-Copyright (c) 2011-2020, Open source contributors.
+Copyright (c) 2011-2021, Open source contributors.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
@@ -505,6 +505,34 @@ def time_frame_agg(self, dtype, method):
         self.df.groupby("key").agg(method)
 
 
+class CumminMax:  # benchmark: groupby("key").transform with cummin / cummax
+    param_names = ["dtype", "method"]
+    params = [
+        ["float64", "int64", "Float64", "Int64"],  # values for ``dtype``
+        ["cummin", "cummax"],  # values for ``method``
+    ]
+
+    def setup(self, dtype, method):  # builds self.df and self.null_df
+        N = 500_000  # number of rows in each frame
+        vals = np.random.randint(-10, 10, (N, 5))  # ints in [-10, 10), 5 columns
+        null_vals = vals.astype(float, copy=True)  # float copy so NaN can be stored
+        null_vals[::2, :] = np.nan  # rows 0, 2, 4, ... become NaN
+        null_vals[::3, :] = np.nan  # rows 0, 3, 6, ... too (about 2/3 of rows)
+        df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
+        null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
+        keys = np.random.randint(0, 100, size=N)  # group labels in [0, 100)
+        df["key"] = keys  # both frames share the same grouping column
+        null_df["key"] = keys
+        self.df = df
+        self.null_df = null_df
+
+    def time_frame_transform(self, dtype, method):  # uses the frame from ``vals``
+        self.df.groupby("key").transform(method)
+
+    def time_frame_transform_many_nulls(self, dtype, method):  # uses ``null_vals``
+        self.null_df.groupby("key").transform(method)
+
+
 class RankWithTies:
     # GH 21237
     param_names = ["dtype", "tie_method"]
 
@@ -71,8 +71,8 @@ instead of comparing as False).
 
 Long term, we want to introduce consistent missing data handling for all data
 types. This includes consistent behavior in all operations (indexing, arithmetic
-operations, comparisons, etc.). We want to eventually make the new semantics the
-default.
+operations, comparisons, etc.). There has been discussion of eventually making
+the new semantics the default.
 
 This has been discussed at
 `github #28095 <https://github.com/pandas-dev/pandas/issues/28095>`__ (and
 
@@ -362,6 +362,21 @@ pyarrow                   0.15.0             Parquet, ORC, and feather reading /
 pyreadstat                                   SPSS files (.sav) reading
 ========================= ================== =============================================================
 
+.. _install.warn_orc:
+
+.. warning::
+
+    * If you want to use :func:`~pandas.read_orc`, it is highly recommended to install pyarrow using conda.
+      The following is a summary of the environment in which :func:`~pandas.read_orc` can work.
+
+      ========================= ================== =============================================================
+      System                    Conda              PyPI
+      ========================= ================== =============================================================
+      Linux                     Successful         Failed(pyarrow==3.0 Successful)
+      macOS                     Successful         Failed
+      Windows                   Failed             Failed
+      ========================= ================== =============================================================
+
 Access data in the cloud
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 
@@ -176,7 +176,7 @@ these are by default not taken into account by the :func:`~DataFrame.describe` m
 
 Many pandas operations return a ``DataFrame`` or a ``Series``. The
 :func:`~DataFrame.describe` method is an example of a pandas operation returning a
-pandas ``Series``.
+pandas ``Series`` or a pandas ``DataFrame``.
 
 .. raw:: html
 
 
@@ -5443,6 +5443,11 @@ Similar to the :ref:`parquet <io.parquet>` format, the `ORC Format <https://orc.
 for data frames. It is designed to make reading data frames efficient. pandas provides *only* a reader for the
 ORC format, :func:`~pandas.read_orc`. This requires the `pyarrow <https://arrow.apache.org/docs/python/>`__ library.
 
+.. warning::
+
+   * It is *highly recommended* to install pyarrow using conda because of some issues caused by pyarrow.
+   * :func:`~pandas.read_orc` is not supported on Windows yet; you can find valid environments in :ref:`install optional dependencies <install.warn_orc>`.
+
 .. _io.sql:
 
 SQL queries
 
@@ -1006,7 +1006,30 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We expect certain styling functions to be common enough that we've included a few \"built-in\" to the `Styler`, so you don't have to write them yourself."
+    "Some styling functions are common enough that we've \"built them in\" to the `Styler`, so you don't have to write them and apply them yourself. The current list of such functions is:\n",
+    "\n",
+    " - [.highlight_null][nullfunc]: for use with identifying missing data. \n",
+    " - [.highlight_min][minfunc] and [.highlight_max][maxfunc]: for use with identifying extremities in data.\n",
+    " - [.highlight_between][betweenfunc] and [.highlight_quantile][quantilefunc]: for use with identifying classes within data.\n",
+    " - [.background_gradient][bgfunc]: a flexible method for highlighting cells based on their, or other, values on a numeric scale.\n",
+    " -  [.bar][barfunc]: to display mini-charts within cell backgrounds.\n",
+    " \n",
+    "The individual documentation on each function often gives more examples of their arguments.\n",
+    "\n",
+    "[nullfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_null.rst\n",
+    "[minfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_min.rst\n",
+    "[maxfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_max.rst\n",
+    "[betweenfunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_between.rst\n",
+    "[quantilefunc]: ../reference/api/pandas.io.formats.style.Styler.highlight_quantile.rst\n",
+    "[bgfunc]: ../reference/api/pandas.io.formats.style.Styler.background_gradient.rst\n",
+    "[barfunc]: ../reference/api/pandas.io.formats.style.Styler.bar.rst"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Highlight Null"
    ]
   },
   {
@@ -1017,14 +1040,14 @@
    "source": [
     "df2.iloc[0,2] = np.nan\n",
     "df2.iloc[4,3] = np.nan\n",
-    "df2.loc[:4].style.highlight_null(null_color='red')"
+    "df2.loc[:4].style.highlight_null(null_color='yellow')"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "You can create \"heatmaps\" with the `background_gradient` method. These require matplotlib, and we'll use [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/) to get a nice colormap."
+    "### Highlight Min or Max"
    ]
   },
   {
@@ -1033,17 +1056,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import seaborn as sns\n",
-    "cm = sns.light_palette(\"green\", as_cmap=True)\n",
-    "\n",
-    "df2.style.background_gradient(cmap=cm)"
+    "df2.loc[:4].style.highlight_max(axis=1, props='color:white; font-weight:bold; background-color:darkblue;')"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`Styler.background_gradient` takes the keyword arguments `low` and `high`. Roughly speaking these extend the range of your data by `low` and `high` percent so that when we convert the colors, the colormap's entire range isn't used. This is useful so that you can actually read the text still."
+    "### Highlight Between\n",
+    "This method accepts ranges as floats, or as NumPy arrays or Series provided the indexes match."
    ]
   },
   {
@@ -1052,8 +1073,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Uses the full color range\n",
-    "df2.loc[:4].style.background_gradient(cmap='viridis')"
+    "left = pd.Series([1.0, 0.0, 1.0], index=[\"A\", \"B\", \"D\"])\n",
+    "df2.loc[:4].style.highlight_between(left=left, right=1.5, axis=1, props='color:white; background-color:purple;')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Highlight Quantile\n",
+    "Useful for detecting the highest or lowest percentile values."
    ]
   },
   {
@@ -1062,17 +1091,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Compress the color range\n",
-    "df2.loc[:4].style\\\n",
-    "   .background_gradient(cmap='viridis', low=.5, high=0)\\\n",
-    "   .highlight_null('red')"
+    "df2.loc[:4].style.highlight_quantile(q_left=0.85, axis=None, color='yellow')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Background Gradient"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "There's also `.highlight_min` and `.highlight_max`, which is almost identical to the user defined version we created above, and also a `.highlight_null` method. "
+    "You can create \"heatmaps\" with the `background_gradient` method. These require matplotlib, and we'll use [Seaborn](https://stanford.edu/~mwaskom/software/seaborn/) to get a nice colormap."
    ]
   },
   {
@@ -1081,7 +1114,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df2.loc[:4].style.highlight_max(axis=0)"
+    "import seaborn as sns\n",
+    "cm = sns.light_palette(\"green\", as_cmap=True)\n",
+    "\n",
+    "df2.style.background_gradient(cmap=cm)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[.background_gradient][bgfunc] has a number of keyword arguments to customise the gradients and colors. See its documentation.\n",
+    "\n",
+    "[bgfunc]: ../reference/api/pandas.io.formats.style.Styler.background_gradient.rst"
    ]
   },
   {
 
@@ -1458,25 +1458,23 @@ Horizontal and vertical error bars can be supplied to the ``xerr`` and ``yerr``
 * As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values.
 * As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series`.
 
-Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``N`` length :class:`Series`, a ``2xN`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array.
-
 Here is an example of one way to easily plot group means with standard deviations from the raw data.
 
 .. ipython:: python
 
    # Generate the data
    ix3 = pd.MultiIndex.from_arrays(
        [
-           ["a", "a", "a", "a", "b", "b", "b", "b"],
-           ["foo", "foo", "bar", "bar", "foo", "foo", "bar", "bar"],
+           ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"],
+           ["foo", "foo", "foo", "bar", "bar", "foo", "foo", "bar", "bar", "bar"],
        ],
        names=["letter", "word"],
    )
 
    df3 = pd.DataFrame(
        {
-           "data1": [3, 2, 4, 3, 2, 4, 3, 2],
-           "data2": [6, 5, 7, 5, 4, 5, 6, 5],
+           "data1": [9, 3, 2, 4, 3, 2, 4, 6, 3, 2],
+           "data2": [9, 6, 5, 7, 5, 4, 5, 6, 5, 1],
        },
        index=ix3,
    )
@@ -1499,6 +1497,28 @@ Here is an example of one way to easily plot group means with standard deviation
 
    plt.close("all")
 
+Asymmetrical error bars are also supported; however, raw error values must be provided in this case. For an ``N``-length :class:`Series`, a ``2xN`` array should be provided indicating lower and upper (or left and right) errors. For an ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array.
+
+Here is an example of one way to plot the min/max range using asymmetrical error bars.
+
+.. ipython:: python
+
+   mins = gp3.min()
+   maxs = gp3.max()
+
+   # errors should be positive, and defined in the order of lower, upper
+   errors = [[means[c] - mins[c], maxs[c] - means[c]] for c in df3.columns]
+
+   # Plot
+   fig, ax = plt.subplots()
+   @savefig errorbar_asymmetrical_example.png
+   means.plot.bar(yerr=errors, ax=ax, capsize=4, rot=0);
+
+.. ipython:: python
+   :suppress:
+
+   plt.close("all")
+
 .. _visualization.table:
 
 Plotting tables