pandas-dev
diff --git a/‎.github/workflows/database.yml
+1-1 b/‎.github/workflows/database.yml
+1-1
diff --git a/‎.github/workflows/posix.yml
+1-1 b/‎.github/workflows/posix.yml
+1-1
diff --git a/‎.github/workflows/python-dev.yml
+1-1 b/‎.github/workflows/python-dev.yml
+1-1
diff --git a/‎.pre-commit-config.yaml
+1-1 b/‎.pre-commit-config.yaml
+1-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+12 b/‎asv_bench/benchmarks/groupby.py
+12
diff --git a/‎asv_bench/benchmarks/reshape.py
+5-1 b/‎asv_bench/benchmarks/reshape.py
+5-1
diff --git a/‎ci/code_checks.sh
+3 b/‎ci/code_checks.sh
+3
diff --git a/‎ci/deps/actions-38-locale.yaml
+1-1 b/‎ci/deps/actions-38-locale.yaml
+1-1
diff --git a/‎ci/deps/actions-39-slow.yaml
+1 b/‎ci/deps/actions-39-slow.yaml
+1
diff --git a/‎ci/deps/actions-39.yaml
+1 b/‎ci/deps/actions-39.yaml
+1
diff --git a/‎ci/deps/azure-windows-39.yaml
+1 b/‎ci/deps/azure-windows-39.yaml
+1
diff --git a/‎doc/source/_static/style/df_pipe.png
8.47 KB b/‎doc/source/_static/style/df_pipe.png
8.47 KB
diff --git a/‎doc/source/development/contributing_environment.rst
+2-5 b/‎doc/source/development/contributing_environment.rst
+2-5
diff --git a/‎doc/source/user_guide/visualization.rst
+54 b/‎doc/source/user_guide/visualization.rst
+54
diff --git a/‎doc/source/whatsnew/v1.3.2.rst
+11-3 b/‎doc/source/whatsnew/v1.3.2.rst
+11-3
diff --git a/‎doc/source/whatsnew/v1.4.0.rst
+16-3 b/‎doc/source/whatsnew/v1.4.0.rst
+16-3
diff --git a/‎environment.yml
+1-1 b/‎environment.yml
+1-1
@@ -104,7 +104,7 @@ jobs:
       run: python ci/print_skipped.py
 
     - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v1
+      uses: codecov/codecov-action@v2
       with:
         flags: unittests
         name: codecov-pandas
 
@@ -94,7 +94,7 @@ jobs:
       run: python ci/print_skipped.py
 
     - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v1
+      uses: codecov/codecov-action@v2
       with:
         flags: unittests
         name: codecov-pandas
 
@@ -78,7 +78,7 @@ jobs:
         coverage report -m
 
     - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v1
+      uses: codecov/codecov-action@v2
       with:
         flags: unittests
         name: codecov-pandas
 
@@ -110,7 +110,7 @@ repos:
         entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
-        additional_dependencies: [pyyaml]
+        additional_dependencies: [pyyaml, toml]
     -   id: sync-flake8-versions
         name: Check flake8 version is synced across flake8, yesqa, and environment.yml
         language: python
 
@@ -369,6 +369,18 @@ def time_category_size(self):
         self.draws.groupby(self.cats).size()
 
 
+class Shift:
+    def setup(self):
+        N = 18
+        self.df = DataFrame({"g": ["a", "b"] * 9, "v": list(range(N))})
+
+    def time_defaults(self):
+        self.df.groupby("g").shift()
+
+    def time_fill_value(self):
+        self.df.groupby("g").shift(fill_value=99)
+
+
 class FillNA:
     def setup(self):
         N = 100
 
@@ -102,6 +102,7 @@ def setup(self, dtype):
         columns = np.arange(n)
         if dtype == "int":
             values = np.arange(m * m * n).reshape(m * m, n)
+            self.df = DataFrame(values, index, columns)
         else:
             # the category branch is ~20x slower than int. So we
             # cut down the size a bit. Now it's only ~3x slower.
@@ -111,7 +112,10 @@ def setup(self, dtype):
             values = np.take(list(string.ascii_letters), indices)
             values = [pd.Categorical(v) for v in values.T]
 
-        self.df = DataFrame(values, index, columns)
+            self.df = DataFrame(
+                {i: cat for i, cat in enumerate(values)}, index, columns
+            )
+
         self.df2 = self.df.iloc[:-1]
 
     def time_full_product(self, dtype):
 
@@ -121,6 +121,9 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
       pandas/io/parsers/ \
       pandas/io/sas/ \
       pandas/io/sql.py \
+      pandas/io/formats/format.py \
+      pandas/io/formats/style.py \
+      pandas/io/stata.py \
       pandas/tseries/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 
@@ -18,7 +18,7 @@ dependencies:
   - html5lib
   - ipython
   - jinja2
-  - jedi<0.18.0
+  - jedi
   - lxml
   - matplotlib<3.3.0
   - moto
 
@@ -23,6 +23,7 @@ dependencies:
   - matplotlib
   - moto>=1.3.14
   - flask
+  - numba
   - numexpr
   - numpy
   - openpyxl
 
@@ -22,6 +22,7 @@ dependencies:
   - matplotlib
   - moto>=1.3.14
   - flask
+  - numba
   - numexpr
   - numpy
   - openpyxl
 
@@ -23,6 +23,7 @@ dependencies:
   - matplotlib
   - moto>=1.3.14
   - flask
+  - numba
   - numexpr
   - numpy
   - openpyxl
 
@@ -189,11 +189,8 @@ Creating a Python environment (pip)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 If you aren't using conda for your development environment, follow these instructions.
-You'll need to have at least the :ref:`minimum Python version <install.version>` that pandas supports. If your Python version
-is 3.8.0 (or later), you might need to update your ``setuptools`` to version 42.0.0 (or later)
-in your development environment before installing the build dependencies::
-
-      pip install --upgrade setuptools
+You'll need to have at least the :ref:`minimum Python version <install.version>` that pandas supports.
+You also need to have ``setuptools`` 51.0.0 or later to build pandas.
 
 **Unix**/**macOS with virtualenv**
 
 
@@ -316,6 +316,34 @@ The ``by`` keyword can be specified to plot grouped histograms:
    @savefig grouped_hist.png
    data.hist(by=np.random.randint(0, 4, 1000), figsize=(6, 4));
 
+.. ipython:: python
+   :suppress:
+
+   plt.close("all")
+   np.random.seed(123456)
+
+The ``by`` keyword can also be specified in :meth:`DataFrame.plot.hist`.
+
+.. versionchanged:: 1.4.0
+
+.. ipython:: python
+
+   data = pd.DataFrame(
+       {
+           "a": np.random.choice(["x", "y", "z"], 1000),
+           "b": np.random.choice(["e", "f", "g"], 1000),
+           "c": np.random.randn(1000),
+           "d": np.random.randn(1000) - 1,
+       },
+   )
+
+   @savefig grouped_hist_by.png
+   data.plot.hist(by=["a", "b"], figsize=(10, 5));
+
+.. ipython:: python
+   :suppress:
+
+   plt.close("all")
 
 .. _visualization.box:
 
@@ -448,6 +476,32 @@ columns:
 
     plt.close("all")
 
+You could also create groupings with :meth:`DataFrame.plot.box`, for instance:
+
+.. versionchanged:: 1.4.0
+
+.. ipython:: python
+   :suppress:
+
+   plt.close("all")
+   np.random.seed(123456)
+
+.. ipython:: python
+   :okwarning:
+
+   df = pd.DataFrame(np.random.rand(10, 3), columns=["Col1", "Col2", "Col3"])
+   df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
+
+   plt.figure();
+
+   @savefig box_plot_ex4.png
+   bp = df.plot.box(column=["Col1", "Col2"], by="X")
+
+.. ipython:: python
+   :suppress:
+
+    plt.close("all")
+
 .. _visualization.box.return:
 
 In ``boxplot``, the return type can be controlled by the ``return_type`` keyword. The valid choices are ``{"axes", "dict", "both", None}``.
 
@@ -14,8 +14,15 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
--
--
+- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`)
+- Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
+- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
+- Fixed regression in :meth:`DataFrame.shift` where ``TypeError`` occurred when shifting a :class:`DataFrame` created by concatenation of slices and filling with values (:issue:`42719`)
+- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
+- Regression in :meth:`DataFrame.drop` doing nothing if :class:`MultiIndex` has duplicates and the indexer is a tuple or list of tuples (:issue:`42771`)
+- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
+- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
+- Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not successfully ignored (:issue:`42650`)
 
 .. ---------------------------------------------------------------------------
 
@@ -24,7 +31,8 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - Bug in :meth:`pandas.read_excel` modifies the dtypes dictionary when reading a file with duplicate columns (:issue:`42462`)
--
+- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`)
+- :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`)
 
 .. ---------------------------------------------------------------------------
 
 
@@ -35,6 +35,9 @@ Other enhancements
 -  Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
 - :meth:`Styler.bar` now validates the input arguments ``width`` and ``height`` (:issue:`42511`)
 - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
+- Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`)
+- Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
+- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -166,6 +169,10 @@ Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
 - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
+- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
+- Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`)
+- Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`)
+-
 
 .. ---------------------------------------------------------------------------
 
@@ -202,7 +209,7 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.rank` raising ``ValueError`` with ``object`` columns and ``method="first"`` (:issue:`41931`)
 - Bug in :meth:`DataFrame.rank` treating missing values and extreme values as equal (for example ``np.nan`` and ``np.inf``), causing incorrect results when ``na_option="bottom"`` or ``na_option="top"`` used (:issue:`41931`)
--
+- Bug in ``numexpr`` engine still being used when the option ``compute.use_numexpr`` is set to ``False`` (:issue:`32556`)
 
 Conversion
 ^^^^^^^^^^
@@ -225,7 +232,8 @@ Indexing
 - Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
 - Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
--
+- Bug in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` when passing an ``ascending`` value, failing to raise or incorrectly raising ``ValueError`` (:issue:`41634`)
+- Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
 
 Missing
 ^^^^^^^
@@ -260,11 +268,15 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 - Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
 - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
--
+- Bug in :meth:`DataFrame.groupby.rolling.var` that would calculate the rolling variance only on the first group (:issue:`42442`)
+- Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
+- Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`)
 
 Reshaping
 ^^^^^^^^^
+- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`)
 - :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
+- Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
 -
 
 Sparse
@@ -284,6 +296,7 @@ Styler
 
 Other
 ^^^^^
+- Bug in :meth:`CustomBusinessMonthBegin.__add__` (:meth:`CustomBusinessMonthEnd.__add__`) not applying the extra ``offset`` parameter when beginning (end) of the target month is already a business day (:issue:`41356`)
 
 .. ***DO NOT USE THIS SECTION***
 
 
@@ -108,7 +108,7 @@ dependencies:
   - fsspec>=0.7.4, <2021.6.0  # for generic remote file operations
   - gcsfs>=0.6.0  # file IO when using 'gcs://...' path
   - sqlalchemy  # pandas.read_sql, DataFrame.to_sql
-  - xarray  # DataFrame.to_xarray
+  - xarray<0.19  # DataFrame.to_xarray
   - cftime  # Needed for downstream xarray.CFTimeIndex test
   - pyreadstat  # pandas.read_spss
   - tabulate>=0.8.3  # DataFrame.to_markdown