pandas-dev
diff --git a/‎.pre-commit-config.yaml
+2-2 b/‎.pre-commit-config.yaml
+2-2
diff --git a/‎asv_bench/benchmarks/indexing_engines.py
+1-1 b/‎asv_bench/benchmarks/indexing_engines.py
+1-1
diff --git a/‎asv_bench/benchmarks/sparse.py
+15-2 b/‎asv_bench/benchmarks/sparse.py
+15-2
diff --git a/‎ci/code_checks.sh
+4 b/‎ci/code_checks.sh
+4
diff --git a/‎doc/source/index.rst.template
+3 b/‎doc/source/index.rst.template
+3
diff --git a/‎doc/source/user_guide/basics.rst
+4 b/‎doc/source/user_guide/basics.rst
+4
diff --git a/‎doc/source/user_guide/groupby.rst
+2-2 b/‎doc/source/user_guide/groupby.rst
+2-2
diff --git a/‎doc/source/user_guide/merging.rst
+13-1 b/‎doc/source/user_guide/merging.rst
+13-1
diff --git a/‎doc/source/user_guide/visualization.rst
+1-1 b/‎doc/source/user_guide/visualization.rst
+1-1
diff --git a/‎doc/source/whatsnew/v0.20.0.rst
+1 b/‎doc/source/whatsnew/v0.20.0.rst
+1
diff --git a/‎doc/source/whatsnew/v1.3.4.rst
+2 b/‎doc/source/whatsnew/v1.3.4.rst
+2
diff --git a/‎doc/source/whatsnew/v1.4.0.rst
+10-2 b/‎doc/source/whatsnew/v1.4.0.rst
+10-2
diff --git a/‎environment.yml
+1 b/‎environment.yml
+1
diff --git a/‎pandas/__init__.py
+120-4 b/‎pandas/__init__.py
+120-4
@@ -9,7 +9,7 @@ repos:
     -   id: absolufy-imports
         files: ^pandas/
 -   repo: https://github.com/python/black
-    rev: 21.7b0
+    rev: 21.9b0
     hooks:
     -   id: black
 -   repo: https://github.com/codespell-project/codespell
@@ -58,7 +58,7 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.23.3
+    rev: v2.29.0
     hooks:
     -   id: pyupgrade
         args: [--py38-plus]
 
@@ -1,5 +1,5 @@
 """
-Benchmarks in this fiel depend exclusively on code in _libs/
+Benchmarks in this file depend exclusively on code in _libs/
 
 If a PR does not edit anything in _libs, it is very unlikely that benchmarks
 in this file will be affected.
 
@@ -95,11 +95,11 @@ class ToCooFrame:
     def setup(self):
         N = 10000
         k = 10
-        arr = np.full((N, k), np.nan)
+        arr = np.zeros((N, k), dtype=float)
         arr[0, 0] = 3.0
         arr[12, 7] = -1.0
         arr[0, 9] = 11.2
-        self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
+        self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float", fill_value=0.0))
 
     def time_to_coo(self):
         self.df.sparse.to_coo()
@@ -195,4 +195,17 @@ def time_take(self, indices, allow_fill):
         self.sp_arr.take(indices, allow_fill=allow_fill)
 
 
+class GetItem:
+    def setup(self):
+        N = 1_000_000
+        arr = make_array(N, 1e-5, np.nan, np.float64)
+        self.sp_arr = SparseArray(arr)
+
+    def time_integer_indexing(self):
+        self.sp_arr[78]
+
+    def time_slice(self):
+        self.sp_arr[1:]
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -84,6 +84,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
       pandas/tseries/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Cython Doctests' ; echo $MSG
+    python -m pytest --doctest-cython pandas/_libs
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
 fi
 
 ### DOCSTRINGS ###
 
@@ -12,6 +12,9 @@ pandas documentation
 
 **Download documentation**: `PDF Version <pandas.pdf>`__ | `Zipped HTML <pandas.zip>`__
 
+**Previous versions**: Documentation of previous pandas versions is available at
+`pandas.pydata.org <https://pandas.pydata.org/>`__.
+
 **Useful links**:
 `Binary Installers <https://pypi.org/project/pandas>`__ |
 `Source Repository <https://github.com/pandas-dev/pandas>`__ |
 
@@ -1045,6 +1045,9 @@ not noted for a particular column will be ``NaN``:
 Mixed dtypes
 ++++++++++++
 
+.. deprecated:: 1.4.0
+   Attempting to determine which columns cannot be aggregated and silently dropping them from the results is deprecated and will be removed in a future version. If any porition of the columns or operations provided fail, the call to ``.agg`` will raise.
+
 When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
 aggregations. This is similar to how ``.groupby.agg`` works.
 
@@ -1061,6 +1064,7 @@ aggregations. This is similar to how ``.groupby.agg`` works.
    mdf.dtypes
 
 .. ipython:: python
+   :okwarning:
 
    mdf.agg(["min", "sum"])
 
 
@@ -578,7 +578,7 @@ column, which produces an aggregated result with a hierarchical index:
 
 .. ipython:: python
 
-   grouped.agg([np.sum, np.mean, np.std])
+   grouped[["C", "D"]].agg([np.sum, np.mean, np.std])
 
 
 The resulting aggregations are named for the functions themselves. If you
@@ -597,7 +597,7 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
 .. ipython:: python
 
    (
-       grouped.agg([np.sum, np.mean, np.std]).rename(
+       grouped[["C", "D"]].agg([np.sum, np.mean, np.std]).rename(
            columns={"sum": "foo", "mean": "bar", "std": "baz"}
        )
    )
 
@@ -562,7 +562,7 @@ all standard database join operations between ``DataFrame`` or named ``Series``
   (hierarchical), the number of levels must match the number of join keys
   from the right DataFrame or Series.
 * ``right_index``: Same usage as ``left_index`` for the right DataFrame or Series
-* ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults
+* ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``, ``'cross'``. Defaults
   to ``inner``. See below for more detailed description of each method.
 * ``sort``: Sort the result DataFrame by the join keys in lexicographical
   order. Defaults to ``True``, setting to ``False`` will improve performance
@@ -707,6 +707,7 @@ either the left or right tables, the values in the joined table will be
     ``right``, ``RIGHT OUTER JOIN``, Use keys from right frame only
     ``outer``, ``FULL OUTER JOIN``, Use union of keys from both frames
     ``inner``, ``INNER JOIN``, Use intersection of keys from both frames
+    ``cross``, ``CROSS JOIN``, Create the cartesian product of rows of both frames
 
 .. ipython:: python
 
@@ -751,6 +752,17 @@ either the left or right tables, the values in the joined table will be
    p.plot([left, right], result, labels=["left", "right"], vertical=False);
    plt.close("all");
 
+.. ipython:: python
+
+   result = pd.merge(left, right, how="cross")
+
+.. ipython:: python
+   :suppress:
+
+   @savefig merging_merge_cross.png
+   p.plot([left, right], result, labels=["left", "right"], vertical=False);
+   plt.close("all");
+
 You can merge a mult-indexed Series and a DataFrame, if the names of
 the MultiIndex correspond to the columns from the DataFrame. Transform
 the Series to a DataFrame using :meth:`Series.reset_index` before merging,
 
@@ -1651,7 +1651,7 @@ remedy this, ``DataFrame`` plotting supports the use of the ``colormap`` argumen
 which accepts either a Matplotlib `colormap <https://matplotlib.org/api/cm_api.html>`__
 or a string that is a name of a colormap registered with Matplotlib. A
 visualization of the default matplotlib colormaps is available `here
-<https://matplotlib.org/examples/color/colormaps_reference.html>`__.
+<https://matplotlib.org/stable/gallery/color/colormap_reference.html>`__.
 
 As matplotlib does not directly support colormaps for line-based plots, the
 colors are selected based on an even spacing determined by the number of columns
 
@@ -105,6 +105,7 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`)
    df.dtypes
 
 .. ipython:: python
+   :okwarning:
 
    df.agg(['min', 'sum'])
 
 
@@ -14,13 +14,15 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- Fixed regression in :meth:`.GroupBy.agg` where it was failing silently with mixed data types along ``axis=1`` and :class:`MultiIndex` (:issue:`43209`)
 - Fixed regression in :meth:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`)
 - Fixed regression in :meth:`DataFrame.corr` raising ``ValueError`` with ``method="spearman"`` on 32-bit platforms (:issue:`43588`)
 - Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
 - Fixed performance regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` with :class:`StringDtype` (:issue:`41596`)
 - Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
 - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
 - Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`)
+- Fixed regression in :meth:`DataFrame.explode` raising ``AssertionError`` when ``column`` is any scalar which is not a string (:issue:`43314`)
 - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)
 
 .. ---------------------------------------------------------------------------
 
@@ -338,6 +338,7 @@ Other Deprecations
 - Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
 - Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
 - Deprecated silent dropping of columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a dictionary (:issue:`43740`)
+- Deprecated silent dropping of columns that raised a ``TypeError``, ``DataError``, and some cases of ``ValueError`` in :meth:`Series.aggregate`, :meth:`DataFrame.aggregate`, :meth:`Series.groupby.aggregate`, and :meth:`DataFrame.groupby.aggregate` when used with a list (:issue:`43740`)
 
 .. ---------------------------------------------------------------------------
 
@@ -346,6 +347,7 @@ Other Deprecations
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
+- Performance improvement when converting non-string arrays to string arrays (:issue:`34483`)
 - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
 - Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
 - Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`)
@@ -357,8 +359,10 @@ Performance improvements
 - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:43370`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
 - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
+- Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`)
 - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`)
 - Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`)
+-
 
 .. ---------------------------------------------------------------------------
 
@@ -431,7 +435,8 @@ Indexing
 - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
 - Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed. (:issue:`22435`)
 - Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
-
+- Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`)
+-
 
 Missing
 ^^^^^^^
@@ -458,6 +463,8 @@ I/O
 - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
 - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
 - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`)
+- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
+-
 
 Period
 ^^^^^^
@@ -484,6 +491,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`)
 - Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`)
 - Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`)
+- Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`)
 
 Reshaping
 ^^^^^^^^^
@@ -499,7 +507,7 @@ Sparse
 ^^^^^^
 - Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
 - Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`)
--
+- Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero fill values to zero (:issue:`24817`)
 -
 
 ExtensionArray
 
@@ -122,3 +122,4 @@ dependencies:
     - types-PyMySQL
     - types-pytz
     - types-setuptools
+    - pytest-cython
@@ -131,7 +131,7 @@
     qcut,
 )
 
-import pandas.api
+from pandas import api, arrays, errors, io, plotting, testing, tseries
 from pandas.util._print_versions import show_versions
 
 from pandas.io.api import (
@@ -170,8 +170,6 @@
 from pandas.io.json import _json_normalize as json_normalize
 
 from pandas.util._tester import test
-import pandas.testing
-import pandas.arrays
 
 # use the closest tagged version if possible
 from pandas._version import get_versions
@@ -181,7 +179,6 @@
 __git_version__ = v.get("full-revisionid")
 del get_versions, v
 
-
 # GH 27101
 __deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"]
 
@@ -303,3 +300,122 @@ def __getattr__(name):
   - Time series-specific functionality: date range generation and frequency
     conversion, moving window statistics, date shifting and lagging.
 """
+
+# Use __all__ to let type checkers know what is part of the public API.
+# Pandas is not (yet) a py.typed library: the public API is determined
+# based on the documentation.
+__all__ = [
+    "BooleanDtype",
+    "Categorical",
+    "CategoricalDtype",
+    "CategoricalIndex",
+    "DataFrame",
+    "DateOffset",
+    "DatetimeIndex",
+    "DatetimeTZDtype",
+    "ExcelFile",
+    "ExcelWriter",
+    "Flags",
+    "Float32Dtype",
+    "Float64Dtype",
+    "Grouper",
+    "HDFStore",
+    "Index",
+    "IndexSlice",
+    "Int16Dtype",
+    "Int32Dtype",
+    "Int64Dtype",
+    "Int8Dtype",
+    "Interval",
+    "IntervalDtype",
+    "IntervalIndex",
+    "MultiIndex",
+    "NA",
+    "NaT",
+    "NamedAgg",
+    "NumericIndex",
+    "Period",
+    "PeriodDtype",
+    "PeriodIndex",
+    "RangeIndex",
+    "Series",
+    "SparseDtype",
+    "StringDtype",
+    "Timedelta",
+    "TimedeltaIndex",
+    "Timestamp",
+    "UInt16Dtype",
+    "UInt32Dtype",
+    "UInt64Dtype",
+    "UInt8Dtype",
+    "api",
+    "array",
+    "arrays",
+    "bdate_range",
+    "concat",
+    "crosstab",
+    "cut",
+    "date_range",
+    "describe_option",
+    "errors",
+    "eval",
+    "factorize",
+    "get_dummies",
+    "get_option",
+    "infer_freq",
+    "interval_range",
+    "io",
+    "isna",
+    "isnull",
+    "json_normalize",
+    "lreshape",
+    "melt",
+    "merge",
+    "merge_asof",
+    "merge_ordered",
+    "notna",
+    "notnull",
+    "offsets",
+    "option_context",
+    "options",
+    "period_range",
+    "pivot",
+    "pivot_table",
+    "plotting",
+    "qcut",
+    "read_clipboard",
+    "read_csv",
+    "read_excel",
+    "read_feather",
+    "read_fwf",
+    "read_gbq",
+    "read_hdf",
+    "read_html",
+    "read_json",
+    "read_orc",
+    "read_parquet",
+    "read_pickle",
+    "read_sas",
+    "read_spss",
+    "read_sql",
+    "read_sql_query",
+    "read_sql_table",
+    "read_stata",
+    "read_table",
+    "read_xml",
+    "reset_option",
+    "set_eng_float_format",
+    "set_option",
+    "show_versions",
+    "test",
+    "testing",
+    "timedelta_range",
+    "to_datetime",
+    "to_numeric",
+    "to_pickle",
+    "to_timedelta",
+    "tseries",
+    "unique",
+    "value_counts",
+    "wide_to_long",
+]
Original file line number	Diff line number	Diff line change
`@@ -578,7 +578,7 @@ column, which produces an aggregated result with a hierarchical index:`
`578`	`578`
`579`	`579`	`.. ipython:: python`
`580`	`580`
`581`		`- grouped.agg([np.sum, np.mean, np.std])`
	`581`	`+ grouped[["C", "D"]].agg([np.sum, np.mean, np.std])`
`582`	`582`
`583`	`583`
`584`	`584`	`The resulting aggregations are named for the functions themselves. If you`
@@ -597,7 +597,7 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
`597`	`597`	`.. ipython:: python`
`598`	`598`
`599`	`599`	`(`
`600`		`- grouped.agg([np.sum, np.mean, np.std]).rename(`
	`600`	`+ grouped[["C", "D"]].agg([np.sum, np.mean, np.std]).rename(`
`601`	`601`	`columns={"sum": "foo", "mean": "bar", "std": "baz"}`
`602`	`602`	`)`
`603`	`603`	`)`