Commit 40ca9d1 — Merge branch 'master' into ref-hybrid-3
2 parents: 874023e + 855696c

34 files changed: +621 −198 lines

doc/source/_static/style/hq_ax1.png — binary, 5.95 KB → 5.96 KB

doc/source/_static/style/hq_props.png — binary, 6.09 KB

doc/source/development/roadmap.rst — 2 additions & 2 deletions

@@ -71,8 +71,8 @@ instead of comparing as False).
 
 Long term, we want to introduce consistent missing data handling for all data
 types. This includes consistent behavior in all operations (indexing, arithmetic
-operations, comparisons, etc.). We want to eventually make the new semantics the
-default.
+operations, comparisons, etc.). There has been discussion of eventually making
+the new semantics the default.
 
 This has been discussed at
 `github #28095 <https://github.com/pandas-dev/pandas/issues/28095>`__ (and

doc/source/getting_started/intro_tutorials/01_table_oriented.rst — 1 addition & 1 deletion

@@ -176,7 +176,7 @@ these are by default not taken into account by the :func:`~DataFrame.describe` m
 
 Many pandas operations return a ``DataFrame`` or a ``Series``. The
 :func:`~DataFrame.describe` method is an example of a pandas operation returning a
-pandas ``Series``.
+pandas ``Series`` or a pandas ``DataFrame``.
 
 .. raw:: html

doc/source/whatsnew/v1.3.0.rst — 9 additions & 1 deletion

@@ -119,7 +119,9 @@ to accept more universal CSS language for arguments, such as ``'color:red;'`` in
 to allow custom CSS highlighting instead of default background coloring (:issue:`40242`).
 Enhancements to other built-in methods include extending the :meth:`.Styler.background_gradient`
 method to shade elements based on a given gradient map and not be restricted only to
-values in the DataFrame (:issue:`39930` :issue:`22727` :issue:`28901`).
+values in the DataFrame (:issue:`39930` :issue:`22727` :issue:`28901`). Additional
+built-in methods such as :meth:`.Styler.highlight_between` and :meth:`.Styler.highlight_quantile`
+have been added (:issue:`39821` and :issue:`40926`).
 
 The :meth:`.Styler.apply` now consistently allows functions with ``ndarray`` output to
 allow more flexible development of UDFs when ``axis`` is ``None`` ``0`` or ``1`` (:issue:`39393`).

@@ -219,6 +221,7 @@ Other enhancements
 - :meth:`pandas.read_csv` and :meth:`pandas.read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
+- Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 -
 
 .. ---------------------------------------------------------------------------

@@ -601,8 +604,10 @@ Deprecations
 - Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`)
 - Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`)
 - Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
+- Deprecated using :func:`merge` or :func:`join` on a different number of levels (:issue:`34862`)
 - Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
 - Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use groupby instead (:issue:`39983`)
+- Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`)
 
 .. ---------------------------------------------------------------------------

@@ -749,6 +754,7 @@ Missing
 - Bug in :class:`Grouper` now correctly propagates ``dropna`` argument and :meth:`DataFrameGroupBy.transform` now correctly handles missing values for ``dropna=True`` (:issue:`35612`)
 - Bug in :func:`isna`, and :meth:`Series.isna`, :meth:`Index.isna`, :meth:`DataFrame.isna` (and the corresponding ``notna`` functions) not recognizing ``Decimal("NaN")`` objects (:issue:`39409`)
 - Bug in :meth:`DataFrame.fillna` not accepting dictionary for ``downcast`` keyword (:issue:`40809`)
+- Bug in :func:`isna` not returning a copy of the mask for nullable types, causing any subsequent mask modification to change the original array (:issue:`40935`)
 
 MultiIndex
 ^^^^^^^^^^

@@ -831,6 +837,7 @@ Groupby/resample/rolling
 - Bug in :class:`core.window.RollingGroupby` where ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`)
 - Bug in :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising ``ValueError`` when using with nullable type columns holding ``NA`` even with ``skipna=True`` (:issue:`40585`)
 - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`)
+- Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`)
 
 Reshaping
 ^^^^^^^^^

@@ -851,6 +858,7 @@ Reshaping
 - Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a ``MultiIndex`` (:issue:`36991`)
 - Bug in :func:`to_datetime` raising error when input sequence contains unhashable items (:issue:`39756`)
 - Bug in :meth:`Series.explode` preserving index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`)
+- Bug in :func:`to_datetime` raising ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`)
 
 Sparse
 ^^^^^^
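The ``GroupBy.any``/``GroupBy.all`` enhancement entries above refer to Kleene (three-valued) logic for nullable data. A minimal pure-Python sketch of that logic, using ``None`` as a stand-in for pandas' ``NA`` (the function names are illustrative, not pandas API):

```python
def kleene_any(values):
    """Three-valued 'any': a definite True decides; otherwise NA wins over False."""
    if any(v is True for v in values):
        return True   # at least one definite True
    if any(v is None for v in values):
        return None   # an unknown value could have been True
    return False

def kleene_all(values):
    """Three-valued 'all': a definite False decides; otherwise NA wins over True."""
    if any(v is False for v in values):
        return False  # at least one definite False
    if any(v is None for v in values):
        return None   # an unknown value could have been False
    return True

print(kleene_any([False, None, True]))  # True
print(kleene_any([False, None]))        # None
print(kleene_all([True, None]))         # None
```

This is why, with ``skipna=False``, a group containing ``NA`` can yield ``NA`` rather than raising, which is the behavior the bug-fix entry for :issue:`40585` restores.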

pandas/core/array_algos/take.py — 15 additions & 23 deletions

@@ -105,14 +105,13 @@ def take_nd(
         return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
 
     arr = np.asarray(arr)
-    return _take_nd_ndarray(arr, indexer, axis, None, fill_value, allow_fill)
+    return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
 
 
 def _take_nd_ndarray(
     arr: np.ndarray,
     indexer,
     axis: int,
-    out: np.ndarray | None,
     fill_value,
     allow_fill: bool,
 ) -> np.ndarray:

@@ -124,7 +123,7 @@ def _take_nd_ndarray(
         indexer = ensure_platform_int(indexer)
 
     indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
-        arr, indexer, out, fill_value, allow_fill
+        arr, indexer, fill_value, allow_fill
    )
 
     flip_order = False

@@ -134,23 +133,20 @@ def _take_nd_ndarray(
     if flip_order:
         arr = arr.T
         axis = arr.ndim - axis - 1
-        if out is not None:
-            out = out.T
 
     # at this point, it's guaranteed that dtype can hold both the arr values
     # and the fill_value
-    if out is None:
-        out_shape_ = list(arr.shape)
-        out_shape_[axis] = len(indexer)
-        out_shape = tuple(out_shape_)
-        if arr.flags.f_contiguous and axis == arr.ndim - 1:
-            # minor tweak that can make an order-of-magnitude difference
-            # for dataframes initialized directly from 2-d ndarrays
-            # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
-            # f-contiguous transpose)
-            out = np.empty(out_shape, dtype=dtype, order="F")
-        else:
-            out = np.empty(out_shape, dtype=dtype)
+    out_shape_ = list(arr.shape)
+    out_shape_[axis] = len(indexer)
+    out_shape = tuple(out_shape_)
+    if arr.flags.f_contiguous and axis == arr.ndim - 1:
+        # minor tweak that can make an order-of-magnitude difference
+        # for dataframes initialized directly from 2-d ndarrays
+        # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
+        # f-contiguous transpose)
+        out = np.empty(out_shape, dtype=dtype, order="F")
+    else:
+        out = np.empty(out_shape, dtype=dtype)
 
     func = _get_take_nd_function(
         arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info

@@ -195,7 +191,7 @@ def take_1d(
         return arr.take(indexer)
 
     indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
-        arr, indexer, None, fill_value, True
+        arr, indexer, fill_value, True
     )
 
     # at this point, it's guaranteed that dtype can hold both the arr values

@@ -521,7 +517,6 @@ def _take_2d_multi_object(
 def _take_preprocess_indexer_and_fill_value(
     arr: np.ndarray,
     indexer: np.ndarray,
-    out: np.ndarray | None,
     fill_value,
     allow_fill: bool,
 ):

@@ -539,10 +534,7 @@ def _take_preprocess_indexer_and_fill_value(
             mask = indexer == -1
             needs_masking = mask.any()
             mask_info = mask, needs_masking
-            if needs_masking:
-                if out is not None and out.dtype != dtype:
-                    raise TypeError("Incompatible type for fill_value")
-            else:
+            if not needs_masking:
                 # if not, then depromote, set fill_value to dummy
                 # (it won't be used but we don't want the cython code
                 # to crash when trying to cast it to dtype)
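The helpers above implement a "take with fill" contract: positions where the indexer is ``-1`` receive ``fill_value`` instead of indexing from the end of the array. A pure-Python sketch of that contract (a hypothetical helper, not the pandas/cython implementation):

```python
def take_1d_with_fill(arr, indexer, fill_value=None, allow_fill=True):
    """Gather arr[i] for each i in indexer; with allow_fill=True, -1 means
    'missing' and is replaced by fill_value rather than meaning the last element."""
    if not allow_fill:
        # plain positional take: -1 keeps its usual "last element" meaning
        return [arr[i] for i in indexer]
    return [fill_value if i == -1 else arr[i] for i in indexer]

print(take_1d_with_fill([10, 20, 30], [2, -1, 0], fill_value=-99))
# [30, -99, 10]
```

The removed ``out`` parameter let callers pass a preallocated result buffer; after this refactor the output array is always allocated internally, which is what makes the ``order="F"`` fast path above possible to apply uniformly.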

pandas/core/arrays/masked.py — 1 addition & 1 deletion

@@ -352,7 +352,7 @@ def _hasna(self) -> bool:
         return self._mask.any()  # type: ignore[return-value]
 
     def isna(self) -> np.ndarray:
-        return self._mask
+        return self._mask.copy()
 
     @property
     def _na_value(self):
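This one-line change is the fix for :issue:`40935`: returning the live mask lets a caller mutate the array's internal state by editing the returned value. A minimal sketch of the aliasing problem with a hypothetical ``MaskedArray`` class (not the pandas one):

```python
class MaskedArray:
    def __init__(self, values, mask):
        self._data = list(values)
        self._mask = list(mask)  # True where the value is missing

    def isna(self):
        # Returning self._mask directly would hand out a live alias of
        # internal state; a copy keeps the array safe from caller edits.
        return self._mask.copy()

arr = MaskedArray([1, 2, 3], [False, True, False])
mask = arr.isna()
mask[0] = True           # caller modifies the returned mask...
print(arr.isna())        # ...but the array's own mask is unchanged
# [False, True, False]
```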

pandas/core/arrays/string_arrow.py — 6 additions & 0 deletions

@@ -757,3 +757,9 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
         # or .findall returns a list).
         # -> We don't know the result type. E.g. `.get` can return anything.
         return lib.map_infer_mask(arr, f, mask.view("uint8"))
+
+    def _str_lower(self):
+        return type(self)(pc.utf8_lower(self._data))
+
+    def _str_upper(self):
+        return type(self)(pc.utf8_upper(self._data))
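These additions dispatch ``.str.lower``/``.str.upper`` to pyarrow's vectorized kernels (``pc.utf8_lower``/``pc.utf8_upper``) instead of the generic element-wise ``_str_map`` fallback. The wrapping pattern can be sketched in pure Python with a hypothetical stand-in class (the vectorized kernel is simulated with a comprehension):

```python
class StringArray:
    """Toy stand-in: wraps backing storage; string ops return a new instance."""

    def __init__(self, data):
        self._data = list(data)

    def _str_lower(self):
        # type(self)(...) rather than StringArray(...) keeps subclasses
        # intact, mirroring the pandas code
        return type(self)([s.lower() for s in self._data])

    def _str_upper(self):
        return type(self)([s.upper() for s in self._data])

arr = StringArray(["Cat", "Dog"])
print(arr._str_lower()._data)  # ['cat', 'dog']
```

The real benefit in pandas is that the loop runs inside pyarrow's C++ kernel over the Arrow buffer rather than per-element in Python.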

pandas/core/frame.py — 9 additions & 1 deletion

@@ -4790,7 +4790,8 @@ def drop(
         Remove rows or columns by specifying label names and corresponding
         axis, or by specifying directly index or column names. When using a
         multi-index, labels on different levels can be removed by specifying
-        the level.
+        the level. See the `user guide <advanced.shown_levels>`
+        for more information about the now unused levels.
 
         Parameters
         ----------

@@ -7821,6 +7822,11 @@ def pivot(self, index=None, columns=None, values=None) -> DataFrame:
 
             .. versionchanged:: 0.25.0
 
+        sort : bool, default True
+            Specifies if the result should be sorted.
+
+            .. versionadded:: 1.3.0
+
         Returns
         -------
         DataFrame

@@ -7924,6 +7930,7 @@ def pivot_table(
         dropna=True,
         margins_name="All",
         observed=False,
+        sort=True,
     ) -> DataFrame:
         from pandas.core.reshape.pivot import pivot_table
 

@@ -7938,6 +7945,7 @@ def pivot_table(
             dropna=dropna,
             margins_name=margins_name,
             observed=observed,
+            sort=sort,
         )
 
     def stack(self, level: Level = -1, dropna: bool = True):
pandas/core/groupby/base.py — 0 additions & 77 deletions

@@ -7,83 +7,8 @@
 
 import collections
 
-from pandas._typing import final
-
-from pandas.core.dtypes.common import (
-    is_list_like,
-    is_scalar,
-)
-
-from pandas.core.base import PandasObject
-
 OutputKey = collections.namedtuple("OutputKey", ["label", "position"])
 
-
-class ShallowMixin(PandasObject):
-    _attributes: list[str] = []
-
-    @final
-    def _shallow_copy(self, obj, **kwargs):
-        """
-        return a new object with the replacement attributes
-        """
-        if isinstance(obj, self._constructor):
-            obj = obj.obj
-        for attr in self._attributes:
-            if attr not in kwargs:
-                kwargs[attr] = getattr(self, attr)
-        return self._constructor(obj, **kwargs)
-
-
-class GotItemMixin(PandasObject):
-    """
-    Provide the groupby facilities to the mixed object.
-    """
-
-    _attributes: list[str]
-
-    @final
-    def _gotitem(self, key, ndim, subset=None):
-        """
-        Sub-classes to define. Return a sliced object.
-
-        Parameters
-        ----------
-        key : string / list of selections
-        ndim : {1, 2}
-            requested ndim of result
-        subset : object, default None
-            subset to act on
-        """
-        # create a new object to prevent aliasing
-        if subset is None:
-            # error: "GotItemMixin" has no attribute "obj"
-            subset = self.obj  # type: ignore[attr-defined]
-
-        # we need to make a shallow copy of ourselves
-        # with the same groupby
-        kwargs = {attr: getattr(self, attr) for attr in self._attributes}
-
-        # Try to select from a DataFrame, falling back to a Series
-        try:
-            # error: "GotItemMixin" has no attribute "_groupby"
-            groupby = self._groupby[key]  # type: ignore[attr-defined]
-        except IndexError:
-            # error: "GotItemMixin" has no attribute "_groupby"
-            groupby = self._groupby  # type: ignore[attr-defined]
-
-        # error: Too many arguments for "GotItemMixin"
-        # error: Unexpected keyword argument "groupby" for "GotItemMixin"
-        # error: Unexpected keyword argument "parent" for "GotItemMixin"
-        self = type(self)(
-            subset, groupby=groupby, parent=self, **kwargs  # type: ignore[call-arg]
-        )
-        self._reset_cache()
-        if subset.ndim == 2 and (is_scalar(key) and key in subset or is_list_like(key)):
-            self._selection = key
-        return self
-
-
 # special case to prevent duplicate plots when catching exceptions when
 # forwarding methods from NDFrames
 plotting_methods = frozenset(["plot", "hist"])

@@ -122,8 +47,6 @@ def _gotitem(self, key, ndim, subset=None):
 # require postprocessing of the result by transform.
 cythonized_kernels = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"])
 
-cython_cast_blocklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"])
-
 # List of aggregation/reduction functions.
 # These map each group to a single numeric value
 reduction_kernels = frozenset(
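The deleted ``ShallowMixin`` implemented a "copy with replaced attributes" pattern: build a new instance of the same class, filling any attribute the caller did not override from the current instance. A minimal stand-alone sketch of that pattern with a hypothetical class (illustrative only):

```python
class Windowed:
    """Toy object carrying configuration attributes, like a rolling window."""

    _attributes = ["window", "center"]

    def __init__(self, obj, window=None, center=False):
        self.obj, self.window, self.center = obj, window, center

    def _shallow_copy(self, obj, **kwargs):
        # fill any attribute not explicitly overridden from this instance
        for attr in self._attributes:
            kwargs.setdefault(attr, getattr(self, attr))
        return type(self)(obj, **kwargs)

w = Windowed([1, 2, 3], window=2)
w2 = w._shallow_copy([4, 5], center=True)
print(w2.window, w2.center)  # 2 True
```

In this commit both mixins were removed from ``groupby/base.py`` because the resample/rolling code paths no longer rely on them.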

pandas/core/groupby/generic.py — 11 additions & 14 deletions

@@ -46,7 +46,6 @@
 
 from pandas.core.dtypes.cast import (
     find_common_type,
-    maybe_cast_result_dtype,
     maybe_downcast_numeric,
 )
 from pandas.core.dtypes.common import (

@@ -58,7 +57,6 @@
     is_interval_dtype,
     is_numeric_dtype,
     is_scalar,
-    needs_i8_conversion,
 )
 from pandas.core.dtypes.missing import (
     isna,

@@ -1104,13 +1102,11 @@ def _cython_agg_manager(
 
         using_array_manager = isinstance(data, ArrayManager)
 
-        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
+        def cast_agg_result(
+            result: ArrayLike, values: ArrayLike, how: str
+        ) -> ArrayLike:
             # see if we can cast the values to the desired dtype
             # this may not be the original dtype
-            assert not isinstance(result, DataFrame)
-
-            dtype = maybe_cast_result_dtype(values.dtype, how)
-            result = maybe_downcast_numeric(result, dtype)
 
             if isinstance(values, Categorical) and isinstance(result, np.ndarray):
                 # If the Categorical op didn't raise, it is dtype-preserving

@@ -1125,6 +1121,7 @@ def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
             ):
                 # We went through a SeriesGroupByPath and need to reshape
                 # GH#32223 includes case with IntegerArray values
+                # We only get here with values.dtype == object
                 result = result.reshape(1, -1)
             # test_groupby_duplicate_columns gets here with
             # result.dtype == int64, values.dtype=object, how="min"

@@ -1140,8 +1137,11 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
 
             # call our grouper again with only this block
             if values.ndim == 1:
+                # We only get here with ExtensionArray
+
                 obj = Series(values)
             else:
+                # We only get here with values.dtype == object
                 # TODO special case not needed with ArrayManager
                 obj = DataFrame(values.T)
             if obj.shape[1] == 1:

@@ -1193,7 +1193,8 @@ def array_func(values: ArrayLike) -> ArrayLike:
 
                 result = py_fallback(values)
 
-            return cast_agg_result(result, values, how)
+                return cast_agg_result(result, values, how)
+            return result
 
             # TypeError -> we may have an exception in trying to aggregate
             # continue and exclude the block

@@ -1366,11 +1367,7 @@ def _wrap_applied_output_series(
 
         # if we have date/time like in the original, then coerce dates
         # as we are stacking can easily have object dtypes here
-        so = self._selected_obj
-        if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any():
-            result = result._convert(datetime=True)
-        else:
-            result = result._convert(datetime=True)
+        result = result._convert(datetime=True)
 
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)

@@ -1507,7 +1504,7 @@ def _choose_path(self, fast_path: Callable, slow_path: Callable, group: DataFrame
         try:
             res_fast = fast_path(group)
         except AssertionError:
-            raise
+            raise  # pragma: no cover
         except Exception:
            # GH#29631 For user-defined function, we can't predict what may be
            # raised; see test_transform.test_transform_fastpath_raises
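The ``_choose_path`` hunk shows a fast-path/slow-path pattern: try the fast implementation, fall back on any user-level error, but let ``AssertionError`` (an internal invariant violation) propagate. A stand-alone sketch of the pattern with toy callables (illustrative, not the pandas code):

```python
def choose_path(fast_path, slow_path, group):
    """Try fast_path; on user-level errors fall back to slow_path."""
    try:
        return fast_path(group)
    except AssertionError:
        raise  # an internal bug: never mask it with the fallback
    except Exception:
        # user-defined functions can raise anything; fall back silently
        return slow_path(group)

# fast path raises ZeroDivisionError on an empty group -> slow path is used
print(choose_path(lambda g: 1 / len(g), lambda g: 0, []))  # 0
```

The ``# pragma: no cover`` added in the diff simply tells coverage tooling not to count the re-raise line, since tests are not expected to trigger internal assertion failures.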
