
Commit 5bad13f

Merge pull request #157 from pandas-dev/master

Sync Fork from Upstream Repo

2 parents 91209d8 + d7b04d1, commit 5bad13f

27 files changed, +1099 -499 lines

doc/cheatsheet/Pandas_Cheat_Sheet.pdf

9.56 KB (binary file not shown)
9.14 KB (binary file not shown)

doc/source/ecosystem.rst

+2 -1

@@ -98,7 +98,8 @@ which can be used for a wide variety of time series data mining tasks.
 Visualization
 -------------
 
-While :ref:`pandas has built-in support for data visualization with matplotlib <visualization>`,
+`Pandas has its own Styler class for table visualization <user_guide/style.ipynb>`_, and while
+:ref:`pandas also has built-in support for data visualization through charts with matplotlib <visualization>`,
 there are a number of other pandas-compatible libraries.
 
 `Altair <https://altair-viz.github.io/>`__

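As an aside on the new sentence above: the ``Styler`` class it links to is the table-visualization entry point. A minimal sketch, assuming a Jupyter-style environment for rich display (the data and names here are illustrative):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(4, 3), columns=["a", "b", "c"])

    # Table visualization: Styler attaches CSS to the DataFrame's HTML
    # representation instead of drawing a chart.
    styler = df.style.format("{:.2f}").highlight_max(axis=0)
    html = styler.render()  # HTML string; rendered richly in a notebook
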
doc/source/user_guide/index.rst

+1 -1

@@ -38,12 +38,12 @@ Further information on any specific method can be obtained in the
     integer_na
     boolean
     visualization
+    style
     computation
     groupby
     window
     timeseries
     timedeltas
-    style
     options
     enhancingperf
     scale

doc/source/user_guide/style.ipynb

+785 -407

Large diffs are not rendered by default.

doc/source/user_guide/visualization.rst

+6 -3

@@ -2,9 +2,12 @@
 
 {{ header }}
 
-*************
-Visualization
-*************
+*******************
+Chart Visualization
+*******************
+
+This section demonstrates visualization through charting. For information on
+visualization of tabular data please see the section on `Table Visualization <style.ipynb>`_.
 
 We use the standard convention for referencing the matplotlib API:

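For contrast with the table-styling note above, a minimal charting sketch in the matplotlib convention this renamed section uses (matplotlib must be installed; the data is illustrative):

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    ts = pd.Series(
        np.random.randn(100).cumsum(),
        index=pd.date_range("2021-01-01", periods=100),
    )

    # Chart visualization: Series.plot draws with matplotlib under the hood.
    ts.plot(title="Random walk")
    plt.show()
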
doc/source/user_guide/window.rst

+1 -1

@@ -101,7 +101,7 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of wei
 
 All windowing operations support a ``min_periods`` argument that dictates the minimum amount of
 non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``.
-``min_peridos`` defaults to 1 for time-based windows and ``window`` for fixed windows
+``min_periods`` defaults to 1 for time-based windows and ``window`` for fixed windows
 
 .. ipython:: python

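A small sketch of the behaviour the corrected sentence describes (values are illustrative):

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, 2.0, np.nan, 4.0, 5.0])

    # Fixed window: min_periods defaults to the window size (3 here), so any
    # window with fewer than 3 non-NaN values yields NaN.
    s.rolling(window=3).sum()

    # Relaxing min_periods lets partially filled windows produce a value.
    s.rolling(window=3, min_periods=1).sum()
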
doc/source/whatsnew/v1.3.0.rst

+33

@@ -302,6 +302,38 @@ cast to ``dtype=object`` (:issue:`38709`)
     ser2
 
 
+.. _whatsnew_130.notable_bug_fixes.rolling_groupby_column:
+
+GroupBy.rolling no longer returns grouped-by column in values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The group-by column will now be dropped from the result of a
+``groupby.rolling`` operation (:issue:`32262`)
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
+    df
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [1]: df.groupby("A").rolling(2).sum()
+    Out[1]:
+           A    B
+    A
+    1 0  NaN  NaN
+      1  2.0  1.0
+    2 2  NaN  NaN
+    3 3  NaN  NaN
+
+*New behavior*:
+
+.. ipython:: python
+
+    df.groupby("A").rolling(2).sum()
+
 .. _whatsnew_130.notable_bug_fixes.rolling_var_precision:
 
 Removed artificial truncation in rolling variance and standard deviation
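Regarding the ``GroupBy.rolling`` change in the hunk above: code that relied on the grouped-by column appearing in the result can approximate the old output by selecting the key column explicitly. This workaround is a sketch, not part of the release note itself:

    import pandas as pd

    df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})

    # Including the group key in the column selection keeps an "A" column in
    # the rolled result, similar to the pre-1.3 behaviour.
    df.groupby("A")[["A", "B"]].rolling(2).sum()
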
@@ -501,6 +533,7 @@ Numeric
 - Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`)
 - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
 - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
+- Bug in ``rank`` method for :class:`Series`, :class:`DataFrame`, :class:`DataFrameGroupBy`, and :class:`SeriesGroupBy` treating the most negative ``int64`` value as missing (:issue:`32859`)
 - Bug in :func:`select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36569`)
 - Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`)
 - Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`)

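To illustrate the ``rank`` bug-fix entry (:issue:`32859`) above, a short sketch; the expected ranks reflect the fixed behaviour:

    import numpy as np
    import pandas as pd

    s = pd.Series([3, np.iinfo(np.int64).min, 7], dtype="int64")

    # Previously the most negative int64 value was masked as if missing; with
    # the fix it ranks as the smallest value: [2.0, 1.0, 3.0].
    s.rank()
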
pandas/_libs/algos.pyx

+19 -7

@@ -962,6 +962,7 @@ ctypedef fused rank_t:
 def rank_1d(
     ndarray[rank_t, ndim=1] values,
     const intp_t[:] labels,
+    bint is_datetimelike=False,
     ties_method="average",
     bint ascending=True,
     bint pct=False,
@@ -977,6 +978,8 @@ def rank_1d(
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`. If not called
         from a groupby operation, will be an array of 0's
+    is_datetimelike : bool, default False
+        True if `values` contains datetime-like entries.
     ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
         'average'
         * average: average rank of group
@@ -1032,7 +1035,7 @@
 
     if rank_t is object:
         mask = missing.isnaobj(masked_vals)
-    elif rank_t is int64_t:
+    elif rank_t is int64_t and is_datetimelike:
         mask = (masked_vals == NPY_NAT).astype(np.uint8)
     elif rank_t is float64_t:
         mask = np.isnan(masked_vals).astype(np.uint8)
@@ -1059,7 +1062,7 @@
         if rank_t is object:
             nan_fill_val = NegInfinity()
         elif rank_t is int64_t:
-            nan_fill_val = np.iinfo(np.int64).min
+            nan_fill_val = NPY_NAT
         elif rank_t is uint64_t:
             nan_fill_val = 0
         else:
@@ -1275,6 +1278,7 @@ def rank_1d(
 def rank_2d(
     ndarray[rank_t, ndim=2] in_arr,
     int axis=0,
+    bint is_datetimelike=False,
     ties_method="average",
     bint ascending=True,
     na_option="keep",
@@ -1299,7 +1303,9 @@ def rank_2d(
     tiebreak = tiebreakers[ties_method]
 
     keep_na = na_option == 'keep'
-    check_mask = rank_t is not uint64_t
+
+    # For cases where a mask is not possible, we can avoid mask checks
+    check_mask = not (rank_t is uint64_t or (rank_t is int64_t and not is_datetimelike))
 
     if axis == 0:
         values = np.asarray(in_arr).T.copy()
@@ -1310,28 +1316,34 @@ def rank_2d(
     if values.dtype != np.object_:
         values = values.astype('O')
 
-    if rank_t is not uint64_t:
+    if check_mask:
         if ascending ^ (na_option == 'top'):
             if rank_t is object:
                 nan_value = Infinity()
             elif rank_t is float64_t:
                 nan_value = np.inf
-            elif rank_t is int64_t:
+
+            # int64 and datetimelike
+            else:
                 nan_value = np.iinfo(np.int64).max
 
         else:
             if rank_t is object:
                 nan_value = NegInfinity()
             elif rank_t is float64_t:
                 nan_value = -np.inf
-            elif rank_t is int64_t:
+
+            # int64 and datetimelike
+            else:
                 nan_value = NPY_NAT
 
         if rank_t is object:
             mask = missing.isnaobj2d(values)
         elif rank_t is float64_t:
             mask = np.isnan(values)
-        elif rank_t is int64_t:
+
+        # int64 and datetimelike
+        else:
             mask = values == NPY_NAT
 
         np.putmask(values, mask, nan_value)

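At the user level, the ``is_datetimelike`` flag threaded through ``rank_1d``/``rank_2d`` means the ``NPY_NAT`` sentinel is only treated as missing for datetime-like input. A rough sketch of the distinction:

    import pandas as pd

    # Datetime-like input: NaT is still recognised as missing and handled
    # according to na_option (kept as NaN by default).
    dates = pd.Series(pd.to_datetime(["2021-01-02", None, "2021-01-01"]))
    dates.rank()

    # Plain int64 input: no sentinel value is masked; every element gets a rank.
    ints = pd.Series([2, 1, 3], dtype="int64")
    ints.rank()
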
pandas/_libs/groupby.pyx

+9 -14

@@ -681,18 +681,17 @@ group_mean_float64 = _group_mean['double']
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_ohlc(floating[:, ::1] out,
-                int64_t[::1] counts,
-                ndarray[floating, ndim=2] values,
-                const intp_t[:] labels,
-                Py_ssize_t min_count=-1):
+def group_ohlc(floating[:, ::1] out,
+               int64_t[::1] counts,
+               ndarray[floating, ndim=2] values,
+               const intp_t[:] labels,
+               Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
     cdef:
         Py_ssize_t i, j, N, K, lab
-        floating val, count
-        Py_ssize_t ngroups = len(counts)
+        floating val
 
     assert min_count == -1, "'min_count' only used in add and prod"
 
@@ -727,10 +726,6 @@ def _group_ohlc(floating[:, ::1] out,
         out[lab, 3] = val
 
 
-group_ohlc_float32 = _group_ohlc['float']
-group_ohlc_float64 = _group_ohlc['double']
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_quantile(ndarray[float64_t] out,
@@ -1079,9 +1074,8 @@ def group_rank(float64_t[:, ::1] out,
     ngroups : int
         This parameter is not used, is needed to match signatures of other
         groupby functions.
-    is_datetimelike : bool, default False
-        unused in this method but provided for call compatibility with other
-        Cython transformations
+    is_datetimelike : bool
+        True if `values` contains datetime-like entries.
     ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
         'average'
         * average: average rank of group
@@ -1109,6 +1103,7 @@ def group_rank(float64_t[:, ::1] out,
     result = rank_1d(
         values=values[:, 0],
         labels=labels,
+        is_datetimelike=is_datetimelike,
        ties_method=ties_method,
        ascending=ascending,
        pct=pct,

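The renamed ``group_ohlc`` kernel backs the user-facing ``GroupBy.ohlc()`` aggregation. A small sketch with integer input (which, per the ``groupby/ops.py`` change further down, is cast to float so the output can hold NaN); names and data are illustrative:

    import pandas as pd

    df = pd.DataFrame(
        {"ticker": ["A", "A", "A", "B", "B"], "price": [10, 12, 9, 5, 7]}
    )

    # open/high/low/close per group; columns come back as float64 even though
    # the input column is int64.
    df.groupby("ticker")["price"].ohlc()
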
pandas/_libs/internals.pyi

+58

@@ -0,0 +1,58 @@
+from typing import (
+    Iterator,
+    Sequence,
+    overload,
+)
+
+import numpy as np
+
+from pandas._typing import ArrayLike
+
+def slice_len(slc: slice, objlen: int = ...) -> int: ...
+
+
+def get_blkno_indexers(
+    blknos: np.ndarray,  # int64_t[:]
+    group: bool = ...,
+) -> list[tuple[int, slice | np.ndarray]]: ...
+
+
+def get_blkno_placements(
+    blknos: np.ndarray,
+    group: bool = ...,
+) -> Iterator[tuple[int, BlockPlacement]]: ...
+
+
+class BlockPlacement:
+    def __init__(self, val: int | slice | np.ndarray): ...
+
+    @property
+    def indexer(self) -> np.ndarray | slice: ...
+
+    @property
+    def as_array(self) -> np.ndarray: ...
+
+    @property
+    def is_slice_like(self) -> bool: ...
+
+    @overload
+    def __getitem__(self, loc: slice | Sequence[int]) -> BlockPlacement: ...
+
+    @overload
+    def __getitem__(self, loc: int) -> int: ...
+
+    def __iter__(self) -> Iterator[int]: ...
+
+    def __len__(self) -> int: ...
+
+    def delete(self, loc) -> BlockPlacement: ...
+
+    def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...
+
+
+class Block:
+    _mgr_locs: BlockPlacement
+    ndim: int
+    values: ArrayLike
+
+    def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ...

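The new stub annotates the existing internal ``BlockPlacement`` class; a rough usage sketch of what those annotations cover (private API, shown only to make the stub concrete):

    from pandas._libs.internals import BlockPlacement

    # A placement can be built from an int, a slice, or an integer ndarray.
    bp = BlockPlacement(slice(0, 4, 2))
    bp.is_slice_like   # True for slice-backed placements
    bp.as_array        # ndarray of covered positions, e.g. array([0, 2])
    len(bp)            # number of positions
    list(bp)           # iteration yields plain ints
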
pandas/_libs/testing.pyi

+8

@@ -0,0 +1,8 @@
+
+
+def assert_dict_equal(a, b, compare_keys: bool = ...): ...
+
+def assert_almost_equal(a, b,
+                        rtol: float = ..., atol: float = ...,
+                        check_dtype: bool = ...,
+                        obj=..., lobj=..., robj=..., index_values=...): ...

pandas/_testing/asserters.py

+7 -1

@@ -154,6 +154,9 @@ def assert_almost_equal(
                 else:
                     obj = "Input"
                 assert_class_equal(left, right, obj=obj)
+
+        # if we have "equiv", this becomes True
+        check_dtype = bool(check_dtype)
         _testing.assert_almost_equal(
             left, right, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs
         )
@@ -388,12 +391,15 @@ def _get_ilevel_values(index, level):
             msg = f"{obj} values are different ({np.round(diff, 5)} %)"
             raise_assert_detail(obj, msg, left, right)
         else:
+
+            # if we have "equiv", this becomes True
+            exact_bool = bool(exact)
             _testing.assert_almost_equal(
                 left.values,
                 right.values,
                 rtol=rtol,
                 atol=atol,
-                check_dtype=exact,
+                check_dtype=exact_bool,
                 obj=obj,
                 lobj=left,
                 robj=right,

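The ``bool(...)`` coercions above matter because ``exact``/``check_dtype`` may arrive as the string ``"equiv"`` rather than a bool. A sketch using the testing helpers (``pandas._testing`` is semi-private but is what the test suite itself uses):

    import pandas as pd
    import pandas._testing as tm

    # exact="equiv" (the default) treats RangeIndex and Int64Index as
    # equivalent; internally the string is coerced with bool(...) before it is
    # passed down as check_dtype.
    tm.assert_index_equal(pd.RangeIndex(3), pd.Index([0, 1, 2]), exact="equiv")
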
pandas/core/algorithms.py

+4 -2

@@ -1031,21 +1031,23 @@ def rank(
         Whether or not to the display the returned rankings in integer form
         (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
     """
+    is_datetimelike = needs_i8_conversion(values.dtype)
+    values = _get_values_for_rank(values)
     if values.ndim == 1:
-        values = _get_values_for_rank(values)
         ranks = algos.rank_1d(
             values,
             labels=np.zeros(len(values), dtype=np.intp),
+            is_datetimelike=is_datetimelike,
             ties_method=method,
             ascending=ascending,
             na_option=na_option,
             pct=pct,
         )
     elif values.ndim == 2:
-        values = _get_values_for_rank(values)
         ranks = algos.rank_2d(
             values,
             axis=axis,
+            is_datetimelike=is_datetimelike,
             ties_method=method,
             ascending=ascending,
             na_option=na_option,

pandas/core/frame.py

+1 -1

@@ -528,7 +528,7 @@ class DataFrame(NDFrame, OpsMixin):
     >>> from dataclasses import make_dataclass
     >>> Point = make_dataclass("Point", [("x", int), ("y", int)])
     >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
-        x  y
+       x  y
     0  0  0
     1  0  3
     2  2  3

pandas/core/groupby/ops.py

+6

@@ -486,6 +486,12 @@ def _get_cython_func_and_vals(
             func = _get_cython_function(kind, how, values.dtype, is_numeric)
         else:
             raise
+    else:
+        if values.dtype.kind in ["i", "u"]:
+            if how in ["ohlc"]:
+                # The output may still include nans, so we have to cast
+                values = ensure_float64(values)
+
     return func, values
 
 @final
