diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index a4ae35ef34a9c..e41f185e08443 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -454,6 +454,16 @@ def setup(self, dtype, method, application, ncols): # DataFrameGroupBy doesn't have these methods raise NotImplementedError + if application == "transformation" and method in [ + "head", + "tail", + "unique", + "value_counts", + "size", + ]: + # DataFrameGroupBy doesn't have these methods + raise NotImplementedError + ngroups = 1000 size = ngroups * 2 rng = np.arange(ngroups).reshape(-1, 1) @@ -480,7 +490,7 @@ def setup(self, dtype, method, application, ncols): if len(cols) == 1: cols = cols[0] - if application == "transform": + if application == "transformation": if method == "describe": raise NotImplementedError diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index 6542b7a251644..da18084da92f9 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -51,7 +51,7 @@ class BlockPlacement: def __len__(self) -> int: ... def delete(self, loc) -> BlockPlacement: ... def append(self, others: list[BlockPlacement]) -> BlockPlacement: ... - def tile_for_unstack(self, factor: int) -> np.ndarray: ... + def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ... 
class SharedBlock: _mgr_locs: BlockPlacement diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index f6c404c07c7e4..1331fc07386fb 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,24 +1,11 @@ - -from libc.stdlib cimport ( - free, - malloc, -) - import numpy as np cimport numpy as cnp -from numpy cimport ( - int64_t, - intp_t, - ndarray, -) cnp.import_array() from pandas._libs.util cimport is_array -from pandas._libs.lib import is_scalar - cdef cnp.dtype _dtype_obj = np.dtype("object") diff --git a/pandas/_libs/tslibs/strptime.pyi b/pandas/_libs/tslibs/strptime.pyi index cf7ae8508a45f..fd88bc6938294 100644 --- a/pandas/_libs/tslibs/strptime.pyi +++ b/pandas/_libs/tslibs/strptime.pyi @@ -9,4 +9,4 @@ def array_strptime( errors: str = "raise", ) -> tuple[np.ndarray, np.ndarray]: ... -# first ndarray is M8[ns], second is object ndarray of tzinfo | None +# first ndarray is M8[ns], second is object ndarray of tzinfo | None diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c79dadcadc8cd..d02df6a65d359 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -617,7 +617,7 @@ def codes(self) -> np.ndarray: def group_arraylike(self) -> ArrayLike: """ Analogous to result_index, but holding an ArrayLike to ensure - we can can retain ExtensionDtypes. + we can retain ExtensionDtypes. """ if self._group_index is not None: # _group_index is set in __init__ for MultiIndex cases diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a1c411031a465..6887b919cc7d6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3060,7 +3060,7 @@ def _union(self, other: Index, sort): try: return self._outer_indexer(other)[0] except (TypeError, IncompatibleFrequency): - # incomparable objects + # incomparable objects; should only be for object dtype value_list = list(lvals) # worth making this faster? 
a very unusual case @@ -3074,7 +3074,7 @@ def _union(self, other: Index, sort): result = algos.union_with_duplicates(lvals, rvals) return _maybe_try_sort(result, sort) - # Self may have duplicates + # Self may have duplicates; other already checked as unique # find indexes of things in "other" that are not in "self" if self._index_as_unique: indexer = self.get_indexer(other) @@ -3089,6 +3089,7 @@ def _union(self, other: Index, sort): result = lvals if not self.is_monotonic or not other.is_monotonic: + # if both are monotonic then result should already be sorted result = _maybe_try_sort(result, sort) return result @@ -3194,6 +3195,7 @@ def _intersection(self, other: Index, sort=False): try: result = self._inner_indexer(other)[0] except TypeError: + # non-comparable; should only be for object dtype pass else: # TODO: algos.unique1d should preserve DTA/TDA @@ -4485,7 +4487,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: def _join_monotonic( self, other: Index, how: str_t = "left" ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: - # We only get here with matching dtypes + # We only get here with matching dtypes and both monotonic increasing assert other.dtype == self.dtype if self.equals(other): diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 86087dc321bac..583a22d09b110 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1035,7 +1035,7 @@ def quantile( def unstack(self, unstacker, fill_value) -> ArrayManager: """ - Return a BlockManager with all blocks unstacked.. + Return an ArrayManager with all blocks unstacked. 
Parameters ---------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index da0f8d2549a8b..bcb4dd284465b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1291,6 +1291,7 @@ def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool): unstacker : reshape._Unstacker fill_value : int Only used in ExtensionBlock._unstack + new_placement : np.ndarray[np.intp] allow_fill : bool Returns diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 386a4ef12e6b5..bb4d1f96e1405 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1376,7 +1376,7 @@ def quantile( def unstack(self, unstacker, fill_value) -> BlockManager: """ - Return a BlockManager with all blocks unstacked.. + Return a BlockManager with all blocks unstacked. Parameters ----------