diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index cb7b9f990a98e..f6f36f6ad523b 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,5 +1,7 @@ import warnings +cimport cython + import numpy as np cimport numpy as cnp @@ -47,6 +49,7 @@ cdef inline bint is_definitely_invalid_key(object val): _SIZE_CUTOFF = 1_000_000 +@cython.freelist(32) cdef class IndexEngine: cdef readonly: diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index d2f47c9d25496..bd749d6eca18e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -104,6 +104,7 @@ cpdef bint checknull(object val): - np.datetime64 representation of NaT - np.timedelta64 representation of NaT - NA + - Decimal("NaN") Parameters ---------- @@ -143,6 +144,8 @@ cpdef bint checknull_old(object val): - NaT - np.datetime64 representation of NaT - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") Parameters ---------- @@ -175,6 +178,8 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr): - NaT - np.datetime64 representation of NaT - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") Parameters ---------- @@ -211,6 +216,7 @@ def isnaobj_old(arr: ndarray) -> ndarray: - NEGINF - NaT - NA + - Decimal("NaN") Parameters ---------- @@ -249,6 +255,8 @@ def isnaobj2d(arr: ndarray) -> ndarray: - NaT - np.datetime64 representation of NaT - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") Parameters ---------- @@ -293,6 +301,8 @@ def isnaobj2d_old(arr: ndarray) -> ndarray: - NaT - np.datetime64 representation of NaT - np.timedelta64 representation of NaT + - NA + - Decimal("NaN") Parameters ---------- diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 9d48035213126..25ebd3d3ddc62 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -49,7 +49,7 @@ def load_reduce(self): return except TypeError: pass - elif args and issubclass(args[0], BaseOffset): + elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset): # TypeError: object.__new__(Day) is not safe, use Day.__new__() cls = args[0] stack[-1] = cls.__new__(*args) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 110b47a11c3a9..a8d0a7cbfd17a 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -256,7 +256,9 @@ def take_2d_multi( @functools.lru_cache(maxsize=128) -def _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis): +def _get_take_nd_function_cached( + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int +): """ Part of _get_take_nd_function below that doesn't need `mask_info` and thus can be cached (mask_info potentially contains a numpy ndarray which is not @@ -289,7 +291,7 @@ def _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis): def _get_take_nd_function( - ndim: int, arr_dtype, out_dtype, axis: int = 0, mask_info=None + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None ): """ Get the appropriate "take" implementation for the given dimension, axis diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 965eb7f68e164..12b343ab5d895 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -657,7 +657,7 @@ def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT values : the values to create the new Index, optional name : Label, defaults to self.name """ - name = self.name if name is no_default else name + name = self._name if name is no_default else name return self._simple_new(values, name=name) @@ -665,7 +665,7 @@ def _view(self: _IndexT) -> _IndexT: """ fastpath to make a shallow copy, i.e. new object with same data. """ - result = self._simple_new(self._values, name=self.name) + result = self._simple_new(self._values, name=self._name) result._cache = self._cache return result @@ -4569,7 +4569,7 @@ def __getitem__(self, key): # pessimization of basic indexing. result = getitem(key) # Going through simple_new for performance. - return type(self)._simple_new(result, name=self.name) + return type(self)._simple_new(result, name=self._name) if com.is_bool_indexer(key): key = np.asarray(key, dtype=bool) @@ -4585,7 +4585,7 @@ def __getitem__(self, key): return result # NB: Using _constructor._simple_new would break if MultiIndex # didn't override __getitem__ - return self._constructor._simple_new(result, name=self.name) + return self._constructor._simple_new(result, name=self._name) else: return result diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5cdf4c1ecef55..f8390308b18f4 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -240,7 +240,7 @@ def _shallow_copy( values: Categorical, name: Hashable = no_default, ): - name = self.name if name is no_default else name + name = self._name if name is no_default else name if values is not None: # In tests we only get here with Categorical objects that diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 0e32e5c5d2762..31ad8b7d8a295 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -645,7 +645,7 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): def _with_freq(self, freq): arr = self._data._with_freq(freq) - return type(self)._simple_new(arr, name=self.name) + return type(self)._simple_new(arr, name=self._name) @property def _has_complex_internals(self) -> bool: diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index f714da0d0e303..02fb6c6beb391 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -250,7 +250,7 @@ def __getitem__(self, key): result = self._data[key] if isinstance(result, type(self._data)): if result.ndim == 1: - return type(self)(result, name=self.name) + return type(self)(result, name=self._name) # Unpack to ndarray for MPL compat result = result._ndarray diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7bb3dc5ab4545..0d89e75c097c1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1718,7 +1718,7 @@ def unique(self, level=None): level = self._get_level_number(level) return self._get_level_values(level=level, unique=True) - def to_frame(self, index=True, name=None) -> DataFrame: + def to_frame(self, index: bool = True, name=None) -> DataFrame: """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -2123,7 +2123,12 @@ def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take( - self: MultiIndex, indices, axis=0, allow_fill=True, fill_value=None, **kwargs + self: MultiIndex, + indices, + axis: int = 0, + allow_fill: bool = True, + fill_value=None, + **kwargs, ) -> MultiIndex: nv.validate_take((), kwargs) indices = ensure_platform_int(indices) @@ -3647,7 +3652,7 @@ def _intersection(self, other, sort=False) -> MultiIndex: zip(*uniq_tuples), sortorder=0, names=result_names ) - def _difference(self, other, sort): + def _difference(self, other, sort) -> MultiIndex: other, result_names = self._convert_can_do_setop(other) this = self._get_unique_index() @@ -3705,7 +3710,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): # -------------------------------------------------------------------- @doc(Index.astype) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): dtype = pandas_dtype(dtype) if is_categorical_dtype(dtype): msg = "> 1 ndim Categorical are not supported at this time" diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b6f476d864011..b3e4abc6c4040 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -125,7 +125,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): @doc(Index._shallow_copy) def _shallow_copy(self, values, name: Hashable = lib.no_default): if not self._can_hold_na and values.dtype.kind == "f": - name = self.name if name is lib.no_default else name + name = self._name if name is lib.no_default else name # Ensure we are not returning an Int64Index with float data: return Float64Index._simple_new(values, name=name) return super()._shallow_copy(values=values, name=name) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e446786802239..cdf2f338529be 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -459,7 +459,7 @@ def _shallow_copy(self, values, name: Hashable = no_default): return Int64Index._simple_new(values, name=name) def _view(self: RangeIndex) -> RangeIndex: - result = type(self)._simple_new(self._range, name=self.name) + result = type(self)._simple_new(self._range, name=self._name) result._cache = self._cache return result @@ -810,7 +810,7 @@ def __getitem__(self, key): """ if isinstance(key, slice): new_range = self._range[key] - return self._simple_new(new_range, name=self.name) + return self._simple_new(new_range, name=self._name) elif is_integer(key): new_key = int(key) try: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 83a7c224060a8..d87df9d224bce 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -260,7 +260,7 @@ def make_block(self, values, placement=None) -> Block: not specified """ if placement is None: - placement = self.mgr_locs + placement = self._mgr_locs if self.is_extension: values = ensure_block_shape(values, ndim=self.ndim) @@ -272,8 +272,7 @@ def make_block(self, values, placement=None) -> Block: def make_block_same_class(self, values, placement=None) -> Block: """ Wrap given values in a block of same type as self. """ if placement is None: - placement = self.mgr_locs - # TODO: perf by not going through new_block + placement = self._mgr_locs # We assume maybe_coerce_values has already been called return type(self)(values, placement=placement, ndim=self.ndim) @@ -318,7 +317,7 @@ def getitem_block(self, slicer, new_mgr_locs=None) -> Block: """ if new_mgr_locs is None: axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer - new_mgr_locs = self.mgr_locs[axis0_slicer] + new_mgr_locs = self._mgr_locs[axis0_slicer] elif not isinstance(new_mgr_locs, BlockPlacement): new_mgr_locs = BlockPlacement(new_mgr_locs) @@ -358,7 +357,7 @@ def delete(self, loc) -> None: Delete given loc(-s) from block in-place. """ self.values = np.delete(self.values, loc, 0) - self.mgr_locs = self.mgr_locs.delete(loc) + self.mgr_locs = self._mgr_locs.delete(loc) @final def apply(self, func, **kwargs) -> List[Block]: @@ -399,7 +398,7 @@ def _split_op_result(self, result) -> List[Block]: # TODO(EA2D): unnecessary with 2D EAs # if we get a 2D ExtensionArray, we need to split it into 1D pieces nbs = [] - for i, loc in enumerate(self.mgr_locs): + for i, loc in enumerate(self._mgr_locs): vals = result[i] block = self.make_block(values=vals, placement=loc) nbs.append(block) @@ -462,7 +461,7 @@ def _split(self) -> List[Block]: assert self.ndim == 2 new_blocks = [] - for i, ref_loc in enumerate(self.mgr_locs): + for i, ref_loc in enumerate(self._mgr_locs): vals = self.values[slice(i, i + 1)] nb = self.make_block(vals, BlockPlacement(ref_loc)) @@ -512,12 +511,12 @@ def make_a_block(nv, ref_loc): nv = f(mask, new_values, None) else: nv = new_values if inplace else new_values.copy() - block = make_a_block(nv, self.mgr_locs) + block = make_a_block(nv, self._mgr_locs) return [block] # ndim > 1 new_blocks = [] - for i, ref_loc in enumerate(self.mgr_locs): + for i, ref_loc in enumerate(self._mgr_locs): m = mask[i] v = new_values[i] @@ -1254,7 +1253,7 @@ def take_nd( # this assertion assert not (axis == 0 and new_mgr_locs is None) if new_mgr_locs is None: - new_mgr_locs = self.mgr_locs + new_mgr_locs = self._mgr_locs if not is_dtype_equal(new_values.dtype, self.dtype): return self.make_block(new_values, new_mgr_locs) @@ -1362,7 +1361,7 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: result = cast(np.ndarray, result) # EABlock overrides where taken = result.take(m.nonzero()[0], axis=axis) r = maybe_downcast_numeric(taken, self.dtype) - nb = self.make_block(r.T, placement=self.mgr_locs[m]) + nb = self.make_block(r.T, placement=self._mgr_locs[m]) result_blocks.append(nb) return result_blocks @@ -1423,7 +1422,7 @@ def quantile( result = quantile_compat(self.values, qs, interpolation, axis) - return new_block(result, placement=self.mgr_locs, ndim=2) + return new_block(result, placement=self._mgr_locs, ndim=2) class ExtensionBlock(Block): @@ -1449,7 +1448,7 @@ def shape(self) -> Shape: # TODO(EA2D): override unnecessary with 2D EAs if self.ndim == 1: return (len(self.values),) - return len(self.mgr_locs), len(self.values) + return len(self._mgr_locs), len(self.values) def iget(self, col): @@ -1594,7 +1593,7 @@ def take_nd( # this assertion assert not (self.ndim == 1 and new_mgr_locs is None) if new_mgr_locs is None: - new_mgr_locs = self.mgr_locs + new_mgr_locs = self._mgr_locs return self.make_block_same_class(new_values, new_mgr_locs) @@ -1630,7 +1629,7 @@ def _slice(self, slicer): ) # GH#32959 only full-slicers along fake-dim0 are valid # TODO(EA2D): won't be necessary with 2D EAs - new_locs = self.mgr_locs[first] + new_locs = self._mgr_locs[first] if len(new_locs): # effectively slice(None) slicer = slicer[1] @@ -1741,9 +1740,10 @@ def _unstack(self, unstacker, fill_value, new_placement): # TODO: in all tests we have mask.all(); can we rely on that? blocks = [ + # TODO: could cast to object depending on fill_value? self.make_block_same_class( self.values.take(indices, allow_fill=True, fill_value=fill_value), - [place], + BlockPlacement(place), ) for indices, place in zip(new_values.T, new_placement) ] diff --git a/pandas/util/_exceptions.py b/pandas/util/_exceptions.py index c31c421ee1445..e70c185628f71 100644 --- a/pandas/util/_exceptions.py +++ b/pandas/util/_exceptions.py @@ -11,6 +11,8 @@ def rewrite_exception(old_name: str, new_name: str): try: yield except Exception as err: + if not err.args: + raise msg = str(err.args[0]) msg = msg.replace(old_name, new_name) args: Tuple[str, ...] = (msg,)