diff --git a/pandas/_typing.py b/pandas/_typing.py index f90ef33434773..7c74fc54b8d67 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -25,7 +25,7 @@ Optional, Sequence, Tuple, - Type, + Type as type_t, TypeVar, Union, ) @@ -119,7 +119,7 @@ # dtypes NpDtype = Union[str, np.dtype] Dtype = Union[ - "ExtensionDtype", NpDtype, Type[Union[str, float, int, complex, bool, object]] + "ExtensionDtype", NpDtype, type_t[Union[str, float, int, complex, bool, object]] ] # DtypeArg specifies all allowable dtypes in a function's dtype argument DtypeArg = Union[Dtype, Dict[Hashable, Dtype]] diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 129c6c061d11c..8104b0170fbe2 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -7,16 +7,14 @@ from typing import ( TYPE_CHECKING, Any, - List, - Optional, - Tuple, - Type, - Union, ) import numpy as np -from pandas._typing import DtypeObj +from pandas._typing import ( + DtypeObj, + type_t, +) from pandas.errors import AbstractMethodError from pandas.core.dtypes.generic import ( @@ -101,7 +99,7 @@ def __from_arrow__( provided for registering virtual subclasses. """ - _metadata: Tuple[str, ...] = () + _metadata: tuple[str, ...] = () def __str__(self) -> str: return self.name @@ -153,7 +151,7 @@ def na_value(self) -> object: return np.nan @property - def type(self) -> Type[Any]: + def type(self) -> type[Any]: """ The scalar type for the array, e.g. ``int`` @@ -190,7 +188,7 @@ def name(self) -> str: raise AbstractMethodError(self) @property - def names(self) -> Optional[List[str]]: + def names(self) -> list[str] | None: """ Ordered list of field names, or None if there are no fields. @@ -200,7 +198,7 @@ def names(self) -> Optional[List[str]]: return None @classmethod - def construct_array_type(cls) -> Type[ExtensionArray]: + def construct_array_type(cls) -> type_t[ExtensionArray]: """ Return the array type associated with this dtype. @@ -337,7 +335,7 @@ def _is_boolean(self) -> bool: """ return False - def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: """ Return the common dtype, if one exists. @@ -366,7 +364,7 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: return None -def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: +def register_extension_dtype(cls: type[ExtensionDtype]) -> type[ExtensionDtype]: """ Register an ExtensionType with pandas as a class decorator. 
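A note on the Type as type_t alias introduced above: ExtensionDtype defines its own type property, so inside that class body a bare "type" in an annotation resolves, for type checkers, to the property rather than the builtin; aliasing typing.Type sidesteps the shadowing while the rest of the codebase can use the lowercase builtin. A minimal, self-contained sketch of the same situation (the class and names here are illustrative, not pandas code):

from typing import Type as type_t


class ExampleDtype:
    @property
    def type(self) -> type_t[object]:
        # from here on, "type" names this property in the class scope,
        # so annotations below reach the builtin through the alias
        return object

    @classmethod
    def construct_array_type(cls) -> type_t[list]:
        # returns a class object, not an instance
        return list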
@@ -409,9 +407,9 @@ class Registry: """ def __init__(self): - self.dtypes: List[Type[ExtensionDtype]] = [] + self.dtypes: list[type[ExtensionDtype]] = [] - def register(self, dtype: Type[ExtensionDtype]) -> None: + def register(self, dtype: type[ExtensionDtype]) -> None: """ Parameters ---------- @@ -422,9 +420,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: self.dtypes.append(dtype) - def find( - self, dtype: Union[Type[ExtensionDtype], str] - ) -> Optional[Type[ExtensionDtype]]: + def find(self, dtype: type[ExtensionDtype] | str) -> type[ExtensionDtype] | None: """ Parameters ---------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a3744ffa7f9bc..714e659f99894 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -15,15 +15,8 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, - Optional, Sequence, - Set, Sized, - Tuple, - Type, - Union, cast, overload, ) @@ -125,7 +118,7 @@ def maybe_convert_platform( - values: Union[list, tuple, range, np.ndarray, ExtensionArray] + values: list | tuple | range | np.ndarray | ExtensionArray, ) -> ArrayLike: """ try to do platform conversion, allow ndarray or list here """ if isinstance(values, (list, tuple, range)): @@ -159,7 +152,7 @@ def is_nested_object(obj) -> bool: ) -def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar: +def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar: """ Cast scalar to Timestamp or Timedelta if scalar is datetime-like and dtype is not object. @@ -245,9 +238,7 @@ def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): raise TypeError(f"Cannot cast {repr(value)} to {dtype}") -def maybe_downcast_to_dtype( - result: ArrayLike, dtype: Union[str, np.dtype] -) -> ArrayLike: +def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike: """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 @@ -460,7 +451,7 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: def maybe_cast_to_extension_array( - cls: Type[ExtensionArray], obj: ArrayLike, dtype: Optional[ExtensionDtype] = None + cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None ) -> ArrayLike: """ Call to `_from_sequence` that returns the object unchanged on Exception. @@ -727,7 +718,7 @@ def _ensure_dtype_type(value, dtype: np.dtype): return dtype.type(value) -def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: +def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: """ Interpret the dtype from a scalar or array. @@ -744,7 +735,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) -def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: +def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]: """ Interpret the dtype from a scalar. @@ -834,7 +825,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, return dtype, val -def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: +def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]: """ Convert datetimelike-keyed dicts to a Timestamp-keyed dict. 
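The Registry changes above annotate a plain decorator-registry: register_extension_dtype appends the class to a module-level Registry and returns it unchanged, and find later resolves either a class or its string name. A self-contained sketch of that shape, with assumed names rather than the actual pandas implementation:

from __future__ import annotations


class Registry:
    def __init__(self) -> None:
        self.dtypes: list[type] = []

    def register(self, dtype: type) -> None:
        self.dtypes.append(dtype)

    def find(self, dtype: type | str) -> type | None:
        # accept either the class itself or its "name" attribute
        if not isinstance(dtype, str):
            return dtype
        for registered in self.dtypes:
            if getattr(registered, "name", None) == dtype:
                return registered
        return None


_registry = Registry()


def register_extension_dtype(cls: type) -> type:
    _registry.register(cls)
    return cls  # a class decorator must hand the class back


@register_extension_dtype
class DemoDtype:
    name = "demo"


assert _registry.find("demo") is DemoDtype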
@@ -852,7 +843,7 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: def infer_dtype_from_array( arr, pandas_dtype: bool = False -) -> Tuple[DtypeObj, ArrayLike]: +) -> tuple[DtypeObj, ArrayLike]: """ Infer the dtype from an array. @@ -944,7 +935,7 @@ def maybe_upcast( values: np.ndarray, fill_value: Scalar = np.nan, copy: bool = False, -) -> Tuple[np.ndarray, Scalar]: +) -> tuple[np.ndarray, Scalar]: """ Provide explicit type promotion and coercion. @@ -970,7 +961,7 @@ def maybe_upcast( return values, fill_value -def invalidate_string_dtypes(dtype_set: Set[DtypeObj]): +def invalidate_string_dtypes(dtype_set: set[DtypeObj]): """ Change string-like dtypes to object for ``DataFrame.select_dtypes()``. @@ -1524,7 +1515,7 @@ def maybe_castable(dtype: np.dtype) -> bool: return dtype.name not in POSSIBLY_CAST_DTYPES -def maybe_infer_to_datetimelike(value: Union[np.ndarray, List]): +def maybe_infer_to_datetimelike(value: np.ndarray | list): """ we might have an array (or single object) that is datetime-like, and no dtype is passed; don't change the value unless we find a @@ -1619,8 +1610,8 @@ def try_timedelta(v: np.ndarray) -> np.ndarray: def maybe_cast_to_datetime( - value: Union[ExtensionArray, np.ndarray, list], dtype: Optional[DtypeObj] -) -> Union[ExtensionArray, np.ndarray, list]: + value: ExtensionArray | np.ndarray | list, dtype: DtypeObj | None +) -> ExtensionArray | np.ndarray | list: """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT @@ -1784,7 +1775,7 @@ def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: return dtype -def find_common_type(types: List[DtypeObj]) -> DtypeObj: +def find_common_type(types: list[DtypeObj]) -> DtypeObj: """ Find a common data type among the given dtypes. @@ -1873,7 +1864,7 @@ def construct_2d_arraylike_from_scalar( def construct_1d_arraylike_from_scalar( - value: Scalar, length: int, dtype: Optional[DtypeObj] + value: Scalar, length: int, dtype: DtypeObj | None ) -> ArrayLike: """ create a np.ndarray / pandas type of specified shape and dtype @@ -1947,7 +1938,7 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: def construct_1d_ndarray_preserving_na( - values: Sequence, dtype: Optional[DtypeObj] = None, copy: bool = False + values: Sequence, dtype: DtypeObj | None = None, copy: bool = False ) -> np.ndarray: """ Construct a new ndarray, coercing `values` to `dtype`, preserving NA. @@ -1997,7 +1988,7 @@ def construct_1d_ndarray_preserving_na( def maybe_cast_to_integer_array( - arr: Union[list, np.ndarray], dtype: np.dtype, copy: bool = False + arr: list | np.ndarray, dtype: np.dtype, copy: bool = False ): """ Takes any dtype and returns the casted version, raising for when data is diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3d3b8b745d4ab..2785874878c96 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -7,13 +7,7 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, MutableMapping, - Optional, - Tuple, - Type, - Union, cast, ) @@ -36,6 +30,7 @@ DtypeObj, NpDtype, Ordered, + type_t, ) from pandas.core.dtypes.base import ( @@ -81,12 +76,12 @@ class PandasExtensionDtype(ExtensionDtype): subdtype = None str: str_type num = 100 - shape: Tuple[int, ...] = () + shape: tuple[int, ...]
= () itemsize = 8 - base: Optional[DtypeObj] = None + base: DtypeObj | None = None isbuiltin = 0 isnative = 0 - _cache: Dict[str_type, PandasExtensionDtype] = {} + _cache: dict[str_type, PandasExtensionDtype] = {} def __str__(self) -> str_type: """ @@ -103,7 +98,7 @@ def __repr__(self) -> str_type: def __hash__(self) -> int: raise NotImplementedError("sub-classes should implement an __hash__ method") - def __getstate__(self) -> Dict[str_type, Any]: + def __getstate__(self) -> dict[str_type, Any]: # pickle support; we don't want to pickle the cache return {k: getattr(self, k, None) for k in self._metadata} @@ -177,19 +172,19 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): # TODO: Document public vs. private API name = "category" - type: Type[CategoricalDtypeType] = CategoricalDtypeType + type: type[CategoricalDtypeType] = CategoricalDtypeType kind: str_type = "O" str = "|O08" base = np.dtype("O") _metadata = ("categories", "ordered") - _cache: Dict[str_type, PandasExtensionDtype] = {} + _cache: dict[str_type, PandasExtensionDtype] = {} def __init__(self, categories=None, ordered: Ordered = False): self._finalize(categories, ordered, fastpath=False) @classmethod def _from_fastpath( - cls, categories=None, ordered: Optional[bool] = None + cls, categories=None, ordered: bool | None = None ) -> CategoricalDtype: self = cls.__new__(cls) self._finalize(categories, ordered, fastpath=True) @@ -212,8 +207,8 @@ def _from_values_or_dtype( cls, values=None, categories=None, - ordered: Optional[bool] = None, - dtype: Optional[Dtype] = None, + ordered: bool | None = None, + dtype: Dtype | None = None, ) -> CategoricalDtype: """ Construct dtype from the input parameters used in :class:`Categorical`. @@ -478,7 +473,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int: return np.bitwise_xor.reduce(hashed) @classmethod - def construct_array_type(cls) -> Type[Categorical]: + def construct_array_type(cls) -> type_t[Categorical]: """ Return the array type associated with this dtype. 
@@ -546,9 +541,7 @@ def validate_categories(categories, fastpath: bool = False): return categories - def update_dtype( - self, dtype: Union[str_type, CategoricalDtype] - ) -> CategoricalDtype: + def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype: """ Returns a CategoricalDtype with categories and ordered taken from dtype if specified, otherwise falling back to self if unspecified @@ -601,7 +594,7 @@ def _is_boolean(self) -> bool: return is_bool_dtype(self.categories) - def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: from pandas.core.arrays.sparse import SparseDtype # check if we have all categorical dtype with identical categories @@ -670,7 +663,7 @@ class DatetimeTZDtype(PandasExtensionDtype): datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] """ - type: Type[Timestamp] = Timestamp + type: type[Timestamp] = Timestamp kind: str_type = "M" str = "|M8[ns]" num = 101 @@ -678,9 +671,9 @@ class DatetimeTZDtype(PandasExtensionDtype): na_value = NaT _metadata = ("unit", "tz") _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") - _cache: Dict[str_type, PandasExtensionDtype] = {} + _cache: dict[str_type, PandasExtensionDtype] = {} - def __init__(self, unit: Union[str_type, DatetimeTZDtype] = "ns", tz=None): + def __init__(self, unit: str_type | DatetimeTZDtype = "ns", tz=None): if isinstance(unit, DatetimeTZDtype): # error: "str" has no attribute "tz" unit, tz = unit.unit, unit.tz # type: ignore[attr-defined] @@ -727,7 +720,7 @@ def tz(self): return self._tz @classmethod - def construct_array_type(cls) -> Type[DatetimeArray]: + def construct_array_type(cls) -> type_t[DatetimeArray]: """ Return the array type associated with this dtype. @@ -837,14 +830,14 @@ class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype): period[M] """ - type: Type[Period] = Period + type: type[Period] = Period kind: str_type = "O" str = "|O08" base = np.dtype("O") num = 102 _metadata = ("freq",) _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]") - _cache: Dict[str_type, PandasExtensionDtype] = {} + _cache: dict[str_type, PandasExtensionDtype] = {} def __new__(cls, freq=None): """ @@ -973,7 +966,7 @@ def is_dtype(cls, dtype: object) -> bool: return super().is_dtype(dtype) @classmethod - def construct_array_type(cls) -> Type[PeriodArray]: + def construct_array_type(cls) -> type_t[PeriodArray]: """ Return the array type associated with this dtype. @@ -986,7 +979,7 @@ def construct_array_type(cls) -> Type[PeriodArray]: return PeriodArray def __from_arrow__( - self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + self, array: pyarrow.Array | pyarrow.ChunkedArray ) -> PeriodArray: """ Construct PeriodArray from pyarrow Array/ChunkedArray. @@ -1049,9 +1042,9 @@ class IntervalDtype(PandasExtensionDtype): _match = re.compile( r"(I|i)nterval\[(?P<subtype>[^,]+)(, (?P<closed>(right|left|both|neither)))?\]" ) - _cache: Dict[str_type, PandasExtensionDtype] = {} + _cache: dict[str_type, PandasExtensionDtype] = {} - def __new__(cls, subtype=None, closed: Optional[str_type] = None): + def __new__(cls, subtype=None, closed: str_type | None = None): from pandas.core.dtypes.common import ( is_string_dtype, pandas_dtype, ) @@ -1126,7 +1119,7 @@ def subtype(self): return self._subtype @classmethod - def construct_array_type(cls) -> Type[IntervalArray]: + def construct_array_type(cls) -> type[IntervalArray]: """ Return the array type associated with this dtype. 
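The _match patterns above use named groups (unit, tz, freq, subtype, closed) to drive the construct_from_string-style parsing of dtype strings. A small usage sketch with an illustrative input string:

import re

# same shape as the DatetimeTZDtype._match pattern above
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")

m = _match.match("datetime64[ns, US/Central]")
assert m is not None
print(m.group("unit"), m.group("tz"))  # ns US/Central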
@@ -1220,7 +1213,7 @@ def is_dtype(cls, dtype: object) -> bool: return super().is_dtype(dtype) def __from_arrow__( - self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] + self, array: pyarrow.Array | pyarrow.ChunkedArray ) -> IntervalArray: """ Construct IntervalArray from pyarrow Array/ChunkedArray. @@ -1243,7 +1236,7 @@ def __from_arrow__( return IntervalArray._concat_same_type(results) - def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # NB: this doesn't handle checking for closed match if not all(isinstance(x, IntervalDtype) for x in dtypes): return None @@ -1281,7 +1274,7 @@ class PandasDtype(ExtensionDtype): _metadata = ("_dtype",) - def __init__(self, dtype: Optional[Union[NpDtype, PandasDtype]]): + def __init__(self, dtype: NpDtype | PandasDtype | None): if isinstance(dtype, PandasDtype): # make constructor univalent dtype = dtype.numpy_dtype @@ -1305,7 +1298,7 @@ def name(self) -> str: return self._dtype.name @property - def type(self) -> Type[np.generic]: + def type(self) -> type[np.generic]: """ The type object used to instantiate a scalar of this NumPy data-type. """ @@ -1333,7 +1326,7 @@ def construct_from_string(cls, string: str) -> PandasDtype: return cls(dtype) @classmethod - def construct_array_type(cls) -> Type[PandasArray]: + def construct_array_type(cls) -> type_t[PandasArray]: """ Return the array type associated with this dtype. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 537772125e5fe..5aebad84a0a30 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -17,14 +17,9 @@ from typing import ( Any, Callable, - Dict, - FrozenSet, Hashable, Iterable, - List, Mapping, - Optional, - Type, TypeVar, Union, cast, @@ -119,7 +114,7 @@ ScalarResult = TypeVar("ScalarResult") -def generate_property(name: str, klass: Type[FrameOrSeries]): +def generate_property(name: str, klass: type[FrameOrSeries]): """ Create a property for a GroupBy subclass to dispatch to DataFrame/Series. @@ -142,7 +137,7 @@ def prop(self): return property(prop) -def pin_allowlisted_properties(klass: Type[FrameOrSeries], allowlist: FrozenSet[str]): +def pin_allowlisted_properties(klass: type[FrameOrSeries], allowlist: frozenset[str]): """ Create GroupBy member defs for DataFrame/Series names in an allowlist. @@ -327,7 +322,7 @@ def _aggregate_multiple_funcs(self, arg): arg = zip(columns, arg) - results: Dict[base.OutputKey, FrameOrSeriesUnion] = {} + results: dict[base.OutputKey, FrameOrSeriesUnion] = {} for idx, (name, func) in enumerate(arg): obj = self @@ -355,8 +350,8 @@ def _aggregate_multiple_funcs(self, arg): # TODO: index should not be Optional - see GH 35490 def _wrap_series_output( self, - output: Mapping[base.OutputKey, Union[Series, ArrayLike]], - index: Optional[Index], + output: Mapping[base.OutputKey, Series | ArrayLike], + index: Index | None, ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy operation into the expected result. @@ -396,8 +391,8 @@ def _wrap_series_output( # TODO: Remove index argument, use self.grouper.result_index, see GH 35490 def _wrap_aggregated_output( self, - output: Mapping[base.OutputKey, Union[Series, np.ndarray]], - index: Optional[Index], + output: Mapping[base.OutputKey, Series | np.ndarray], + index: Index | None, ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. 
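Many of the groupby hunks above and below annotate the same accumulation idiom: per-column results are collected into a dict keyed by base.OutputKey (a label plus an output position) and then handed to a _wrap_*_output method. A stripped-down sketch of that idiom; OutputKey here is a stand-in named tuple, not imported from pandas:

from __future__ import annotations

from typing import NamedTuple

import numpy as np


class OutputKey(NamedTuple):
    label: str
    position: int


def transform_columns(columns: dict[str, np.ndarray]) -> dict[OutputKey, np.ndarray]:
    # accumulate one result array per column, keyed by (label, position)
    output: dict[OutputKey, np.ndarray] = {}
    for idx, (name, values) in enumerate(columns.items()):
        output[OutputKey(label=name, position=idx)] = values.cumsum()
    return output


print(transform_columns({"a": np.arange(3), "b": np.ones(3)}))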
@@ -420,7 +415,7 @@ def _wrap_aggregated_output( return self._reindex_output(result) def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Union[Series, ArrayLike]] + self, output: Mapping[base.OutputKey, Series | ArrayLike] ) -> Series: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. @@ -450,7 +445,7 @@ def _wrap_applied_output( self, data: Series, keys: Index, - values: Optional[List[Any]], + values: list[Any] | None, not_indexed_same: bool = False, ) -> FrameOrSeriesUnion: """ @@ -1217,7 +1212,7 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: axis = self.axis obj = self._obj_with_exclusions - result: Dict[Hashable, Union[NDFrame, np.ndarray]] = {} + result: dict[Hashable, NDFrame | np.ndarray] = {} if axis != obj._info_axis_number: for name, data in self: fres = func(data, *args, **kwargs) @@ -1234,7 +1229,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame: # only for axis==0 obj = self._obj_with_exclusions - result: Dict[Union[int, str], NDFrame] = {} + result: dict[int | str, NDFrame] = {} cannot_agg = [] for item in obj: data = obj[item] @@ -1305,7 +1300,7 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False): def _wrap_applied_output_series( self, keys, - values: List[Series], + values: list[Series], not_indexed_same: bool, first_not_none, key_index, @@ -1718,8 +1713,8 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None: def _wrap_aggregated_output( self, - output: Mapping[base.OutputKey, Union[Series, np.ndarray]], - index: Optional[Index], + output: Mapping[base.OutputKey, Series | np.ndarray], + index: Index | None, ) -> DataFrame: """ Wraps the output of DataFrameGroupBy aggregations into the expected result. @@ -1752,7 +1747,7 @@ def _wrap_aggregated_output( return self._reindex_output(result) def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Union[Series, ArrayLike]] + self, output: Mapping[base.OutputKey, Series | ArrayLike] ) -> DataFrame: """ Wraps the output of DataFrameGroupBy transformations into the expected result. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a6c3cb3ff5d0b..e226f771d5b9f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -19,19 +19,13 @@ class providing the base-class of operations. 
import types from typing import ( Callable, - Dict, - FrozenSet, Generic, Hashable, Iterable, Iterator, List, Mapping, - Optional, Sequence, - Set, - Tuple, - Type, TypeVar, Union, ) @@ -529,8 +523,8 @@ def group_selection_context(groupby: BaseGroupBy) -> Iterator[BaseGroupBy]: class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): - _group_selection: Optional[IndexLabel] = None - _apply_allowlist: FrozenSet[str] = frozenset() + _group_selection: IndexLabel | None = None + _apply_allowlist: frozenset[str] = frozenset() _hidden_attrs = PandasObject._hidden_attrs | { "as_index", "axis", @@ -550,12 +544,12 @@ class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): def __init__( self, obj: FrameOrSeries, - keys: Optional[_KeysArgType] = None, + keys: _KeysArgType | None = None, axis: int = 0, - level: Optional[IndexLabel] = None, - grouper: Optional[ops.BaseGrouper] = None, - exclusions: Optional[Set[Hashable]] = None, - selection: Optional[IndexLabel] = None, + level: IndexLabel | None = None, + grouper: ops.BaseGrouper | None = None, + exclusions: set[Hashable] | None = None, + selection: IndexLabel | None = None, as_index: bool = True, sort: bool = True, group_keys: bool = True, @@ -623,7 +617,7 @@ def _assure_grouper(self) -> None: @final @property - def groups(self) -> Dict[Hashable, np.ndarray]: + def groups(self) -> dict[Hashable, np.ndarray]: """ Dict {group name -> group labels}. """ @@ -786,7 +780,7 @@ def _set_result_index_ordered( return result @final - def _dir_additions(self) -> Set[str]: + def _dir_additions(self) -> set[str]: return self.obj._dir_additions() | self._apply_allowlist def __getattr__(self, attr: str): @@ -824,7 +818,7 @@ def __getattr__(self, attr: str): @Appender(_pipe_template) def pipe( self, - func: Union[Callable[..., T], Tuple[Callable[..., T], str]], + func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs, ) -> T: @@ -897,7 +891,7 @@ def get_group(self, name, obj=None): return obj._take_with_is_copy(inds, axis=self.axis) - def __iter__(self) -> Iterator[Tuple[Hashable, FrameOrSeries]]: + def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]: """ Groupby iterator. @@ -1025,7 +1019,7 @@ def _cumcount_array(self, ascending: bool = True): def _cython_transform( self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs ): - output: Dict[base.OutputKey, ArrayLike] = {} + output: dict[base.OutputKey, ArrayLike] = {} for idx, obj in enumerate(self._iterate_slices()): name = obj.name @@ -1049,7 +1043,7 @@ def _cython_transform( return self._wrap_transformed_output(output) def _wrap_aggregated_output( - self, output: Mapping[base.OutputKey, np.ndarray], index: Optional[Index] + self, output: Mapping[base.OutputKey, np.ndarray], index: Index | None ): raise AbstractMethodError(self) @@ -1098,7 +1092,7 @@ def _agg_general( def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ): - output: Dict[base.OutputKey, ArrayLike] = {} + output: dict[base.OutputKey, ArrayLike] = {} # Ideally we would be able to enumerate self._iterate_slices and use # the index from enumeration as the key of output, but ohlc in particular # returns a (n x 4) array. 
Output requires 1D ndarrays as values, so we @@ -1215,7 +1209,7 @@ def _python_agg_general(self, func, *args, **kwargs): f = lambda x: func(x, *args, **kwargs) # iterate through "columns" ex exclusions to populate output dict - output: Dict[base.OutputKey, np.ndarray] = {} + output: dict[base.OutputKey, np.ndarray] = {} for idx, obj in enumerate(self._iterate_slices()): name = obj.name @@ -1406,7 +1400,7 @@ class GroupBy(BaseGroupBy[FrameOrSeries]): @final @property - def _obj_1d_constructor(self) -> Type[Series]: + def _obj_1d_constructor(self) -> type[Series]: # GH28330 preserve subclassed Series/DataFrames if isinstance(self.obj, DataFrame): return self.obj._constructor_sliced @@ -1419,7 +1413,7 @@ def _bool_agg(self, val_test, skipna): Shared func to call any / all Cython GroupBy implementations. """ - def objs_to_bool(vals: np.ndarray) -> Tuple[np.ndarray, Type]: + def objs_to_bool(vals: np.ndarray) -> tuple[np.ndarray, type]: if is_object_dtype(vals): vals = np.array([bool(x) for x in vals]) else: @@ -1427,7 +1421,7 @@ def objs_to_bool(vals: np.ndarray) -> Tuple[np.ndarray, Type]: return vals.view(np.uint8), bool - def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray: + def result_to_bool(result: np.ndarray, inference: type) -> np.ndarray: return result.astype(inference, copy=False) return self._get_cythonized_result( @@ -2058,7 +2052,7 @@ def backfill(self, limit=None): @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFrame: + def nth(self, n: int | list[int], dropna: str | None = None) -> DataFrame: """ Take the nth row from each group if n is an int, or a subset of rows if n is a list of ints. @@ -2266,13 +2260,13 @@ def quantile(self, q=0.5, interpolation: str = "linear"): """ from pandas import concat - def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: + def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: if is_object_dtype(vals): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" 
) - inference: Optional[np.dtype] = None + inference: np.dtype | None = None if is_integer_dtype(vals.dtype): if isinstance(vals, ExtensionArray): out = vals.to_numpy(dtype=float, na_value=np.nan) @@ -2292,7 +2286,7 @@ def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: return out, inference - def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: + def post_processor(vals: np.ndarray, inference: type | None) -> np.ndarray: if inference: # Check for edge case if not ( @@ -2618,7 +2612,7 @@ def _get_cythonized_result( needs_counts: bool = False, needs_values: bool = False, needs_2d: bool = False, - min_count: Optional[int] = None, + min_count: int | None = None, needs_mask: bool = False, needs_ngroups: bool = False, result_is_index: bool = False, @@ -2692,7 +2686,7 @@ def _get_cythonized_result( grouper = self.grouper labels, _, ngroups = grouper.group_info - output: Dict[base.OutputKey, np.ndarray] = {} + output: dict[base.OutputKey, np.ndarray] = {} base_func = getattr(libgroupby, how) error_msg = "" @@ -3007,10 +3001,10 @@ def _reindex_output( @final def sample( self, - n: Optional[int] = None, - frac: Optional[float] = None, + n: int | None = None, + frac: float | None = None, replace: bool = False, - weights: Optional[Union[Sequence, Series]] = None, + weights: Sequence | Series | None = None, random_state=None, ): """ @@ -3122,10 +3116,10 @@ def sample( @doc(GroupBy) def get_groupby( obj: NDFrame, - by: Optional[_KeysArgType] = None, + by: _KeysArgType | None = None, axis: int = 0, level=None, - grouper: Optional[ops.BaseGrouper] = None, + grouper: ops.BaseGrouper | None = None, exclusions=None, selection=None, as_index: bool = True, @@ -3137,7 +3131,7 @@ def get_groupby( dropna: bool = True, ) -> GroupBy: - klass: Type[GroupBy] + klass: type[GroupBy] if isinstance(obj, Series): from pandas.core.groupby.generic import SeriesGroupBy diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d9cda1b6cbe3f..151756b829a1d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -4,14 +4,7 @@ """ from __future__ import annotations -from typing import ( - Dict, - Hashable, - List, - Optional, - Set, - Tuple, -) +from typing import Hashable import warnings import numpy as np @@ -256,7 +249,7 @@ class Grouper: Freq: 17T, dtype: int64 """ - _attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort") + _attributes: tuple[str, ...] 
= ("key", "level", "freq", "axis", "sort") def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: @@ -434,7 +427,7 @@ def __init__( self, index: Index, grouper=None, - obj: Optional[FrameOrSeries] = None, + obj: FrameOrSeries | None = None, name=None, level=None, sort: bool = True, @@ -563,8 +556,8 @@ def __repr__(self) -> str: def __iter__(self): return iter(self.indices) - _codes: Optional[np.ndarray] = None - _group_index: Optional[Index] = None + _codes: np.ndarray | None = None + _group_index: Index | None = None @property def ngroups(self) -> int: @@ -624,7 +617,7 @@ def _make_codes(self) -> None: self._group_index = uniques @cache_readonly - def groups(self) -> Dict[Hashable, np.ndarray]: + def groups(self) -> dict[Hashable, np.ndarray]: return self.index.groupby(Categorical.from_codes(self.codes, self.group_index)) @@ -638,7 +631,7 @@ def get_grouper( mutated: bool = False, validate: bool = True, dropna: bool = True, -) -> Tuple[ops.BaseGrouper, Set[Hashable], FrameOrSeries]: +) -> tuple[ops.BaseGrouper, set[Hashable], FrameOrSeries]: """ Create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. @@ -762,8 +755,8 @@ def get_grouper( else: levels = [level] * len(keys) - groupings: List[Grouping] = [] - exclusions: Set[Hashable] = set() + groupings: list[Grouping] = [] + exclusions: set[Hashable] = set() # if the actual grouper should be obj[key] def is_in_axis(key) -> bool: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index d3273d7f13a4d..6de9898a6dec0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -10,15 +10,10 @@ import collections import functools from typing import ( - Dict, Generic, Hashable, Iterator, - List, - Optional, Sequence, - Tuple, - Type, ) import numpy as np @@ -294,14 +289,14 @@ def __init__( sort: bool = True, group_keys: bool = True, mutated: bool = False, - indexer: Optional[np.ndarray] = None, + indexer: np.ndarray | None = None, dropna: bool = True, ): assert isinstance(axis, Index), axis self._filter_empty_groups = self.compressed = len(groupings) != 1 self.axis = axis - self._groupings: List[grouper.Grouping] = list(groupings) + self._groupings: list[grouper.Grouping] = list(groupings) self.sort = sort self.group_keys = group_keys self.mutated = mutated @@ -309,7 +304,7 @@ def __init__( self.dropna = dropna @property - def groupings(self) -> List[grouper.Grouping]: + def groupings(self) -> list[grouper.Grouping]: return self._groupings @property @@ -325,7 +320,7 @@ def nkeys(self) -> int: def get_iterator( self, data: FrameOrSeries, axis: int = 0 - ) -> Iterator[Tuple[Hashable, FrameOrSeries]]: + ) -> Iterator[tuple[Hashable, FrameOrSeries]]: """ Groupby iterator @@ -455,15 +450,15 @@ def indices(self): return get_indexer_dict(codes_list, keys) @property - def codes(self) -> List[np.ndarray]: + def codes(self) -> list[np.ndarray]: return [ping.codes for ping in self.groupings] @property - def levels(self) -> List[Index]: + def levels(self) -> list[Index]: return [ping.group_index for ping in self.groupings] @property - def names(self) -> List[Hashable]: + def names(self) -> list[Hashable]: return [ping.name for ping in self.groupings] @final @@ -479,7 +474,7 @@ def size(self) -> Series: return Series(out, index=self.result_index, dtype="int64") @cache_readonly - def groups(self) -> Dict[Hashable, np.ndarray]: + def groups(self) -> dict[Hashable, np.ndarray]: """ dict {group name -> group labels} """ if len(self.groupings) == 1: return 
self.groupings[0].groups @@ -513,7 +508,7 @@ def codes_info(self) -> np.ndarray: return codes @final - def _get_compressed_codes(self) -> Tuple[np.ndarray, np.ndarray]: + def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]: all_codes = self.codes if len(all_codes) > 1: group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True) @@ -528,7 +523,7 @@ def ngroups(self) -> int: return len(self.result_index) @property - def reconstructed_codes(self) -> List[np.ndarray]: + def reconstructed_codes(self) -> list[np.ndarray]: codes = self.codes comp_ids, obs_ids, _ = self.group_info return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True) @@ -545,7 +540,7 @@ def result_index(self) -> Index: ) @final - def get_group_levels(self) -> List[Index]: + def get_group_levels(self) -> list[Index]: if not self.compressed and len(self.groupings) == 1: return [self.groupings[0].result_index] @@ -935,7 +930,7 @@ def group_info(self): ) @cache_readonly - def reconstructed_codes(self) -> List[np.ndarray]: + def reconstructed_codes(self) -> list[np.ndarray]: # get unique result indices, and prepend 0 as groupby starts from the first return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] @@ -947,15 +942,15 @@ def result_index(self): return self.binlabels @property - def levels(self) -> List[Index]: + def levels(self) -> list[Index]: return [self.binlabels] @property - def names(self) -> List[Hashable]: + def names(self) -> list[Hashable]: return [self.binlabels.name] @property - def groupings(self) -> List[grouper.Grouping]: + def groupings(self) -> list[grouper.Grouping]: return [ grouper.Grouping(lvl, lvl, in_axis=False, level=None, name=name) for lvl, name in zip(self.levels, self.names) @@ -1068,7 +1063,7 @@ def get_splitter( data: FrameOrSeries, labels: np.ndarray, ngroups: int, axis: int = 0 ) -> DataSplitter: if isinstance(data, Series): - klass: Type[DataSplitter] = SeriesSplitter + klass: type[DataSplitter] = SeriesSplitter else: # i.e. 
DataFrame klass = FrameSplitter diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5163c55036fd0..de6d6c8e07144 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -8,17 +8,9 @@ TYPE_CHECKING, Any, Callable, - Dict, - FrozenSet, Hashable, - List, - Optional, Sequence, - Set, - Tuple, - Type, TypeVar, - Union, cast, ) import warnings @@ -194,7 +186,7 @@ _o_dtype = np.dtype("object") -def disallow_kwargs(kwargs: Dict[str, Any]): +def disallow_kwargs(kwargs: dict[str, Any]): if kwargs: raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") @@ -268,7 +260,7 @@ class Index(IndexOpsMixin, PandasObject): """ # tolist is not actually deprecated, just suppressed in the __dir__ - _hidden_attrs: FrozenSet[str] = ( + _hidden_attrs: frozenset[str] = ( PandasObject._hidden_attrs | IndexOpsMixin._hidden_attrs | frozenset(["contains", "set_value"]) @@ -286,22 +278,22 @@ def _left_indexer_unique(self, left: np.ndarray, right: np.ndarray) -> np.ndarra def _left_indexer( self, left: np.ndarray, right: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.left_join_indexer(left, right) def _inner_indexer( self, left: np.ndarray, right: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.inner_join_indexer(left, right) def _outer_indexer( self, left: np.ndarray, right: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.outer_join_indexer(left, right) _typ = "index" - _data: Union[ExtensionArray, np.ndarray] - _id: Optional[object] = None + _data: ExtensionArray | np.ndarray + _id: object | None = None _name: Hashable = None # MultiIndex.levels previously allowed setting the index name. We # don't allow this anymore, and raise if it happens rather than @@ -316,7 +308,7 @@ def _outer_indexer( # would we like our indexing holder to defer to us _defer_to_indexing = False - _engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine + _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine # whether we support partial string indexing. Overridden # in DatetimeIndex and PeriodIndex _supports_partial_string_indexing = False @@ -557,7 +549,7 @@ def asi8(self): return None @classmethod - def _simple_new(cls: Type[_IndexT], values, name: Hashable = None) -> _IndexT: + def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT: """ We require that we have a dtype compat for the values. If we are passed a non-dtype compat, then coerce using the constructor. @@ -580,7 +572,7 @@ def _simple_new(cls: Type[_IndexT], values, name: Hashable = None) -> _IndexT: return result @cache_readonly - def _constructor(self: _IndexT) -> Type[_IndexT]: + def _constructor(self: _IndexT) -> type[_IndexT]: return type(self) @final @@ -635,7 +627,7 @@ def _format_duplicate_message(self): # Index Internals Methods @final - def _get_attributes_dict(self) -> Dict[str_t, Any]: + def _get_attributes_dict(self) -> dict[str_t, Any]: """ Return an attributes dict for my class. """ @@ -728,7 +720,7 @@ def _engine(self) -> libindex.IndexEngine: return self._engine_type(lambda: target_values, len(self)) @cache_readonly - def _dir_additions_for_owner(self) -> Set[str_t]: + def _dir_additions_for_owner(self) -> set[str_t]: """ Add the string-like labels to the owner dataframe/series dir output. 
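The _simple_new and _constructor hunks above lean on a bound TypeVar (_IndexT) so that classmethods on Index subclasses are typed as returning the calling subclass rather than bare Index. A freestanding sketch of the pattern with toy classes, not pandas code:

from __future__ import annotations

from typing import TypeVar

_IndexT = TypeVar("_IndexT", bound="MiniIndex")


class MiniIndex:
    def __init__(self, values: list) -> None:
        self.values = values

    @classmethod
    def _simple_new(cls: type[_IndexT], values: list) -> _IndexT:
        # constructing through cls keeps subclasses typed as themselves
        return cls(values)


class MiniRange(MiniIndex):
    pass


obj = MiniRange._simple_new([1, 2, 3])
print(type(obj).__name__)  # MiniRange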
@@ -992,10 +984,10 @@ def repeat(self, repeats, axis=None): def copy( self: _IndexT, - name: Optional[Hashable] = None, + name: Hashable | None = None, deep: bool = False, - dtype: Optional[Dtype] = None, - names: Optional[Sequence[Hashable]] = None, + dtype: Dtype | None = None, + names: Sequence[Hashable] | None = None, ) -> _IndexT: """ Make a copy of this object. @@ -1125,9 +1117,9 @@ def _mpl_repr(self): def format( self, name: bool = False, - formatter: Optional[Callable] = None, + formatter: Callable | None = None, na_rep: str_t = "NaN", - ) -> List[str_t]: + ) -> list[str_t]: """ Render a string representation of the Index. """ @@ -1145,8 +1137,8 @@ def format( return self._format_with_header(header, na_rep=na_rep) def _format_with_header( - self, header: List[str_t], na_rep: str_t = "NaN" - ) -> List[str_t]: + self, header: list[str_t], na_rep: str_t = "NaN" + ) -> list[str_t]: from pandas.io.formats.format import format_array values = self._values @@ -1417,7 +1409,7 @@ def name(self, value): @final def _validate_names( self, name=None, names=None, deep: bool = False - ) -> List[Hashable]: + ) -> list[Hashable]: """ Handles the quirks of having a singular 'name' parameter for general Index and plural 'names' parameter for MultiIndex. @@ -1812,7 +1804,7 @@ def droplevel(self, level=0): return self._drop_level_numbers(levnums) @final - def _drop_level_numbers(self, levnums: List[int]): + def _drop_level_numbers(self, levnums: list[int]): """ Drop MultiIndex levels by level _number_, not name. """ @@ -2368,7 +2360,7 @@ def __reduce__(self): # -------------------------------------------------------------------- # Null Handling Methods - _na_value: Union[float, NaTType] = np.nan + _na_value: float | NaTType = np.nan """The expected NA value to use with this index.""" @cache_readonly @@ -2556,7 +2548,7 @@ def dropna(self: _IndexT, how: str_t = "any") -> _IndexT: # -------------------------------------------------------------------- # Uniqueness Methods - def unique(self: _IndexT, level: Optional[Hashable] = None) -> _IndexT: + def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: """ Return unique values in the index. @@ -2587,7 +2579,7 @@ def unique(self: _IndexT, level: Optional[Hashable] = None) -> _IndexT: return self._shallow_copy(result) @final - def drop_duplicates(self: _IndexT, keep: Union[str_t, bool] = "first") -> _IndexT: + def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: """ Return Index with duplicate values removed. @@ -2638,7 +2630,7 @@ def drop_duplicates(self: _IndexT, keep: Union[str_t, bool] = "first") -> _Index return super().drop_duplicates(keep=keep) - def duplicated(self, keep: Union[str_t, bool] = "first") -> np.ndarray: + def duplicated(self, keep: str_t | bool = "first") -> np.ndarray: """ Indicate duplicate index values. 
@@ -3252,7 +3244,7 @@ def _assert_can_do_setop(self, other) -> bool: raise TypeError("Input must be Index or array-like") return True - def _convert_can_do_setop(self, other) -> Tuple[Index, Hashable]: + def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: if not isinstance(other, Index): other = Index(other, name=self.name) result_name = self.name @@ -3377,8 +3369,8 @@ def get_loc(self, key, method=None, tolerance=None): def get_indexer( self, target, - method: Optional[str_t] = None, - limit: Optional[int] = None, + method: str_t | None = None, + limit: int | None = None, tolerance=None, ) -> np.ndarray: @@ -3406,8 +3398,8 @@ def get_indexer( def _get_indexer( self, target: Index, - method: Optional[str_t] = None, - limit: Optional[int] = None, + method: str_t | None = None, + limit: int | None = None, tolerance=None, ) -> np.ndarray: if tolerance is not None: @@ -3443,7 +3435,7 @@ def _get_indexer( return ensure_platform_int(indexer) @final - def _check_indexing_method(self, method: Optional[str_t]) -> None: + def _check_indexing_method(self, method: str_t | None) -> None: """ Raise if we have a get_indexer `method` that is not supported or valid. """ @@ -3461,9 +3453,7 @@ def _check_indexing_method(self, method: Optional[str_t]) -> None: raise ValueError("Invalid fill method") - def _convert_tolerance( - self, tolerance, target: Union[np.ndarray, Index] - ) -> np.ndarray: + def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: # override this method on subclasses tolerance = np.asarray(tolerance) if target.size != tolerance.size and tolerance.size > 1: @@ -3472,7 +3462,7 @@ def _convert_tolerance( @final def _get_fill_indexer( - self, target: Index, method: str_t, limit: Optional[int] = None, tolerance=None + self, target: Index, method: str_t, limit: int | None = None, tolerance=None ) -> np.ndarray: target_values = target._get_engine_target() @@ -3492,7 +3482,7 @@ def _get_fill_indexer( @final def _get_fill_indexer_searchsorted( - self, target: Index, method: str_t, limit: Optional[int] = None + self, target: Index, method: str_t, limit: int | None = None ) -> np.ndarray: """ Fallback pad/backfill get_indexer that works for monotonic decreasing @@ -3526,7 +3516,7 @@ def _get_fill_indexer_searchsorted( @final def _get_nearest_indexer( - self, target: Index, limit: Optional[int], tolerance + self, target: Index, limit: int | None, tolerance ) -> np.ndarray: """ Get the indexer for the nearest index labels; requires an index with @@ -3557,7 +3547,7 @@ def _get_nearest_indexer( @final def _filter_indexer_tolerance( self, - target: Union[Index, np.ndarray, ExtensionArray], + target: Index | np.ndarray | ExtensionArray, indexer: np.ndarray, tolerance, ) -> np.ndarray: @@ -3899,8 +3889,8 @@ def join( self_is_mi = isinstance(self, ABCMultiIndex) other_is_mi = isinstance(other, ABCMultiIndex) - lindexer: Optional[np.ndarray] - rindexer: Optional[np.ndarray] + lindexer: np.ndarray | None + rindexer: np.ndarray | None # try to figure out the join level # GH3662 @@ -4132,7 +4122,7 @@ def _join_level( """ from pandas.core.indexes.multi import MultiIndex - def _get_leaf_sorter(labels: List[np.ndarray]) -> np.ndarray: + def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray: """ Returns sorter for the inner most level while preserving the order of higher levels. 
@@ -4283,8 +4273,8 @@ def _join_monotonic(self, other, how="left", return_indexers=False): sv = self._get_engine_target() ov = other._get_engine_target() - ridx: Optional[np.ndarray] - lidx: Optional[np.ndarray] + ridx: np.ndarray | None + lidx: np.ndarray | None if self.is_unique and other.is_unique: # We can perform much better than the general case @@ -4368,7 +4358,7 @@ def array(self) -> ExtensionArray: return array @property - def _values(self) -> Union[ExtensionArray, np.ndarray]: + def _values(self) -> ExtensionArray | np.ndarray: """ The best array representation. @@ -4654,7 +4644,7 @@ def append(self, other): return self._concat(to_concat, name) - def _concat(self, to_concat: List[Index], name: Hashable) -> Index: + def _concat(self, to_concat: list[Index], name: Hashable) -> Index: """ Concatenate multiple Index objects. """ @@ -4911,7 +4901,7 @@ def sort_values( return_indexer: bool = False, ascending: bool = True, na_position: str_t = "last", - key: Optional[Callable] = None, + key: Callable | None = None, ): """ Return a sorted copy of the index. @@ -5302,7 +5292,7 @@ def _index_as_unique(self) -> bool: _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" @final - def _maybe_promote(self, other: Index) -> Tuple[Index, Index]: + def _maybe_promote(self, other: Index) -> tuple[Index, Index]: """ When dealing with an object-dtype Index and a non-object Index, see if we can upcast the object-dtype one to improve performance. @@ -5569,10 +5559,10 @@ def _get_string_slice(self, key: str_t): def slice_indexer( self, - start: Optional[Hashable] = None, - end: Optional[Hashable] = None, - step: Optional[int] = None, - kind: Optional[str_t] = None, + start: Hashable | None = None, + end: Hashable | None = None, + step: int | None = None, + kind: str_t | None = None, ) -> slice: """ Compute the slice indexer for input labels and step. @@ -6187,9 +6177,7 @@ def ensure_index_from_sequences(sequences, names=None): return MultiIndex.from_arrays(sequences, names=names) -def ensure_index( - index_like: Union[AnyArrayLike, Sequence], copy: bool = False -) -> Index: +def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Index: """ Ensure that we have an index from some index-like object. @@ -6283,7 +6271,7 @@ def ensure_has_len(seq): return seq -def trim_front(strings: List[str]) -> List[str]: +def trim_front(strings: list[str]) -> list[str]: """ Trims zeros and decimal points. @@ -6449,7 +6437,7 @@ def _try_convert_to_int_array( raise ValueError -def get_unanimous_names(*indexes: Index) -> Tuple[Hashable, ...]: +def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]: """ Return common name if all indices agree, otherwise None (level-by-level). 
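These files can trade typing.List, Optional and Union for list, | None and friends only because they start with from __future__ import annotations (PEP 563, visible as context in the grouper.py hunk above): annotations are stored as strings and never evaluated, so the PEP 585/604 spellings parse even on the Python 3.7/3.8 interpreters pandas still supported at the time. Runtime uses of typing objects, such as the Dtype alias in _typing.py or the cast(Collection[Tuple[...]], ...) call in multi.py, are still evaluated, which is why some capitalized imports survive the cleanup. A minimal demonstration; the limit parameter is invented for the example:

from __future__ import annotations  # must precede other imports


def trim_front(strings: list[str], limit: int | None = None) -> list[str]:
    # with PEP 563 these annotations are never evaluated at runtime, so
    # list[str] (PEP 585) and int | None (PEP 604) are safe on 3.7+
    trimmed = [s.lstrip() for s in strings]
    return trimmed if limit is None else trimmed[:limit]


print(trim_front(["  a", "  b"], limit=1))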
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0b80e863ef3ea..b3714ff004978 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -8,11 +8,7 @@ tzinfo, ) import operator -from typing import ( - TYPE_CHECKING, - Optional, - Tuple, -) +from typing import TYPE_CHECKING import warnings import numpy as np @@ -263,8 +259,8 @@ class DatetimeIndex(DatetimeTimedeltaMixin): _is_numeric_dtype = False _data: DatetimeArray - inferred_freq: Optional[str] - tz: Optional[tzinfo] + inferred_freq: str | None + tz: tzinfo | None # -------------------------------------------------------------------- # methods that dispatch to DatetimeArray and wrap result @@ -323,7 +319,7 @@ def __new__( ambiguous="raise", dayfirst=False, yearfirst=False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, ): @@ -435,7 +431,7 @@ def union_many(self, others): return this.rename(res_name) return this - def _maybe_utc_convert(self, other: Index) -> Tuple[DatetimeIndex, Index]: + def _maybe_utc_convert(self, other: Index) -> tuple[DatetimeIndex, Index]: this = self if isinstance(other, DatetimeIndex): @@ -1121,7 +1117,7 @@ def date_range( def bdate_range( start=None, end=None, - periods: Optional[int] = None, + periods: int | None = None, freq="B", tz=None, normalize=True, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9bfc21a940917..cd85efaba1d33 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -11,10 +11,6 @@ TYPE_CHECKING, Any, Hashable, - List, - Optional, - Tuple, - Union, cast, ) @@ -283,7 +279,7 @@ def __new__( cls, data, closed=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, name: Hashable = None, verify_integrity: bool = True, @@ -324,7 +320,7 @@ def from_breaks( closed: str = "right", name=None, copy: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_breaks( @@ -355,7 +351,7 @@ def from_arrays( closed: str = "right", name=None, copy: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_arrays( @@ -385,7 +381,7 @@ def from_tuples( closed: str = "right", name=None, copy: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ): with rewrite_exception("IntervalArray", cls.__name__): arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) @@ -652,8 +648,8 @@ def _searchsorted_monotonic(self, label, side: str = "left"): # Indexing Methods def get_loc( - self, key, method: Optional[str] = None, tolerance=None - ) -> Union[int, slice, np.ndarray]: + self, key, method: str | None = None, tolerance=None + ) -> int | slice | np.ndarray: """ Get integer location, slice or boolean mask for requested label. 
@@ -721,9 +717,9 @@ def get_loc( def _get_indexer( self, target: Index, - method: Optional[str] = None, - limit: Optional[int] = None, - tolerance: Optional[Any] = None, + method: str | None = None, + limit: int | None = None, + tolerance: Any | None = None, ) -> np.ndarray: if isinstance(target, IntervalIndex): @@ -756,7 +752,7 @@ def _get_indexer( return ensure_platform_int(indexer) @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) - def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + def get_indexer_non_unique(self, target: Index) -> tuple[np.ndarray, np.ndarray]: target = ensure_index(target) if isinstance(target, IntervalIndex) and not self._should_compare(target): @@ -775,7 +771,7 @@ def get_indexer_non_unique(self, target: Index) -> Tuple[np.ndarray, np.ndarray] return ensure_platform_int(indexer), ensure_platform_int(missing) - def _get_indexer_pointwise(self, target: Index) -> Tuple[np.ndarray, np.ndarray]: + def _get_indexer_pointwise(self, target: Index) -> tuple[np.ndarray, np.ndarray]: """ pointwise implementation for get_indexer and get_indexer_non_unique. """ @@ -911,7 +907,7 @@ def insert(self, loc, item): # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + def _format_with_header(self, header: list[str], na_rep: str = "NaN") -> list[str]: return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9751e12c373cd..eea1a069b9df6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -10,10 +10,8 @@ Hashable, Iterable, List, - Optional, Sequence, Tuple, - Union, cast, ) import warnings @@ -298,7 +296,7 @@ class MultiIndex(Index): _comparables = ["names"] rename = Index.set_names - sortorder: Optional[int] + sortorder: int | None # -------------------------------------------------------------------- # Constructors @@ -350,7 +348,7 @@ def __new__( return result - def _validate_codes(self, level: List, code: List): + def _validate_codes(self, level: list, code: list): """ Reassign code values as -1 if their corresponding levels are NaN. @@ -371,9 +369,7 @@ def _validate_codes(self, level: List, code: List): code = np.where(null_mask[code], -1, code) return code - def _verify_integrity( - self, codes: Optional[List] = None, levels: Optional[List] = None - ): + def _verify_integrity(self, codes: list | None = None, levels: list | None = None): """ Parameters ---------- @@ -505,9 +501,9 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex @names_compat def from_tuples( cls, - tuples: Iterable[Tuple[Hashable, ...]], - sortorder: Optional[int] = None, - names: Optional[Sequence[Hashable]] = None, + tuples: Iterable[tuple[Hashable, ...]], + sortorder: int | None = None, + names: Sequence[Hashable] | None = None, ) -> MultiIndex: """ Convert list of tuples to MultiIndex. 
@@ -550,7 +546,7 @@ def from_tuples( tuples = list(tuples) tuples = cast(Collection[Tuple[Hashable, ...]], tuples) - arrays: List[Sequence[Hashable]] + arrays: list[Sequence[Hashable]] if len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") @@ -1340,14 +1336,14 @@ def _format_native_types(self, na_rep="nan", **kwargs): def format( self, - name: Optional[bool] = None, - formatter: Optional[Callable] = None, - na_rep: Optional[str] = None, + name: bool | None = None, + formatter: Callable | None = None, + na_rep: str | None = None, names: bool = False, space: int = 2, sparsify=None, adjoin: bool = True, - ) -> List: + ) -> list: if name is not None: names = name @@ -1609,7 +1605,7 @@ def is_monotonic_decreasing(self) -> bool: return self[::-1].is_monotonic_increasing @cache_readonly - def _inferred_type_levels(self) -> List[str]: + def _inferred_type_levels(self) -> list[str]: """ return a list of the inferred types, one for each level """ return [i.inferred_type for i in self.levels] @@ -2377,7 +2373,7 @@ def reorder_levels(self, order) -> MultiIndex: levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False ) - def _get_codes_for_sorting(self) -> List[Categorical]: + def _get_codes_for_sorting(self) -> list[Categorical]: """ we are categorizing our codes by using the available categories (all, not just observed) @@ -2399,7 +2395,7 @@ def cats(level_codes): def sortlevel( self, level=0, ascending: bool = True, sort_remaining: bool = True - ) -> Tuple[MultiIndex, np.ndarray]: + ) -> tuple[MultiIndex, np.ndarray]: """ Sort MultiIndex at the requested level. @@ -2669,8 +2665,8 @@ def _get_partial_string_timestamp_match_key(self, key): def _get_indexer( self, target: Index, - method: Optional[str] = None, - limit: Optional[int] = None, + method: str | None = None, + limit: int | None = None, tolerance=None, ) -> np.ndarray: @@ -2715,7 +2711,7 @@ def _get_indexer( return ensure_platform_int(indexer) def get_slice_bound( - self, label: Union[Hashable, Sequence[Hashable]], side: str, kind: str + self, label: Hashable | Sequence[Hashable], side: str, kind: str ) -> int: """ For an ordered MultiIndex, compute slice bound @@ -3039,9 +3035,7 @@ def get_loc_level(self, key, level=0, drop_level: bool = True): level = [self._get_level_number(lev) for lev in level] return self._get_loc_level(key, level=level, drop_level=drop_level) - def _get_loc_level( - self, key, level: Union[int, List[int]] = 0, drop_level: bool = True - ): + def _get_loc_level(self, key, level: int | list[int] = 0, drop_level: bool = True): """ get_loc_level but with `level` known to be positional, not name-based. 
""" @@ -3319,9 +3313,7 @@ def _convert_to_indexer(r) -> Int64Index: r = r.nonzero()[0] return Int64Index(r) - def _update_indexer( - idxr: Optional[Index], indexer: Optional[Index], key - ) -> Index: + def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index: if indexer is None: indexer = Index(np.arange(n)) if idxr is None: @@ -3343,7 +3335,7 @@ def _update_indexer( elif is_list_like(k): # a collection of labels to include from this level (these # are or'd) - indexers: Optional[Int64Index] = None + indexers: Int64Index | None = None for x in k: try: idxrs = _convert_to_indexer( @@ -3400,7 +3392,7 @@ def _update_indexer( def _reorder_indexer( self, - seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], + seq: tuple[Scalar | Iterable | AnyArrayLike, ...], indexer: Int64Index, ) -> Int64Index: """ @@ -3434,7 +3426,7 @@ def _reorder_indexer( return indexer n = len(self) - keys: Tuple[np.ndarray, ...] = () + keys: tuple[np.ndarray, ...] = () # For each level of the sequence in seq, map the level codes with the # order they appears in a list-like sequence # This mapping is then use to reorder the indexer @@ -3823,7 +3815,7 @@ def isin(self, values, level=None) -> np.ndarray: __inv__ = make_invalid_op("__inv__") -def _lexsort_depth(codes: List[np.ndarray], nlevels: int) -> int: +def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" int64_codes = [ensure_int64(level_codes) for level_codes in codes] for k in range(nlevels, 0, -1): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 66f2b757438a7..a32585c3bed13 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -4,10 +4,7 @@ datetime, timedelta, ) -from typing import ( - Any, - Optional, -) +from typing import Any import warnings import numpy as np @@ -217,7 +214,7 @@ def __new__( data=None, ordinal=None, freq=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, **fields, @@ -594,7 +591,7 @@ def _get_string_slice(self, key: str): def period_range( - start=None, end=None, periods: Optional[int] = None, freq=None, name=None + start=None, end=None, periods: int | None = None, freq=None, name=None ) -> PeriodIndex: """ Return a fixed frequency PeriodIndex. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 7f2361eeb4d05..7a7a13ac94448 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -8,10 +8,6 @@ Any, Callable, Hashable, - List, - Optional, - Tuple, - Type, ) import warnings @@ -109,7 +105,7 @@ def __new__( start=None, stop=None, step=None, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy=False, name=None, ): @@ -148,7 +144,7 @@ def __new__( @classmethod def from_range( - cls, data: range, name=None, dtype: Optional[Dtype] = None + cls, data: range, name=None, dtype: Dtype | None = None ) -> RangeIndex: """ Create RangeIndex from a range object. 
@@ -186,7 +182,7 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: # -------------------------------------------------------------------- @cache_readonly - def _constructor(self) -> Type[Int64Index]: + def _constructor(self) -> type[Int64Index]: """ return the class to use for construction """ return Int64Index @@ -236,7 +232,7 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + def _format_with_header(self, header: list[str], na_rep: str = "NaN") -> list[str]: if not len(self._range): return header first_val_str = str(self._range[0]) @@ -405,8 +401,8 @@ def get_loc(self, key, method=None, tolerance=None): def _get_indexer( self, target: Index, - method: Optional[str] = None, - limit: Optional[int] = None, + method: str | None = None, + limit: int | None = None, tolerance=None, ) -> np.ndarray: if com.any_not_none(method, tolerance, limit): @@ -482,7 +478,7 @@ def copy( self, name: Hashable = None, deep: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, names=None, ): name = self._validate_names(name=name, names=names, deep=deep)[0] @@ -540,8 +536,8 @@ def argsort(self, *args, **kwargs) -> np.ndarray: return np.arange(len(self) - 1, -1, -1) def factorize( - self, sort: bool = False, na_sentinel: Optional[int] = -1 - ) -> Tuple[np.ndarray, RangeIndex]: + self, sort: bool = False, na_sentinel: int | None = -1 + ) -> tuple[np.ndarray, RangeIndex]: codes = np.arange(len(self), dtype=np.intp) uniques = self if sort and self.step < 0: @@ -920,7 +916,7 @@ def _arith_method(self, other, op): ]: return op(self._int64index, other) - step: Optional[Callable] = None + step: Callable | None = None if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: step = op diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index d432b7ef443cc..c3d17d3b400a5 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -7,11 +7,7 @@ TYPE_CHECKING, Any, Callable, - List, - Optional, - Tuple, TypeVar, - Union, ) import numpy as np @@ -119,13 +115,13 @@ class ArrayManager(DataManager): "arrays", ] - arrays: List[Union[np.ndarray, ExtensionArray]] - _axes: List[Index] + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] def __init__( self, - arrays: List[Union[np.ndarray, ExtensionArray]], - axes: List[Index], + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], verify_integrity: bool = True, ): # Note: we are storing the axes in "_axes" in the (row, columns) order @@ -143,7 +139,7 @@ def make_empty(self: T, axes=None) -> T: if axes is None: axes = [self.axes[1:], Index([])] - arrays: List[Union[np.ndarray, ExtensionArray]] = [] + arrays: list[np.ndarray | ExtensionArray] = [] return type(self)(arrays, axes) @property @@ -152,14 +148,14 @@ def items(self) -> Index: @property # error: Signature of "axes" incompatible with supertype "DataManager" - def axes(self) -> List[Index]: # type: ignore[override] + def axes(self) -> list[Index]: # type: ignore[override] # mypy doesn't work to override attribute with property # see https://github.com/python/mypy/issues/4125 """Axes is BlockManager-compatible order (columns, rows)""" return [self._axes[1], self._axes[0]] @property - def shape_proper(self) -> Tuple[int, ...]: + def shape_proper(self) -> tuple[int, ...]: # this returns (n_rows, n_columns) return tuple(len(ax) for ax 
in self._axes) @@ -236,7 +232,7 @@ def _verify_integrity(self) -> None: def reduce( self: T, func: Callable, ignore_failures: bool = False - ) -> Tuple[T, np.ndarray]: + ) -> tuple[T, np.ndarray]: """ Apply reduction function column-wise, returning a single-row ArrayManager. @@ -252,8 +248,8 @@ def reduce( np.ndarray Indexer of column indices that are retained. """ - result_arrays: List[np.ndarray] = [] - result_indices: List[int] = [] + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] for i, arr in enumerate(self.arrays): try: res = func(arr, axis=0) @@ -301,8 +297,8 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: ------- ArrayManager """ - result_arrays: List[np.ndarray] = [] - result_indices: List[int] = [] + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] for i, arr in enumerate(self.arrays): try: @@ -343,7 +339,7 @@ def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: def apply( self: T, f, - align_keys: Optional[List[str]] = None, + align_keys: list[str] | None = None, ignore_failures: bool = False, **kwargs, ) -> T: @@ -366,8 +362,8 @@ def apply( assert "filter" not in kwargs align_keys = align_keys or [] - result_arrays: List[np.ndarray] = [] - result_indices: List[int] = [] + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] # fillna: Series/DataFrame is responsible for making sure value is aligned aligned_args = {k: kwargs[k] for k in align_keys} @@ -406,7 +402,7 @@ def apply( result_arrays.append(applied) result_indices.append(i) - new_axes: List[Index] + new_axes: list[Index] if ignore_failures: # TODO copy? new_axes = [self._axes[0], self._axes[1][result_indices]] @@ -621,8 +617,8 @@ def replace(self, value, **kwargs) -> ArrayManager: def replace_list( self: T, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, ) -> T: @@ -850,7 +846,7 @@ def idelete(self, indexer): self._axes = [self._axes[0], self._axes[1][to_keep]] return self - def iset(self, loc: Union[int, slice, np.ndarray], value: ArrayLike): + def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): """ Set new column(s). 
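[Editor's note] The `ArrayManager` hunks above also convert class-body annotations such as `arrays: list[np.ndarray | ExtensionArray]`. Under PEP 563 these are stored as plain strings in `__annotations__` and never evaluated, so the union of a NumPy type with an ExtensionArray costs nothing at import time. A toy illustration using stdlib types only (class and attribute names are invented):

```python
# Toy stand-in for ArrayManager; class and attribute names are invented.
from __future__ import annotations


class TinyManager:
    # Annotation-only class attribute: records a *string* in
    # TinyManager.__annotations__ and is never evaluated.
    arrays: list[bytes | bytearray]

    def __init__(self, arrays):
        self.arrays = list(arrays)


print(TinyManager.__annotations__)  # {'arrays': 'list[bytes | bytearray]'}
print(TinyManager([b"a"]).arrays)   # [b'a']
```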
@@ -1133,15 +1129,15 @@ class SingleArrayManager(ArrayManager, SingleDataManager): "arrays", ] - arrays: List[Union[np.ndarray, ExtensionArray]] - _axes: List[Index] + arrays: list[np.ndarray | ExtensionArray] + _axes: list[Index] ndim = 1 def __init__( self, - arrays: List[Union[np.ndarray, ExtensionArray]], - axes: List[Index], + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], verify_integrity: bool = True, ): self._axes = axes diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9a2b3be4b66e2..c4c70851bd54a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -5,10 +5,6 @@ from typing import ( TYPE_CHECKING, Any, - List, - Optional, - Tuple, - Type, Union, cast, ) @@ -131,7 +127,7 @@ def maybe_split(meth: F) -> F: """ @wraps(meth) - def newfunc(self, *args, **kwargs) -> List[Block]: + def newfunc(self, *args, **kwargs) -> list[Block]: if self.ndim == 1 or self.shape[0] == 1: return meth(self, *args, **kwargs) @@ -150,7 +146,7 @@ class Block(libinternals.Block, PandasObject): Index-ignorant; let the container take care of that """ - values: Union[np.ndarray, ExtensionArray] + values: np.ndarray | ExtensionArray __slots__ = () is_numeric = False @@ -215,7 +211,7 @@ def array_values(self) -> ExtensionArray: # ExtensionArray]"; expected "Union[ndarray, PandasArray]" return PandasArray(self.values) # type: ignore[arg-type] - def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: """ return an internal format, currently just the ndarray this is often overridden to handle to_dense like operations @@ -265,7 +261,7 @@ def make_block(self, values, placement=None) -> Block: @final def make_block_same_class( - self, values, placement: Optional[BlockPlacement] = None + self, values, placement: BlockPlacement | None = None ) -> Block: """ Wrap given values in a block of same type as self. """ if placement is None: @@ -379,7 +375,7 @@ def delete(self, loc) -> None: pass @final - def apply(self, func, **kwargs) -> List[Block]: + def apply(self, func, **kwargs) -> list[Block]: """ apply the function to my values; return a block if we are not one @@ -389,7 +385,7 @@ def apply(self, func, **kwargs) -> List[Block]: return self._split_op_result(result) - def reduce(self, func, ignore_failures: bool = False) -> List[Block]: + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: # We will apply the function and reshape the result into a single-row # Block with the same mgr_locs; squeezing will be done at a higher level assert self.ndim == 2 @@ -411,7 +407,7 @@ def reduce(self, func, ignore_failures: bool = False) -> List[Block]: return [nb] @final - def _split_op_result(self, result) -> List[Block]: + def _split_op_result(self, result) -> list[Block]: # See also: split_and_operate if is_extension_array_dtype(result) and result.ndim > 1: # TODO(EA2D): unnecessary with 2D EAs @@ -431,7 +427,7 @@ def _split_op_result(self, result) -> List[Block]: def fillna( self, value, limit=None, inplace: bool = False, downcast=None - ) -> List[Block]: + ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to ObjectBlock and try again @@ -472,7 +468,7 @@ def fillna( ) @final - def _split(self) -> List[Block]: + def _split(self) -> list[Block]: """ Split a block into a list of single-column blocks. 
""" @@ -487,7 +483,7 @@ def _split(self) -> List[Block]: return new_blocks @final - def split_and_operate(self, func, *args, **kwargs) -> List[Block]: + def split_and_operate(self, func, *args, **kwargs) -> list[Block]: """ Split the block and apply func column-by-column. @@ -510,7 +506,7 @@ def split_and_operate(self, func, *args, **kwargs) -> List[Block]: return res_blocks @final - def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: + def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]: if self.dtype == _dtype_obj: # TODO: why is behavior different for object dtype? @@ -531,7 +527,7 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: return extend_blocks([b.downcast(downcast) for b in blocks]) @final - def downcast(self, dtypes=None) -> List[Block]: + def downcast(self, dtypes=None) -> list[Block]: """ try to downcast each item to the dict of dtypes if present """ # turn it off completely if dtypes is False: @@ -562,7 +558,7 @@ def downcast(self, dtypes=None) -> List[Block]: return self._downcast_2d() @maybe_split - def _downcast_2d(self) -> List[Block]: + def _downcast_2d(self) -> list[Block]: """ downcast specialized to 2D case post-validation. @@ -609,7 +605,7 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - ) -> List[Block]: + ) -> list[Block]: """ attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we are not an ObjectBlock @@ -663,7 +659,7 @@ def replace( value, inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ replace the to_replace value with value, possible to create new blocks here this is just a call to putmask. regex is not used here. @@ -730,7 +726,7 @@ def _replace_regex( inplace: bool = False, convert: bool = True, mask=None, - ) -> List[Block]: + ) -> list[Block]: """ Replace elements by the given value. @@ -767,11 +763,11 @@ def _replace_regex( @final def _replace_list( self, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ See BlockManager._replace_list docstring. """ @@ -813,7 +809,7 @@ def _replace_list( rb = [self if inplace else self.copy()] for i, (src, dest) in enumerate(pairs): convert = i == src_len # only convert once at the end - new_rb: List[Block] = [] + new_rb: list[Block] = [] # GH-39338: _replace_coerce can split a block into # single-column blocks, so track the index so we know @@ -849,7 +845,7 @@ def _replace_coerce( mask: np.ndarray, inplace: bool = True, regex: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ Replace value corresponding to the given boolean array with another value. 
@@ -1004,7 +1000,7 @@ def setitem(self, indexer, value): block = self.make_block(values) return block - def putmask(self, mask, new) -> List[Block]: + def putmask(self, mask, new) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1089,16 +1085,16 @@ def interpolate( self, method: str = "pad", axis: int = 0, - index: Optional[Index] = None, + index: Index | None = None, inplace: bool = False, - limit: Optional[int] = None, + limit: int | None = None, limit_direction: str = "forward", - limit_area: Optional[str] = None, - fill_value: Optional[Any] = None, + limit_area: str | None = None, + fill_value: Any | None = None, coerce: bool = False, - downcast: Optional[str] = None, + downcast: str | None = None, **kwargs, - ) -> List[Block]: + ) -> list[Block]: inplace = validate_bool_kwarg(inplace, "inplace") @@ -1139,7 +1135,7 @@ def take_nd( self, indexer, axis: int, - new_mgr_locs: Optional[BlockPlacement] = None, + new_mgr_locs: BlockPlacement | None = None, fill_value=lib.no_default, ) -> Block: """ @@ -1173,12 +1169,12 @@ def take_nd( else: return self.make_block_same_class(new_values, new_mgr_locs) - def diff(self, n: int, axis: int = 1) -> List[Block]: + def diff(self, n: int, axis: int = 1) -> list[Block]: """ return block for the diff of the values """ new_values = algos.diff(self.values, n, axis=axis, stacklevel=7) return [self.make_block(values=new_values)] - def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: """ shift the block by periods, possibly upcast """ # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also @@ -1193,7 +1189,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo return [self.make_block(new_values)] - def where(self, other, cond, errors="raise") -> List[Block]: + def where(self, other, cond, errors="raise") -> list[Block]: """ evaluate the block; return result block(s) from the result @@ -1267,7 +1263,7 @@ def where(self, other, cond, errors="raise") -> List[Block]: cond = cond.swapaxes(axis, 0) mask = cond.all(axis=1) - result_blocks: List[Block] = [] + result_blocks: list[Block] = [] for m in [mask, ~mask]: if m.any(): result = cast(np.ndarray, result) # EABlock overrides where @@ -1415,7 +1411,7 @@ def set_inplace(self, locs, values): # _cache not yet initialized pass - def putmask(self, mask, new) -> List[Block]: + def putmask(self, mask, new) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -1482,7 +1478,7 @@ def setitem(self, indexer, value): self.values[indexer] = value return self - def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: # ExtensionArrays must be iterable, so this works. 
# TODO(EA2D): reshape not needed with 2D EAs return np.asarray(self.values).reshape(self.shape) @@ -1495,7 +1491,7 @@ def take_nd( self, indexer, axis: int = 0, - new_mgr_locs: Optional[BlockPlacement] = None, + new_mgr_locs: BlockPlacement | None = None, fill_value=lib.no_default, ) -> Block: """ @@ -1558,7 +1554,7 @@ def _slice(self, slicer): def fillna( self, value, limit=None, inplace: bool = False, downcast=None - ) -> List[Block]: + ) -> list[Block]: values = self.values.fillna(value=value, limit=limit) return [self.make_block_same_class(values=values)] @@ -1568,7 +1564,7 @@ def interpolate( new_values = self.values.fillna(value=fill_value, method=method, limit=limit) return self.make_block_same_class(new_values) - def diff(self, n: int, axis: int = 1) -> List[Block]: + def diff(self, n: int, axis: int = 1) -> list[Block]: if axis == 0 and n != 0: # n==0 case will be a no-op so let is fall through # Since we only have one column, the result will be all-NA. @@ -1581,7 +1577,7 @@ def diff(self, n: int, axis: int = 1) -> List[Block]: axis = 0 return super().diff(n, axis) - def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: """ Shift the block by `periods`. @@ -1591,7 +1587,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo new_values = self.values.shift(periods=periods, fill_value=fill_value) return [self.make_block_same_class(new_values)] - def where(self, other, cond, errors="raise") -> List[Block]: + def where(self, other, cond, errors="raise") -> list[Block]: cond = extract_bool_array(cond) assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) @@ -1685,7 +1681,7 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._ndarray.base is not None - def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: + def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ @@ -1701,7 +1697,7 @@ def iget(self, key): # TODO(EA2D): this can be removed if we ever have 2D EA return self.values.reshape(self.shape)[key] - def putmask(self, mask, new) -> List[Block]: + def putmask(self, mask, new) -> list[Block]: mask = extract_bool_array(mask) if not self._can_hold_element(new): @@ -1712,7 +1708,7 @@ def putmask(self, mask, new) -> List[Block]: arr.T.putmask(mask, new) return [self] - def where(self, other, cond, errors="raise") -> List[Block]: + def where(self, other, cond, errors="raise") -> list[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs arr = self.values.reshape(self.shape) @@ -1728,7 +1724,7 @@ def where(self, other, cond, errors="raise") -> List[Block]: nb = self.make_block_same_class(res_values) return [nb] - def diff(self, n: int, axis: int = 0) -> List[Block]: + def diff(self, n: int, axis: int = 0) -> list[Block]: """ 1st discrete difference. 
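[Editor's note] Signatures such as `get_values(self, dtype: DtypeObj | None = None)` in the hunks above are pure spelling changes: `X | None` and `Optional[X]` denote the same type to every checker. A demonstration with placeholder types (`object` stands in for `DtypeObj`):

```python
# Placeholder function; ``object`` stands in for DtypeObj.
from __future__ import annotations

from typing import Optional


def get_values(dtype: object | None = None) -> Optional[object]:
    # The parameter and return annotations spell the same optional type
    # two ways; this patch standardises on the ``| None`` form.
    return dtype


print(get_values("f8"))            # f8
print(get_values.__annotations__)  # both stored as plain strings
```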
@@ -1754,7 +1750,7 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: new_values = values - values.shift(n, axis=axis) return [self.make_block(new_values)] - def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: + def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs values = self.values.reshape(self.shape) new_values = values.shift(periods, fill_value=fill_value, axis=axis) @@ -1762,7 +1758,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo def fillna( self, value, limit=None, inplace: bool = False, downcast=None - ) -> List[Block]: + ) -> list[Block]: if not self._can_hold_element(value) and self.dtype.kind != "m": # We support filling a DatetimeTZ with a `value` whose timezone @@ -1782,7 +1778,7 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock): __slots__ = () is_numeric = False - values: Union[DatetimeArray, TimedeltaArray] + values: DatetimeArray | TimedeltaArray class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlock): @@ -1814,7 +1810,7 @@ class ObjectBlock(Block): values: np.ndarray @maybe_split - def reduce(self, func, ignore_failures: bool = False) -> List[Block]: + def reduce(self, func, ignore_failures: bool = False) -> list[Block]: """ For object-dtype, we operate column-wise. """ @@ -1839,7 +1835,7 @@ def convert( datetime: bool = True, numeric: bool = True, timedelta: bool = True, - ) -> List[Block]: + ) -> list[Block]: """ attempt to cast any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! @@ -1892,7 +1888,7 @@ def maybe_coerce_values(values) -> ArrayLike: return values -def get_block_type(values, dtype: Optional[Dtype] = None): +def get_block_type(values, dtype: Dtype | None = None): """ Find the appropriate Block subclass to use for the given values and dtype. @@ -1911,7 +1907,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None): vtype = dtype.type kind = dtype.kind - cls: Type[Block] + cls: type[Block] if is_sparse(dtype): # Need this first(ish) so that Sparse[datetime] is sparse @@ -1991,8 +1987,8 @@ def check_ndim(values, placement: BlockPlacement, ndim: int): def extract_pandas_array( - values: Union[np.ndarray, ExtensionArray], dtype: Optional[DtypeObj], ndim: int -) -> Tuple[Union[np.ndarray, ExtensionArray], Optional[DtypeObj]]: + values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int +) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]: """ Ensure that we don't allow PandasArray / PandasDtype in internals. """ @@ -2012,7 +2008,7 @@ def extract_pandas_array( # ----------------------------------------------------------------- -def extend_blocks(result, blocks=None) -> List[Block]: +def extend_blocks(result, blocks=None) -> list[Block]: """ return a new extended blocks, given the result """ if blocks is None: blocks = [] diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 19c9b27db9f70..b1f90834f09c3 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -4,8 +4,6 @@ import itertools from typing import ( TYPE_CHECKING, - Dict, - List, Sequence, ) @@ -56,7 +54,7 @@ def _concatenate_array_managers( - mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool ) -> Manager: """ Concatenate array managers into one. 
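[Editor's note] Annotations like `dict[int, np.ndarray]` in the `concat.py` hunks need no typing import at all. Beyond PEP 563, PEP 585 makes the builtin containers subscriptable at runtime on Python 3.9+, producing `types.GenericAlias` objects. Illustration (this one requires 3.9+, since it subscripts a builtin outside an annotation):

```python
# Requires Python >= 3.9: subscripting a builtin outside an annotation.
alias = dict[int, str]
print(alias)             # dict[int, str]
print(alias.__origin__)  # <class 'dict'>
print(alias.__args__)    # (<class 'int'>, <class 'str'>)
print(alias({1: "a"}))   # {1: 'a'}  (calling the alias builds a real dict)
```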
@@ -95,7 +93,7 @@ def _concatenate_array_managers( def concatenate_managers( - mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool ) -> Manager: """ Concatenate block managers into one. @@ -160,7 +158,7 @@ def concatenate_managers( return BlockManager(blocks, axes) -def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: Dict[int, np.ndarray]): +def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]): """ Construct concatenation plan for given block manager and indexers. @@ -395,7 +393,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: def _concatenate_join_units( - join_units: List[JoinUnit], concat_axis: int, copy: bool + join_units: list[JoinUnit], concat_axis: int, copy: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. @@ -500,7 +498,7 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj: return dtype -def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool: +def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool: """ Check if the join units consist of blocks of uniform type that can be concatenated using Block.concat_same_type instead of the generic diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 6364816b9ab2d..2960fb292818a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -8,13 +8,8 @@ from typing import ( TYPE_CHECKING, Any, - Dict, Hashable, - List, - Optional, Sequence, - Tuple, - Union, ) import numpy as np @@ -102,9 +97,9 @@ def arrays_to_mgr( index, columns, *, - dtype: Optional[DtypeObj] = None, + dtype: DtypeObj | None = None, verify_integrity: bool = True, - typ: Optional[str] = None, + typ: str | None = None, consolidate: bool = True, ) -> Manager: """ @@ -146,10 +141,10 @@ def arrays_to_mgr( def rec_array_to_mgr( - data: Union[MaskedRecords, np.recarray, np.ndarray], + data: MaskedRecords | np.recarray | np.ndarray, index, columns, - dtype: Optional[DtypeObj], + dtype: DtypeObj | None, copy: bool, typ: str, ): @@ -192,7 +187,7 @@ def rec_array_to_mgr( return mgr -def fill_masked_arrays(data: MaskedRecords, arr_columns: Index) -> List[np.ndarray]: +def fill_masked_arrays(data: MaskedRecords, arr_columns: Index) -> list[np.ndarray]: """ Convert numpy MaskedRecords to ensure mask is softened. """ @@ -246,7 +241,7 @@ def mgr_to_mgr(mgr, typ: str): def ndarray_to_mgr( - values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str + values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str ) -> Manager: # used in DataFrame.__init__ # input must be a ndarray, list, Series, Index, ExtensionArray @@ -380,11 +375,11 @@ def maybe_squeeze_dt64tz(dta: ArrayLike) -> ArrayLike: def dict_to_mgr( - data: Dict, + data: dict, index, columns, *, - dtype: Optional[DtypeObj] = None, + dtype: DtypeObj | None = None, typ: str = "block", copy: bool = True, ) -> Manager: @@ -394,7 +389,7 @@ def dict_to_mgr( Used in DataFrame.__init__ """ - arrays: Union[Sequence[Any], Series] + arrays: Sequence[Any] | Series if columns is not None: from pandas.core.series import Series @@ -455,9 +450,9 @@ def dict_to_mgr( def nested_data_to_arrays( data: Sequence, - columns: Optional[Index], - index: Optional[Index], - dtype: Optional[DtypeObj], + columns: Index | None, + index: Index | None, + dtype: DtypeObj | None, ): """ Convert a single sequence of arrays to multiple arrays. 
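[Editor's note] The `construction.py` rewrites include nested and bare builtin generics, e.g. `list[tuple | list]` in the `_list_to_arrays` hunk just below. All of it parses as a string under PEP 563. A hypothetical helper in the same spirit (not pandas code):

```python
# Hypothetical helper, not pandas code.
from __future__ import annotations


def rows_to_strings(data: list[tuple | list]) -> tuple[list[str], int]:
    # Bare ``tuple`` / ``list`` and nested generics are fine here:
    # under PEP 563 the whole annotation is just a string.
    return [" ".join(map(str, row)) for row in data], len(data)


rows, n = rows_to_strings([(1, 2), [3, 4]])
print(rows, n)  # ['1 2', '3 4'] 2
```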
@@ -540,7 +535,7 @@ def convert(v): return values -def _homogenize(data, index: Index, dtype: Optional[DtypeObj]): +def _homogenize(data, index: Index, dtype: DtypeObj | None): oindex = None homogenized = [] @@ -585,7 +580,7 @@ def extract_index(data) -> Index: index = Index([]) elif len(data) > 0: raw_lengths = [] - indexes: List[Union[List[Hashable], Index]] = [] + indexes: list[list[Hashable] | Index] = [] have_raw_arrays = False have_series = False @@ -638,8 +633,8 @@ def extract_index(data) -> Index: def reorder_arrays( - arrays: List[ArrayLike], arr_columns: Index, columns: Optional[Index] -) -> Tuple[List[ArrayLike], Index]: + arrays: list[ArrayLike], arr_columns: Index, columns: Index | None +) -> tuple[list[ArrayLike], Index]: # reorder according to the columns if columns is not None and len(columns) and len(arr_columns): indexer = ensure_index(arr_columns).get_indexer(columns) @@ -653,7 +648,7 @@ def _get_names_from_index(data) -> Index: if not has_some_name: return ibase.default_index(len(data)) - index: List[Hashable] = list(range(len(data))) + index: list[Hashable] = list(range(len(data))) count = 0 for i, s in enumerate(data): n = getattr(s, "name", None) @@ -667,8 +662,8 @@ def _get_names_from_index(data) -> Index: def _get_axes( - N: int, K: int, index: Optional[Index], columns: Optional[Index] -) -> Tuple[Index, Index]: + N: int, K: int, index: Index | None, columns: Index | None +) -> tuple[Index, Index]: # helper to create the axes as indexes # return axes or defaults @@ -717,8 +712,8 @@ def dataclasses_to_dicts(data): def to_arrays( - data, columns: Optional[Index], dtype: Optional[DtypeObj] = None -) -> Tuple[List[ArrayLike], Index]: + data, columns: Index | None, dtype: DtypeObj | None = None +) -> tuple[list[ArrayLike], Index]: """ Return list of arrays, columns. """ @@ -770,7 +765,7 @@ def to_arrays( return content, columns -def _list_to_arrays(data: List[Union[Tuple, List]]) -> np.ndarray: +def _list_to_arrays(data: list[tuple | list]) -> np.ndarray: # Returned np.ndarray has ndim = 2 # Note: we already check len(data) > 0 before getting hre if isinstance(data[0], tuple): @@ -782,9 +777,9 @@ def _list_to_arrays(data: List[Union[Tuple, List]]) -> np.ndarray: def _list_of_series_to_arrays( - data: List, - columns: Optional[Index], -) -> Tuple[np.ndarray, Index]: + data: list, + columns: Index | None, +) -> tuple[np.ndarray, Index]: # returned np.ndarray has ndim == 2 if columns is None: @@ -792,7 +787,7 @@ def _list_of_series_to_arrays( pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] columns = get_objs_combined_axis(pass_data, sort=False) - indexer_cache: Dict[int, np.ndarray] = {} + indexer_cache: dict[int, np.ndarray] = {} aligned_values = [] for s in data: @@ -818,9 +813,9 @@ def _list_of_series_to_arrays( def _list_of_dict_to_arrays( - data: List[Dict], - columns: Optional[Index], -) -> Tuple[np.ndarray, Index]: + data: list[dict], + columns: Index | None, +) -> tuple[np.ndarray, Index]: """ Convert list of dicts to numpy arrays @@ -856,9 +851,9 @@ def _list_of_dict_to_arrays( def _finalize_columns_and_data( content: np.ndarray, # ndim == 2 - columns: Optional[Index], - dtype: Optional[DtypeObj], -) -> Tuple[List[ArrayLike], Index]: + columns: Index | None, + dtype: DtypeObj | None, +) -> tuple[list[ArrayLike], Index]: """ Ensure we have valid columns, cast object dtypes if possible. 
""" @@ -877,7 +872,7 @@ def _finalize_columns_and_data( def _validate_or_indexify_columns( - content: List[np.ndarray], columns: Optional[Index] + content: list[np.ndarray], columns: Index | None ) -> Index: """ If columns is None, make numbers as column names; Otherwise, validate that @@ -935,8 +930,8 @@ def _validate_or_indexify_columns( def _convert_object_array( - content: List[np.ndarray], dtype: Optional[DtypeObj] -) -> List[ArrayLike]: + content: list[np.ndarray], dtype: DtypeObj | None +) -> list[ArrayLike]: """ Internal function to convert object array. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 549d4337dcf54..05d65537c69ba 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -6,14 +6,9 @@ Any, Callable, DefaultDict, - Dict, Hashable, - List, - Optional, Sequence, - Tuple, TypeVar, - Union, ) import warnings @@ -161,7 +156,7 @@ def __init__( verify_integrity: bool = True, ): self.axes = [ensure_index(ax) for ax in axes] - self.blocks: Tuple[Block, ...] = tuple(blocks) + self.blocks: tuple[Block, ...] = tuple(blocks) for block in blocks: if self.ndim != block.ndim: @@ -183,7 +178,7 @@ def __init__( self._blklocs = None # type: ignore[assignment] @classmethod - def _simple_new(cls, blocks: Tuple[Block, ...], axes: List[Index]): + def _simple_new(cls, blocks: tuple[Block, ...], axes: list[Index]): """ Fastpath constructor; does NO validation. """ @@ -198,7 +193,7 @@ def _simple_new(cls, blocks: Tuple[Block, ...], axes: List[Index]): return obj @classmethod - def from_blocks(cls, blocks: List[Block], axes: List[Index]): + def from_blocks(cls, blocks: list[Block], axes: list[Index]): """ Constructor for BlockManager and SingleBlockManager with same signature. """ @@ -311,7 +306,7 @@ def get_dtypes(self): return dtypes.take(self.blknos) @property - def arrays(self) -> List[ArrayLike]: + def arrays(self) -> list[ArrayLike]: """ Quick access to the backing arrays of the Blocks. @@ -392,7 +387,7 @@ def _verify_integrity(self) -> None: def reduce( self: T, func: Callable, ignore_failures: bool = False - ) -> Tuple[T, np.ndarray]: + ) -> tuple[T, np.ndarray]: """ Apply reduction function blockwise, returning a single-row BlockManager. 
@@ -411,7 +406,7 @@ def reduce( # If 2D, we assume that we're operating column-wise assert self.ndim == 2 - res_blocks: List[Block] = [] + res_blocks: list[Block] = [] for blk in self.blocks: nbs = blk.reduce(func, ignore_failures) res_blocks.extend(nbs) @@ -443,7 +438,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: ------- BlockManager """ - result_blocks: List[Block] = [] + result_blocks: list[Block] = [] for blk in self.blocks: try: @@ -473,7 +468,7 @@ def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: def apply( self: T, f, - align_keys: Optional[List[str]] = None, + align_keys: list[str] | None = None, ignore_failures: bool = False, **kwargs, ) -> T: @@ -496,7 +491,7 @@ def apply( assert "filter" not in kwargs align_keys = align_keys or [] - result_blocks: List[Block] = [] + result_blocks: list[Block] = [] # fillna: Series/DataFrame is responsible for making sure value is aligned aligned_args = {k: kwargs[k] for k in align_keys} @@ -657,8 +652,8 @@ def replace(self, to_replace, value, inplace: bool, regex: bool) -> BlockManager def replace_list( self: T, - src_list: List[Any], - dest_list: List[Any], + src_list: list[Any], + dest_list: list[Any], inplace: bool = False, regex: bool = False, ) -> T: @@ -754,7 +749,7 @@ def get_numeric_data(self, copy: bool = False) -> BlockManager: return self._combine([b for b in self.blocks if b.is_numeric], copy) def _combine( - self: T, blocks: List[Block], copy: bool = True, index: Optional[Index] = None + self: T, blocks: list[Block], copy: bool = True, index: Index | None = None ) -> T: """ return a new manager with the blocks """ if len(blocks) == 0: @@ -764,7 +759,7 @@ def _combine( indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] for b in blocks: b = b.copy(deep=copy) b.mgr_locs = BlockPlacement(inv_indexer[b.mgr_locs.indexer]) @@ -828,7 +823,7 @@ def copy_func(ax): def as_array( self, transpose: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, copy: bool = False, na_value=lib.no_default, ) -> np.ndarray: @@ -893,7 +888,7 @@ def as_array( return arr.transpose() if transpose else arr def _interleave( - self, dtype: Optional[Dtype] = None, na_value=lib.no_default + self, dtype: Dtype | None = None, na_value=lib.no_default ) -> np.ndarray: """ Return ndarray from blocks with specified item order @@ -956,7 +951,7 @@ def to_dict(self, copy: bool = True): values : a dict of dtype -> BlockManager """ - bd: Dict[str, List[Block]] = {} + bd: dict[str, list[Block]] = {} for b in self.blocks: bd.setdefault(str(b.dtype), []).append(b) @@ -1062,7 +1057,7 @@ def idelete(self, indexer) -> BlockManager: axes = [new_columns, self.axes[1]] return type(self)._simple_new(tuple(nbs), axes) - def iset(self, loc: Union[int, slice, np.ndarray], value: ArrayLike): + def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): """ Set new item in-place. Does not consolidate. Adds new Block if not contained in the current set of items @@ -1146,7 +1141,7 @@ def value_getitem(placement): unfit_mgr_locs = np.concatenate(unfit_mgr_locs) unfit_count = len(unfit_mgr_locs) - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] if value_is_extension_type: # This code (ab-)uses the fact that EA blocks contain only # one item. 
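[Editor's note] The `reduce()`/`apply()` hunks here keep the `self: T ... -> T` idiom while modernizing the container types around it. This bound-TypeVar pattern (the predecessor of PEP 673's `Self`) is what lets subclass managers keep their own type through these methods. Reduced to a toy with invented names:

```python
# Reduced illustration; class and method names are invented.
from __future__ import annotations

from typing import TypeVar

T = TypeVar("T", bound="BaseManager")


class BaseManager:
    def apply_noop(self: T) -> T:
        # Annotating ``self`` with a bound TypeVar lets checkers carry
        # the concrete subclass type through the method.
        return type(self)()


class SingleManager(BaseManager):
    pass


print(type(SingleManager().apply_noop()).__name__)  # SingleManager
```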
@@ -1305,10 +1300,10 @@ def reindex_indexer( def _slice_take_blocks_ax0( self, - slice_or_indexer: Union[slice, np.ndarray], + slice_or_indexer: slice | np.ndarray, fill_value=lib.no_default, only_slice: bool = False, - ) -> List[Block]: + ) -> list[Block]: """ Slice/take blocks along axis=0. @@ -1511,8 +1506,8 @@ def unstack(self, unstacker, fill_value) -> BlockManager: new_columns = unstacker.get_new_columns(self.items) new_index = unstacker.new_index - new_blocks: List[Block] = [] - columns_mask: List[np.ndarray] = [] + new_blocks: list[Block] = [] + columns_mask: list[np.ndarray] = [] for blk in self.blocks: blk_cols = self.items[blk.mgr_locs.indexer] @@ -1563,7 +1558,7 @@ def __init__( self.blocks = (block,) @classmethod - def from_blocks(cls, blocks: List[Block], axes: List[Index]) -> SingleBlockManager: + def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> SingleBlockManager: """ Constructor for BlockManager and SingleBlockManager with same signature. """ @@ -1691,7 +1686,7 @@ def set_values(self, values: ArrayLike): def create_block_manager_from_blocks( - blocks: List[Block], axes: List[Index], consolidate: bool = True + blocks: list[Block], axes: list[Index], consolidate: bool = True ) -> BlockManager: try: mgr = BlockManager(blocks, axes) @@ -1714,7 +1709,7 @@ def _extract_array(obj): def create_block_manager_from_arrays( arrays, names: Index, - axes: List[Index], + axes: list[Index], consolidate: bool = True, ) -> BlockManager: assert isinstance(names, Index) @@ -1736,8 +1731,8 @@ def create_block_manager_from_arrays( def construction_error( tot_items: int, block_shape: Shape, - axes: List[Index], - e: Optional[ValueError] = None, + axes: list[Index], + e: ValueError | None = None, ): """ raise a helpful message about our construction """ passed = tuple(map(int, [tot_items] + list(block_shape))) @@ -1763,11 +1758,11 @@ def construction_error( def _form_blocks( - arrays: List[ArrayLike], names: Index, axes: List[Index], consolidate: bool -) -> List[Block]: + arrays: list[ArrayLike], names: Index, axes: list[Index], consolidate: bool +) -> list[Block]: # put "leftover" items in float bucket, where else? # generalize? 
- items_dict: DefaultDict[str, List] = defaultdict(list) + items_dict: DefaultDict[str, list] = defaultdict(list) extra_locs = [] names_idx = names @@ -1787,7 +1782,7 @@ def _form_blocks( block_type = get_block_type(v) items_dict[block_type.__name__].append((i, v)) - blocks: List[Block] = [] + blocks: list[Block] = [] if len(items_dict["NumericBlock"]): numeric_blocks = _multi_blockify( items_dict["NumericBlock"], consolidate=consolidate @@ -1841,7 +1836,7 @@ def _form_blocks( return blocks -def _simple_blockify(tuples, dtype, consolidate: bool) -> List[Block]: +def _simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]: """ return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype @@ -1859,7 +1854,7 @@ def _simple_blockify(tuples, dtype, consolidate: bool) -> List[Block]: return [block] -def _multi_blockify(tuples, dtype: Optional[DtypeObj] = None, consolidate: bool = True): +def _multi_blockify(tuples, dtype: DtypeObj | None = None, consolidate: bool = True): """ return an array of blocks that potentially have different dtypes """ if not consolidate: @@ -1884,7 +1879,7 @@ def _multi_blockify(tuples, dtype: Optional[DtypeObj] = None, consolidate: bool return new_blocks -def _tuples_to_blocks_no_consolidate(tuples, dtype: Optional[DtypeObj]) -> List[Block]: +def _tuples_to_blocks_no_consolidate(tuples, dtype: DtypeObj | None) -> list[Block]: # tuples produced within _form_blocks are of the form (placement, whatever, array) if dtype is not None: return [ @@ -1910,7 +1905,7 @@ def _stack_arrays(tuples, dtype: np.dtype): return stacked, placement -def _consolidate(blocks: Tuple[Block, ...]) -> List[Block]: +def _consolidate(blocks: tuple[Block, ...]) -> list[Block]: """ Merge blocks having same dtype, exclude non-consolidating blocks """ @@ -1918,7 +1913,7 @@ def _consolidate(blocks: Tuple[Block, ...]) -> List[Block]: gkey = lambda x: x._consolidate_key grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] for (_can_consolidate, dtype), group_blocks in grouper: merged_blocks = _merge_blocks( list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate @@ -1928,8 +1923,8 @@ def _consolidate(blocks: Tuple[Block, ...]) -> List[Block]: def _merge_blocks( - blocks: List[Block], dtype: DtypeObj, can_consolidate: bool -) -> List[Block]: + blocks: list[Block], dtype: DtypeObj, can_consolidate: bool +) -> list[Block]: if len(blocks) == 1: return blocks @@ -1964,7 +1959,7 @@ def _fast_count_smallints(arr: np.ndarray) -> np.ndarray: def _preprocess_slice_or_indexer( - slice_or_indexer: Union[slice, np.ndarray], length: int, allow_fill: bool + slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool ): if isinstance(slice_or_indexer, slice): return ( diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index be5224fe32ae1..779b5100ecdd8 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -4,8 +4,6 @@ from typing import ( TYPE_CHECKING, Iterator, - List, - Tuple, ) from pandas._typing import ArrayLike @@ -54,7 +52,7 @@ def operate_blockwise( # At this point we have already checked the parent DataFrames for # assert rframe._indexed_same(lframe) - res_blks: List[Block] = [] + res_blks: list[Block] = [] for lvals, rvals, locs, left_ea, right_ea, rblk in _iter_block_pairs(left, right): res_values = array_op(lvals, rvals) if left_ea and not right_ea and hasattr(res_values, "reshape"): @@ -82,7 +80,7 @@ def operate_blockwise( 
return new_mgr -def _reset_block_mgr_locs(nbs: List[Block], locs): +def _reset_block_mgr_locs(nbs: list[Block], locs): """ Reset mgr_locs to correspond to our original DataFrame. """ @@ -96,7 +94,7 @@ def _reset_block_mgr_locs(nbs: List[Block], locs): def _get_same_shape_values( lblk: Block, rblk: Block, left_ea: bool, right_ea: bool -) -> Tuple[ArrayLike, ArrayLike]: +) -> tuple[ArrayLike, ArrayLike]: """ Slice lblk.values to align with rblk. Squeeze if we have EAs. """
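[Editor's note] Not every alias is converted: `DefaultDict[str, list]` in `_form_blocks` above keeps its typing spelling even though, as a string under PEP 563, `defaultdict[str, list]` would be equally valid (`typing.DefaultDict` is just the generic alias of `collections.defaultdict`). The diff does not say why; both forms side by side:

```python
# Both spellings as annotations; safe on 3.7+ thanks to PEP 563.
from __future__ import annotations

from collections import defaultdict
from typing import DefaultDict

a: DefaultDict[str, list] = defaultdict(list)  # spelling kept by the diff
b: defaultdict[str, list] = defaultdict(list)  # equally valid as a string
a["x"].append(1)
b["y"].append(2)
print(dict(a), dict(b))  # {'x': [1]} {'y': [2]}
```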