From f92114ddb5e6bc19483815516813ae0f0a091251 Mon Sep 17 00:00:00 2001 From: fathomer Date: Tue, 6 Apr 2021 18:52:18 +0530 Subject: [PATCH] STY: remove --keep-runtime-typing from pyupgrade #40759 Part-4 --- pandas/_typing.py | 4 +- pandas/core/ops/__init__.py | 12 +- pandas/core/reshape/concat.py | 20 ++- pandas/core/reshape/melt.py | 3 +- pandas/core/reshape/merge.py | 67 +++++---- pandas/core/reshape/pivot.py | 28 ++-- pandas/core/reshape/reshape.py | 15 +-- pandas/core/tools/datetimes.py | 55 ++++---- pandas/core/util/hashing.py | 9 +- pandas/core/window/ewm.py | 30 ++--- pandas/core/window/rolling.py | 125 ++++++++--------- pandas/io/common.py | 35 +++-- pandas/io/excel/_base.py | 17 +-- pandas/io/excel/_openpyxl.py | 21 ++- pandas/io/formats/csvs.py | 46 +++---- pandas/io/gbq.py | 32 ++--- pandas/io/orc.py | 8 +- pandas/io/parquet.py | 24 ++-- pandas/io/pytables.py | 240 ++++++++++++++++----------------- pandas/io/stata.py | 155 +++++++++++---------- 20 files changed, 432 insertions(+), 514 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index f90ef33434773..7c74fc54b8d67 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -25,7 +25,7 @@ Optional, Sequence, Tuple, - Type, + Type as type_t, TypeVar, Union, ) @@ -119,7 +119,7 @@ # dtypes NpDtype = Union[str, np.dtype] Dtype = Union[ - "ExtensionDtype", NpDtype, Type[Union[str, float, int, complex, bool, object]] + "ExtensionDtype", NpDtype, type_t[Union[str, float, int, complex, bool, object]] ] # DtypeArg specifies all allowable dtypes in a functions its dtype argument DtypeArg = Union[Dtype, Dict[Hashable, Dtype]] diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 4ebcd6533af2e..689f27a25f11b 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -6,11 +6,7 @@ from __future__ import annotations import operator -from typing import ( - TYPE_CHECKING, - Optional, - Set, -) +from typing import TYPE_CHECKING import warnings import numpy as np @@ -79,7 +75,7 @@ # ----------------------------------------------------------------------------- # constants -ARITHMETIC_BINOPS: Set[str] = { +ARITHMETIC_BINOPS: set[str] = { "add", "sub", "mul", @@ -99,7 +95,7 @@ } -COMPARISON_BINOPS: Set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} +COMPARISON_BINOPS: set[str] = {"eq", "ne", "lt", "gt", "le", "ge"} # ----------------------------------------------------------------------------- @@ -207,7 +203,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): def align_method_FRAME( - left, right, axis, flex: Optional[bool] = False, level: Level = None + left, right, axis, flex: bool | None = False, level: Level = None ): """ Convert rhs to meet lhs dims if input is list, tuple or np.ndarray. 
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index a8c6913cd5d6c..b3b453ea6355a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -8,11 +8,7 @@ TYPE_CHECKING, Hashable, Iterable, - List, Mapping, - Optional, - Type, - Union, cast, overload, ) @@ -58,7 +54,7 @@ @overload def concat( - objs: Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]], + objs: Iterable[DataFrame] | Mapping[Hashable, DataFrame], axis=0, join: str = "outer", ignore_index: bool = False, @@ -74,7 +70,7 @@ def concat( @overload def concat( - objs: Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]], + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], axis=0, join: str = "outer", ignore_index: bool = False, @@ -89,7 +85,7 @@ def concat( def concat( - objs: Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]], + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], axis=0, join="outer", ignore_index: bool = False, @@ -314,7 +310,7 @@ class _Concatenator: def __init__( self, - objs: Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]], + objs: Iterable[NDFrame] | Mapping[Hashable, NDFrame], axis=0, join: str = "outer", keys=None, @@ -383,7 +379,7 @@ def __init__( # get the sample # want the highest ndim that we have, and must be non-empty # unless all objs are empty - sample: Optional[NDFrame] = None + sample: NDFrame | None = None if len(ndims) > 1: max_ndim = max(ndims) for obj in objs: @@ -474,7 +470,7 @@ def __init__( self.new_axes = self._get_new_axes() def get_result(self): - cons: Type[FrameOrSeriesUnion] + cons: type[FrameOrSeriesUnion] sample: FrameOrSeriesUnion # series only @@ -539,7 +535,7 @@ def _get_result_dim(self) -> int: else: return self.objs[0].ndim - def _get_new_axes(self) -> List[Index]: + def _get_new_axes(self) -> list[Index]: ndim = self._get_result_dim() return [ self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i) @@ -568,7 +564,7 @@ def _get_concat_axis(self) -> Index: idx = ibase.default_index(len(self.objs)) return idx elif self.keys is None: - names: List[Hashable] = [None] * len(self.objs) + names: list[Hashable] = [None] * len(self.objs) num = 0 has_names = False for i, x in enumerate(self.objs): diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 09249eba9c3f5..6a0fad9ee729b 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -3,7 +3,6 @@ import re from typing import ( TYPE_CHECKING, - List, cast, ) import warnings @@ -494,7 +493,7 @@ def wide_to_long( two 2.9 """ - def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]: + def get_var_names(df, stub: str, sep: str, suffix: str) -> list[str]: regex = fr"^{re.escape(stub)}{re.escape(sep)}{suffix}$" pattern = re.compile(regex) return [col for col in df.columns if pattern.match(col)] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a9faf0098b6d4..13e528f38f3bf 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -11,9 +11,6 @@ from typing import ( TYPE_CHECKING, Hashable, - List, - Optional, - Tuple, cast, ) import warnings @@ -94,16 +91,16 @@ def merge( left: FrameOrSeriesUnion, right: FrameOrSeriesUnion, how: str = "inner", - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, sort: bool = False, suffixes: Suffixes = 
("_x", "_y"), copy: bool = True, indicator: bool = False, - validate: Optional[str] = None, + validate: str | None = None, ) -> DataFrame: op = _MergeOperation( left, @@ -143,7 +140,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): by = [by] lby = left.groupby(by, sort=False) - rby: Optional[groupby.DataFrameGroupBy] = None + rby: groupby.DataFrameGroupBy | None = None # if we can groupby the rhs # then we can get vastly better perf @@ -186,12 +183,12 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): def merge_ordered( left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_by=None, right_by=None, - fill_method: Optional[str] = None, + fill_method: str | None = None, suffixes: Suffixes = ("_x", "_y"), how: str = "outer", ) -> DataFrame: @@ -327,9 +324,9 @@ def _merger(x, y) -> DataFrame: def merge_asof( left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, by=None, @@ -614,9 +611,9 @@ def __init__( left: FrameOrSeriesUnion, right: FrameOrSeriesUnion, how: str = "inner", - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, axis: int = 1, left_index: bool = False, right_index: bool = False, @@ -624,7 +621,7 @@ def __init__( suffixes: Suffixes = ("_x", "_y"), copy: bool = True, indicator: bool = False, - validate: Optional[str] = None, + validate: str | None = None, ): _left = _validate_operand(left) _right = _validate_operand(right) @@ -650,7 +647,7 @@ def __init__( self.indicator = indicator - self.indicator_name: Optional[str] + self.indicator_name: str | None if isinstance(self.indicator, str): self.indicator_name = self.indicator elif isinstance(self.indicator, bool): @@ -743,14 +740,14 @@ def get_result(self) -> DataFrame: return result.__finalize__(self, method="merge") def _maybe_drop_cross_column( - self, result: DataFrame, cross_col: Optional[str] + self, result: DataFrame, cross_col: str | None ) -> None: if cross_col is not None: result.drop(columns=cross_col, inplace=True) def _indicator_pre_merge( self, left: DataFrame, right: DataFrame - ) -> Tuple[DataFrame, DataFrame]: + ) -> tuple[DataFrame, DataFrame]: columns = left.columns.union(right.columns) @@ -830,8 +827,8 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None: def _maybe_add_join_keys( self, result: DataFrame, - left_indexer: Optional[np.ndarray], - right_indexer: Optional[np.ndarray], + left_indexer: np.ndarray | None, + right_indexer: np.ndarray | None, ) -> None: left_has_missing = None @@ -1274,7 +1271,7 @@ def _maybe_coerce_merge_keys(self) -> None: def _create_cross_configuration( self, left: DataFrame, right: DataFrame - ) -> Tuple[DataFrame, DataFrame, str, str]: + ) -> tuple[DataFrame, DataFrame, str, str]: """ Creates the configuration to dispatch the cross operation to inner join, e.g. adding a join column and resetting parameters. 
Join column is added @@ -1498,7 +1495,7 @@ def restore_dropped_levels_multijoin( join_index: Index, lindexer: np.ndarray, rindexer: np.ndarray, -) -> Tuple[List[Index], np.ndarray, List[Hashable]]: +) -> tuple[list[Index], np.ndarray, list[Hashable]]: """ *this is an internal non-public method* @@ -1592,15 +1589,15 @@ def __init__( self, left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, axis: int = 1, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - fill_method: Optional[str] = None, + fill_method: str | None = None, how: str = "outer", ): @@ -1686,9 +1683,9 @@ def __init__( self, left: DataFrame, right: DataFrame, - on: Optional[IndexLabel] = None, - left_on: Optional[IndexLabel] = None, - right_on: Optional[IndexLabel] = None, + on: IndexLabel | None = None, + left_on: IndexLabel | None = None, + right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, by=None, @@ -1697,7 +1694,7 @@ def __init__( axis: int = 1, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, - fill_method: Optional[str] = None, + fill_method: str | None = None, how: str = "asof", tolerance=None, allow_exact_matches: bool = True, @@ -2031,7 +2028,7 @@ def _left_join_on_index( def _factorize_keys( lk: ArrayLike, rk: ArrayLike, sort: bool = True, how: str = "inner" -) -> Tuple[np.ndarray, np.ndarray, int]: +) -> tuple[np.ndarray, np.ndarray, int]: """ Encode left and right keys as enumerated types. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index fa0e5c422501a..795f5250012cb 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -3,14 +3,8 @@ from typing import ( TYPE_CHECKING, Callable, - Dict, Hashable, - List, - Optional, Sequence, - Set, - Tuple, - Union, cast, ) @@ -75,7 +69,7 @@ def pivot_table( columns = _convert_by(columns) if isinstance(aggfunc, list): - pieces: List[DataFrame] = [] + pieces: list[DataFrame] = [] keys = [] for func in aggfunc: _table = __internal_pivot_table( @@ -116,7 +110,7 @@ def __internal_pivot_table( values, index, columns, - aggfunc: Union[AggFuncTypeBase, AggFuncTypeDict], + aggfunc: AggFuncTypeBase | AggFuncTypeDict, fill_value, margins: bool, dropna: bool, @@ -275,7 +269,7 @@ def _add_margins( if margins_name in table.columns.get_level_values(level): raise ValueError(msg) - key: Union[str, Tuple[str, ...]] + key: str | tuple[str, ...] 
if len(rows) > 1: key = (margins_name,) + ("",) * (len(rows) - 1) else: @@ -419,7 +413,7 @@ def _generate_marginal_results_without_values( ): if len(cols) > 0: # need to "interleave" the margins - margin_keys: Union[List, Index] = [] + margin_keys: list | Index = [] def _all_key(): if len(cols) == 1: @@ -470,9 +464,9 @@ def _convert_by(by): @Appender(_shared_docs["pivot"], indents=1) def pivot( data: DataFrame, - index: Optional[IndexLabel] = None, - columns: Optional[IndexLabel] = None, - values: Optional[IndexLabel] = None, + index: IndexLabel | None = None, + columns: IndexLabel | None = None, + values: IndexLabel | None = None, ) -> DataFrame: if columns is None: raise TypeError("pivot() missing 1 required argument: 'columns'") @@ -690,7 +684,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): if margins is False: # Actual Normalizations - normalizers: Dict[Union[bool, str], Callable] = { + normalizers: dict[bool | str, Callable] = { "all": lambda x: x / x.sum(axis=1).sum(axis=0), "columns": lambda x: x / x.sum(), "index": lambda x: x.div(x.sum(axis=1), axis=0), @@ -776,8 +770,8 @@ def _get_names(arrs, names, prefix: str = "row"): def _build_names_mapper( - rownames: List[str], colnames: List[str] -) -> Tuple[Dict[str, str], List[str], Dict[str, str], List[str]]: + rownames: list[str], colnames: list[str] +) -> tuple[dict[str, str], list[str], dict[str, str], list[str]]: """ Given the names of a DataFrame's rows and columns, returns a set of unique row and column names and mappers that convert to original names. @@ -806,7 +800,7 @@ def _build_names_mapper( """ def get_duplicates(names): - seen: Set = set() + seen: set = set() return {name for name in names if name not in seen} shared_names = set(rownames).intersection(set(colnames)) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 346dc3732b212..d889e84cb9045 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -3,9 +3,6 @@ import itertools from typing import ( TYPE_CHECKING, - List, - Optional, - Union, cast, ) @@ -765,7 +762,7 @@ def get_dummies( columns=None, sparse: bool = False, drop_first: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ) -> DataFrame: """ Convert categorical variable into dummy/indicator variables. 
@@ -904,7 +901,7 @@ def check_len(item, name): elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] - with_dummies: List[DataFrame] + with_dummies: list[DataFrame] if data_to_encode.shape == data.shape: # Encoding the entire df, do not prepend any dropped columns with_dummies = [] @@ -950,7 +947,7 @@ def _get_dummies_1d( dummy_na: bool = False, sparse: bool = False, drop_first: bool = False, - dtype: Optional[Dtype] = None, + dtype: Dtype | None = None, ) -> DataFrame: from pandas.core.reshape.concat import concat @@ -993,7 +990,7 @@ def get_empty_frame(data) -> DataFrame: else: dummy_cols = Index([f"{prefix}{prefix_sep}{level}" for level in levels]) - index: Optional[Index] + index: Index | None if isinstance(data, Series): index = data.index else: @@ -1001,7 +998,7 @@ def get_empty_frame(data) -> DataFrame: if sparse: - fill_value: Union[bool, float, int] + fill_value: bool | float | int if is_integer_dtype(dtype): fill_value = 0 elif dtype == bool: @@ -1011,7 +1008,7 @@ def get_empty_frame(data) -> DataFrame: sparse_series = [] N = len(data) - sp_indices: List[List] = [[] for _ in range(len(dummy_cols))] + sp_indices: list[list] = [[] for _ in range(len(dummy_cols))] mask = codes != -1 codes = codes[mask] n_idx = np.arange(N)[mask] diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 7619623bb9eda..36313b8b80413 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -9,7 +9,6 @@ Callable, Hashable, List, - Optional, Tuple, TypeVar, Union, @@ -98,7 +97,7 @@ def _guess_datetime_format_for_array(arr, **kwargs): def should_cache( - arg: ArrayConvertible, unique_share: float = 0.7, check_count: Optional[int] = None + arg: ArrayConvertible, unique_share: float = 0.7, check_count: int | None = None ) -> bool: """ Decides whether to do caching. @@ -159,7 +158,7 @@ def should_cache( def _maybe_cache( arg: ArrayConvertible, - format: Optional[str], + format: str | None, cache: bool, convert_listlike: Callable, ) -> Series: @@ -198,7 +197,7 @@ def _maybe_cache( def _box_as_indexlike( - dt_array: ArrayLike, utc: Optional[bool] = None, name: Hashable = None + dt_array: ArrayLike, utc: bool | None = None, name: Hashable = None ) -> Index: """ Properly boxes the ndarray of datetimes to DatetimeIndex @@ -229,7 +228,7 @@ def _box_as_indexlike( def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: Series, - name: Optional[str] = None, + name: str | None = None, ) -> Index: """ Convert array of dates with a cache and wrap the result in an Index. @@ -285,14 +284,14 @@ def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> def _convert_listlike_datetimes( arg, - format: Optional[str], + format: str | None, name: Hashable = None, - tz: Optional[Timezone] = None, - unit: Optional[str] = None, + tz: Timezone | None = None, + unit: str | None = None, errors: str = "raise", infer_datetime_format: bool = False, - dayfirst: Optional[bool] = None, - yearfirst: Optional[bool] = None, + dayfirst: bool | None = None, + yearfirst: bool | None = None, exact: bool = True, ): """ @@ -434,7 +433,7 @@ def _array_strptime_with_fallback( exact: bool, errors: str, infer_datetime_format: bool, -) -> Optional[Index]: +) -> Index | None: """ Call array_strptime, with fallback behavior depending on 'errors'. 
""" @@ -482,7 +481,7 @@ def _to_datetime_with_format( exact: bool, errors: str, infer_datetime_format: bool, -) -> Optional[Index]: +) -> Index | None: """ Try parsing with the given format, returning None on failure. """ @@ -644,14 +643,14 @@ def to_datetime( errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., - utc: Optional[bool] = ..., - format: Optional[str] = ..., + utc: bool | None = ..., + format: str | None = ..., exact: bool = ..., - unit: Optional[str] = ..., + unit: str | None = ..., infer_datetime_format: bool = ..., origin=..., cache: bool = ..., -) -> Union[DatetimeScalar, NaTType]: +) -> DatetimeScalar | NaTType: ... @@ -661,10 +660,10 @@ def to_datetime( errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., - utc: Optional[bool] = ..., - format: Optional[str] = ..., + utc: bool | None = ..., + format: str | None = ..., exact: bool = ..., - unit: Optional[str] = ..., + unit: str | None = ..., infer_datetime_format: bool = ..., origin=..., cache: bool = ..., @@ -674,14 +673,14 @@ def to_datetime( @overload def to_datetime( - arg: Union[List, Tuple], + arg: list | tuple, errors: str = ..., dayfirst: bool = ..., yearfirst: bool = ..., - utc: Optional[bool] = ..., - format: Optional[str] = ..., + utc: bool | None = ..., + format: str | None = ..., exact: bool = ..., - unit: Optional[str] = ..., + unit: str | None = ..., infer_datetime_format: bool = ..., origin=..., cache: bool = ..., @@ -694,14 +693,14 @@ def to_datetime( errors: str = "raise", dayfirst: bool = False, yearfirst: bool = False, - utc: Optional[bool] = None, - format: Optional[str] = None, + utc: bool | None = None, + format: str | None = None, exact: bool = True, - unit: Optional[str] = None, + unit: str | None = None, infer_datetime_format: bool = False, origin="unix", cache: bool = True, -) -> Optional[Union[DatetimeIndex, Series, DatetimeScalar, NaTType]]: +) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None: """ Convert argument to datetime. 
@@ -1035,7 +1034,7 @@ def coerce(values): return values -def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> Optional[np.ndarray]: +def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None: """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 375901bc3fb58..fb5002648b6a5 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -9,9 +9,6 @@ Hashable, Iterable, Iterator, - Optional, - Tuple, - Union, cast, ) @@ -80,10 +77,10 @@ def combine_hash_arrays(arrays: Iterator[np.ndarray], num_items: int) -> np.ndar def hash_pandas_object( - obj: Union[Index, FrameOrSeriesUnion], + obj: Index | FrameOrSeriesUnion, index: bool = True, encoding: str = "utf8", - hash_key: Optional[str] = _default_hash_key, + hash_key: str | None = _default_hash_key, categorize: bool = True, ) -> Series: """ @@ -169,7 +166,7 @@ def hash_pandas_object( def hash_tuples( - vals: Union[MultiIndex, Iterable[Tuple[Hashable, ...]]], + vals: MultiIndex | Iterable[tuple[Hashable, ...]], encoding: str = "utf8", hash_key: str = _default_hash_key, ) -> np.ndarray: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index e35ff5afca66e..67bcdb0a387dd 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -3,10 +3,6 @@ import datetime from functools import partial from textwrap import dedent -from typing import ( - Optional, - Union, -) import warnings import numpy as np @@ -50,10 +46,10 @@ def get_center_of_mass( - comass: Optional[float], - span: Optional[float], - halflife: Optional[float], - alpha: Optional[float], + comass: float | None, + span: float | None, + halflife: float | None, + alpha: float | None, ) -> float: valid_count = common.count_not_none(comass, span, halflife, alpha) if valid_count > 1: @@ -229,15 +225,15 @@ class ExponentialMovingWindow(BaseWindow): def __init__( self, obj: FrameOrSeries, - com: Optional[float] = None, - span: Optional[float] = None, - halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, - alpha: Optional[float] = None, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, min_periods: int = 0, adjust: bool = True, ignore_na: bool = False, axis: Axis = 0, - times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, + times: str | np.ndarray | FrameOrSeries | None = None, ): super().__init__( obj=obj, @@ -472,8 +468,8 @@ def var_func(values, begin, end, min_periods): ) def cov( self, - other: Optional[FrameOrSeriesUnion] = None, - pairwise: Optional[bool] = None, + other: FrameOrSeriesUnion | None = None, + pairwise: bool | None = None, bias: bool = False, **kwargs, ): @@ -537,8 +533,8 @@ def cov_func(x, y): ) def corr( self, - other: Optional[FrameOrSeriesUnion] = None, - pairwise: Optional[bool] = None, + other: FrameOrSeriesUnion | None = None, + pairwise: bool | None = None, **kwargs, ): from pandas import Series diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c7fa6f99bfb1c..0fc2f552543b8 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -13,13 +13,6 @@ TYPE_CHECKING, Any, Callable, - Dict, - List, - Optional, - Set, - Tuple, - Type, - Union, ) import warnings @@ -114,19 +107,19 @@ class BaseWindow(SelectionMixin): """Provides utilities for performing windowing operations.""" - 
_attributes: List[str] = [] - exclusions: Set[str] = set() + _attributes: list[str] = [] + exclusions: set[str] = set() def __init__( self, obj: FrameOrSeries, window=None, - min_periods: Optional[int] = None, + min_periods: int | None = None, center: bool = False, - win_type: Optional[str] = None, + win_type: str | None = None, axis: Axis = 0, - on: Optional[Union[str, Index]] = None, - closed: Optional[str] = None, + on: str | Index | None = None, + closed: str | None = None, method: str = "single", ): self.obj = obj @@ -383,7 +376,7 @@ def _get_window_indexer(self) -> BaseIndexer: return FixedWindowIndexer(window_size=self.window) def _apply_series( - self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None ) -> Series: """ Series version of _apply_blockwise @@ -401,7 +394,7 @@ def _apply_series( return obj._constructor(result, index=obj.index, name=obj.name) def _apply_blockwise( - self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None ) -> FrameOrSeriesUnion: """ Apply the given function to the DataFrame broken down into homogeneous @@ -436,7 +429,7 @@ def hfunc2d(values: ArrayLike) -> ArrayLike: return self._resolve_output(out, obj) def _apply_tablewise( - self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None ) -> FrameOrSeriesUnion: """ Apply the given function to the DataFrame across the entire object @@ -455,8 +448,8 @@ def _apply_tablewise( def _apply_pairwise( self, target: FrameOrSeriesUnion, - other: Optional[FrameOrSeriesUnion], - pairwise: Optional[bool], + other: FrameOrSeriesUnion | None, + pairwise: bool | None, func: Callable[[FrameOrSeriesUnion, FrameOrSeriesUnion], FrameOrSeriesUnion], ) -> FrameOrSeriesUnion: """ @@ -472,8 +465,8 @@ def _apply_pairwise( def _apply( self, func: Callable[..., Any], - name: Optional[str] = None, - numba_cache_key: Optional[Tuple[Callable, str]] = None, + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, **kwargs, ): """ @@ -567,8 +560,8 @@ def __init__( def _apply( self, func: Callable[..., Any], - name: Optional[str] = None, - numba_cache_key: Optional[Tuple[Callable, str]] = None, + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, **kwargs, ) -> FrameOrSeries: result = super()._apply( @@ -624,8 +617,8 @@ def _apply( def _apply_pairwise( self, target: FrameOrSeriesUnion, - other: Optional[FrameOrSeriesUnion], - pairwise: Optional[bool], + other: FrameOrSeriesUnion | None, + pairwise: bool | None, func: Callable[[FrameOrSeriesUnion, FrameOrSeriesUnion], FrameOrSeriesUnion], ) -> FrameOrSeriesUnion: """ @@ -949,8 +942,8 @@ def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: def _apply( self, func: Callable[[np.ndarray, int, int], np.ndarray], - name: Optional[str] = None, - numba_cache_key: Optional[Tuple[Callable, str]] = None, + name: str | None = None, + numba_cache_key: tuple[Callable, str] | None = None, **kwargs, ): """ @@ -1121,10 +1114,10 @@ def apply( self, func: Callable[..., Any], raw: bool = False, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, - args: Optional[Tuple[Any, ...]] = None, - kwargs: Optional[Dict[str, Any]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, 
...] | None = None, + kwargs: dict[str, Any] | None = None, ): if args is None: args = () @@ -1163,8 +1156,8 @@ def apply( def _generate_cython_apply_func( self, - args: Tuple[Any, ...], - kwargs: Dict[str, Any], + args: tuple[Any, ...], + kwargs: dict[str, Any], raw: bool, function: Callable[..., Any], ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]: @@ -1188,8 +1181,8 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_window_func("sum", args, kwargs) @@ -1211,8 +1204,8 @@ def sum( def max( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_window_func("max", args, kwargs) @@ -1234,8 +1227,8 @@ def max( def min( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_window_func("min", args, kwargs) @@ -1257,8 +1250,8 @@ def min( def mean( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_window_func("mean", args, kwargs) @@ -1279,8 +1272,8 @@ def mean( def median( self, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): if maybe_use_numba(engine): @@ -1355,8 +1348,8 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): def cov( self, - other: Optional[FrameOrSeriesUnion] = None, - pairwise: Optional[bool] = None, + other: FrameOrSeriesUnion | None = None, + pairwise: bool | None = None, ddof: int = 1, **kwargs, ): @@ -1393,8 +1386,8 @@ def cov_func(x, y): def corr( self, - other: Optional[FrameOrSeriesUnion] = None, - pairwise: Optional[bool] = None, + other: FrameOrSeriesUnion | None = None, + pairwise: bool | None = None, ddof: int = 1, **kwargs, ): @@ -1619,10 +1612,10 @@ def apply( self, func: Callable[..., Any], raw: bool = False, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, - args: Optional[Tuple[Any, ...]] = None, - kwargs: Optional[Dict[str, Any]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, + args: tuple[Any, ...] 
| None = None, + kwargs: dict[str, Any] | None = None, ): return super().apply( func, @@ -1700,8 +1693,8 @@ def apply( def sum( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_rolling_func("sum", args, kwargs) @@ -1726,8 +1719,8 @@ def sum( def max( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_rolling_func("max", args, kwargs) @@ -1767,8 +1760,8 @@ def max( def min( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_rolling_func("min", args, kwargs) @@ -1815,8 +1808,8 @@ def min( def mean( self, *args, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): nv.validate_rolling_func("mean", args, kwargs) @@ -1854,8 +1847,8 @@ def mean( ) def median( self, - engine: Optional[str] = None, - engine_kwargs: Optional[Dict[str, bool]] = None, + engine: str | None = None, + engine_kwargs: dict[str, bool] | None = None, **kwargs, ): return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) @@ -2144,8 +2137,8 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): ) def cov( self, - other: Optional[FrameOrSeriesUnion] = None, - pairwise: Optional[bool] = None, + other: FrameOrSeriesUnion | None = None, + pairwise: bool | None = None, ddof: int = 1, **kwargs, ): @@ -2269,8 +2262,8 @@ def cov( ) def corr( self, - other: Optional[FrameOrSeriesUnion] = None, - pairwise: Optional[bool] = None, + other: FrameOrSeriesUnion | None = None, + pairwise: bool | None = None, ddof: int = 1, **kwargs, ): @@ -2295,8 +2288,8 @@ def _get_window_indexer(self) -> GroupbyIndexer: ------- GroupbyIndexer """ - rolling_indexer: Type[BaseIndexer] - indexer_kwargs: Optional[Dict[str, Any]] = None + rolling_indexer: type[BaseIndexer] + indexer_kwargs: dict[str, Any] | None = None index_array = self._index_array if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window) diff --git a/pandas/io/common.py b/pandas/io/common.py index eab13cb7cd598..00966d39dd99d 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -19,12 +19,7 @@ IO, Any, AnyStr, - Dict, - List, Mapping, - Optional, - Tuple, - Union, cast, ) from urllib.parse import ( @@ -95,7 +90,7 @@ class IOHandles: handle: Buffer compression: CompressionDict - created_handles: List[Buffer] = dataclasses.field(default_factory=list) + created_handles: list[Buffer] = dataclasses.field(default_factory=list) is_wrapped: bool = False is_mmap: bool = False @@ -342,7 +337,7 @@ def _get_filepath_or_buffer( # If botocore is installed we fallback to reading with anon=True # to allow reads from public buckets - err_types_to_retry_with_anon: List[Any] = [] + err_types_to_retry_with_anon: list[Any] = [] try: import_optional_dependency("botocore") from botocore.exceptions import ( @@ -431,7 +426,7 @@ def file_path_to_url(path: str) -> str: def get_compression_method( compression: CompressionOptions, -) -> Tuple[Optional[str], CompressionDict]: +) -> tuple[str | None, CompressionDict]: """ Simplifies a compression argument to a compression method string and a mapping 
containing additional arguments. @@ -451,7 +446,7 @@ def get_compression_method( ------ ValueError on mapping missing 'method' key """ - compression_method: Optional[str] + compression_method: str | None if isinstance(compression, Mapping): compression_args = dict(compression) try: @@ -465,8 +460,8 @@ def get_compression_method( def infer_compression( - filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] -) -> Optional[str]: + filepath_or_buffer: FilePathOrBuffer, compression: str | None +) -> str | None: """ Get the compression method for filepath_or_buffer. If compression='infer', the inferred compression method is returned. Otherwise, the input @@ -526,11 +521,11 @@ def infer_compression( def get_handle( path_or_buf: FilePathOrBuffer, mode: str, - encoding: Optional[str] = None, + encoding: str | None = None, compression: CompressionOptions = None, memory_map: bool = False, is_text: bool = True, - errors: Optional[str] = None, + errors: str | None = None, storage_options: StorageOptions = None, ) -> IOHandles: """ @@ -619,7 +614,7 @@ def get_handle( ) handle = ioargs.filepath_or_buffer - handles: List[Buffer] + handles: list[Buffer] # memory mapping needs to be the first step handle, memory_map, handles = _maybe_memory_map( @@ -769,14 +764,14 @@ def __init__( self, file: FilePathOrBuffer, mode: str, - archive_name: Optional[str] = None, + archive_name: str | None = None, **kwargs, ): mode = mode.replace("b", "") self.archive_name = archive_name - self.multiple_write_buffer: Optional[Union[StringIO, BytesIO]] = None + self.multiple_write_buffer: StringIO | BytesIO | None = None - kwargs_zip: Dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} + kwargs_zip: dict[str, Any] = {"compression": zipfile.ZIP_DEFLATED} kwargs_zip.update(kwargs) # error: Argument 1 to "__init__" of "ZipFile" has incompatible type @@ -861,10 +856,10 @@ def _maybe_memory_map( memory_map: bool, encoding: str, mode: str, - errors: Optional[str], -) -> Tuple[FileOrBuffer, bool, List[Buffer]]: + errors: str | None, +) -> tuple[FileOrBuffer, bool, list[Buffer]]: """Try to memory map file/buffer.""" - handles: List[Buffer] = [] + handles: list[Buffer] = [] memory_map &= hasattr(handle, "fileno") or isinstance(handle, str) if not memory_map: return handle, memory_map, handles diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c0904c0393af6..7eefd26b194ab 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -9,10 +9,7 @@ from textwrap import fill from typing import ( Any, - Dict, Mapping, - Optional, - Union, cast, ) import warnings @@ -342,7 +339,7 @@ def read_excel( index_col=None, usecols=None, squeeze=False, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, engine=None, converters=None, true_values=None, @@ -480,7 +477,7 @@ def parse( index_col=None, usecols=None, squeeze=False, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, true_values=None, false_values=None, skiprows=None, @@ -757,13 +754,13 @@ class ExcelWriter(metaclass=abc.ABCMeta): # ExcelWriter. 
def __new__( cls, - path: Union[FilePathOrBuffer, ExcelWriter], + path: FilePathOrBuffer | ExcelWriter, engine=None, date_format=None, datetime_format=None, mode: str = "w", storage_options: StorageOptions = None, - engine_kwargs: Optional[Dict] = None, + engine_kwargs: dict | None = None, **kwargs, ): if kwargs: @@ -858,13 +855,13 @@ def save(self): def __init__( self, - path: Union[FilePathOrBuffer, ExcelWriter], + path: FilePathOrBuffer | ExcelWriter, engine=None, date_format=None, datetime_format=None, mode: str = "w", storage_options: StorageOptions = None, - engine_kwargs: Optional[Dict] = None, + engine_kwargs: dict | None = None, **kwargs, ): # validate that this engine can handle the extension @@ -885,7 +882,7 @@ def __init__( self.handles = get_handle( path, mode, storage_options=storage_options, is_text=False ) - self.sheets: Dict[str, Any] = {} + self.sheets: dict[str, Any] = {} self.cur_sheet = None if date_format is None: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 20d9a998505cd..860971a7967da 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -4,9 +4,6 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, - Optional, ) import numpy as np @@ -40,7 +37,7 @@ def __init__( datetime_format=None, mode: str = "w", storage_options: StorageOptions = None, - engine_kwargs: Optional[Dict[str, Any]] = None, + engine_kwargs: dict[str, Any] | None = None, ): # Use the openpyxl module as the Excel writer. from openpyxl.workbook import Workbook @@ -76,7 +73,7 @@ def save(self): self.handles.handle.truncate() @classmethod - def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, Serialisable]: + def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: """ Convert a style_dict to a set of kwargs suitable for initializing or updating-on-copy an openpyxl v2 style object. @@ -101,7 +98,7 @@ def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, Serialisable]: """ _style_key_map = {"borders": "border"} - style_kwargs: Dict[str, Serialisable] = {} + style_kwargs: dict[str, Serialisable] = {} for k, v in style_dict.items(): if k in _style_key_map: k = _style_key_map[k] @@ -415,7 +412,7 @@ def write_cells( # Write the frame cells using openpyxl. 
sheet_name = self._get_sheet_name(sheet_name) - _style_cache: Dict[str, Dict[str, Serialisable]] = {} + _style_cache: dict[str, dict[str, Serialisable]] = {} if sheet_name in self.sheets: wks = self.sheets[sheet_name] @@ -437,7 +434,7 @@ def write_cells( if fmt: xcell.number_format = fmt - style_kwargs: Optional[Dict[str, Serialisable]] = {} + style_kwargs: dict[str, Serialisable] | None = {} if cell.style: key = str(cell.style) style_kwargs = _style_cache.get(key) @@ -516,7 +513,7 @@ def close(self): super().close() @property - def sheet_names(self) -> List[str]: + def sheet_names(self) -> list[str]: return self.book.sheetnames def get_sheet_by_name(self, name: str): @@ -543,12 +540,12 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return cell.value - def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: + def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: if self.book.read_only: sheet.reset_dimensions() - data: List[List[Scalar]] = [] + data: list[list[Scalar]] = [] last_row_with_data = -1 for row_number, row in enumerate(sheet.rows): converted_row = [self._convert_cell(cell, convert_float) for cell in row] @@ -563,7 +560,7 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: # With dimension reset, openpyxl no longer pads rows max_width = max(len(data_row) for data_row in data) if min(len(data_row) for data_row in data) < max_width: - empty_cell: List[Scalar] = [""] + empty_cell: list[Scalar] = [""] data = [ data_row + (max_width - len(data_row)) * empty_cell for data_row in data diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index ca8340cfd0a24..2d4d67a533874 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -9,13 +9,9 @@ from typing import ( TYPE_CHECKING, Any, - Dict, Hashable, Iterator, - List, - Optional, Sequence, - Union, ) import numpy as np @@ -51,19 +47,19 @@ def __init__( formatter: DataFrameFormatter, path_or_buf: FilePathOrBuffer[str] = "", sep: str = ",", - cols: Optional[Sequence[Hashable]] = None, - index_label: Optional[IndexLabel] = None, + cols: Sequence[Hashable] | None = None, + index_label: IndexLabel | None = None, mode: str = "w", - encoding: Optional[str] = None, + encoding: str | None = None, errors: str = "strict", compression: CompressionOptions = "infer", - quoting: Optional[int] = None, + quoting: int | None = None, line_terminator="\n", - chunksize: Optional[int] = None, - quotechar: Optional[str] = '"', - date_format: Optional[str] = None, + chunksize: int | None = None, + quotechar: str | None = '"', + date_format: str | None = None, doublequote: bool = True, - escapechar: Optional[str] = None, + escapechar: str | None = None, storage_options: StorageOptions = None, ): self.fmt = formatter @@ -93,7 +89,7 @@ def na_rep(self) -> str: return self.fmt.na_rep @property - def float_format(self) -> Optional[FloatFormatType]: + def float_format(self) -> FloatFormatType | None: return self.fmt.float_format @property @@ -101,14 +97,14 @@ def decimal(self) -> str: return self.fmt.decimal @property - def header(self) -> Union[bool, Sequence[str]]: + def header(self) -> bool | Sequence[str]: return self.fmt.header @property def index(self) -> bool: return self.fmt.index - def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLabel: + def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel: if index_label is not False: if index_label is None: return self._get_index_label_from_obj() @@ 
-117,20 +113,20 @@ def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLab return [index_label] return index_label - def _get_index_label_from_obj(self) -> List[str]: + def _get_index_label_from_obj(self) -> list[str]: if isinstance(self.obj.index, ABCMultiIndex): return self._get_index_label_multiindex() else: return self._get_index_label_flat() - def _get_index_label_multiindex(self) -> List[str]: + def _get_index_label_multiindex(self) -> list[str]: return [name or "" for name in self.obj.index.names] - def _get_index_label_flat(self) -> List[str]: + def _get_index_label_flat(self) -> list[str]: index_label = self.obj.index.name return [""] if index_label is None else [index_label] - def _initialize_quotechar(self, quotechar: Optional[str]) -> Optional[str]: + def _initialize_quotechar(self, quotechar: str | None) -> str | None: if self.quoting != csvlib.QUOTE_NONE: # prevents crash in _csv return quotechar @@ -141,7 +137,7 @@ def has_mi_columns(self) -> bool: return bool(isinstance(self.obj.columns, ABCMultiIndex)) def _initialize_columns( - self, cols: Optional[Sequence[Hashable]] + self, cols: Sequence[Hashable] | None ) -> Sequence[Hashable]: # validate mi options if self.has_mi_columns: @@ -164,13 +160,13 @@ def _initialize_columns( else: return list(new_cols) - def _initialize_chunksize(self, chunksize: Optional[int]) -> int: + def _initialize_chunksize(self, chunksize: int | None) -> int: if chunksize is None: return (100000 // (len(self.cols) or 1)) or 1 return int(chunksize) @property - def _number_format(self) -> Dict[str, Any]: + def _number_format(self) -> dict[str, Any]: """Dictionary used for storing number formatting settings.""" return { "na_rep": self.na_rep, @@ -221,8 +217,8 @@ def write_cols(self) -> Sequence[Hashable]: return self.cols @property - def encoded_labels(self) -> List[Hashable]: - encoded_labels: List[Hashable] = [] + def encoded_labels(self) -> list[Hashable]: + encoded_labels: list[Hashable] = [] if self.index and self.index_label: assert isinstance(self.index_label, Sequence) @@ -272,7 +268,7 @@ def _save_header(self) -> None: for row in self._generate_multiindex_header_rows(): self.writer.writerow(row) - def _generate_multiindex_header_rows(self) -> Iterator[List[Hashable]]: + def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]: columns = self.obj.columns for i in range(columns.nlevels): # we need at least 1 index column to write our col names diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 562a62da369ae..0d7b7893aa496 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -4,10 +4,6 @@ from typing import ( TYPE_CHECKING, Any, - Dict, - List, - Optional, - Union, ) from pandas.compat._optional import import_optional_dependency @@ -29,18 +25,18 @@ def _try_import(): def read_gbq( query: str, - project_id: Optional[str] = None, - index_col: Optional[str] = None, - col_order: Optional[List[str]] = None, + project_id: str | None = None, + index_col: str | None = None, + col_order: list[str] | None = None, reauth: bool = False, auth_local_webserver: bool = False, - dialect: Optional[str] = None, - location: Optional[str] = None, - configuration: Optional[Dict[str, Any]] = None, + dialect: str | None = None, + location: str | None = None, + configuration: dict[str, Any] | None = None, credentials=None, - use_bqstorage_api: Optional[bool] = None, - max_results: Optional[int] = None, - progress_bar_type: Optional[str] = None, + use_bqstorage_api: bool | None = None, + max_results: int | None = None, + 
progress_bar_type: str | None = None, ) -> DataFrame: """ Load data from Google BigQuery. @@ -177,7 +173,7 @@ def read_gbq( """ pandas_gbq = _try_import() - kwargs: Dict[str, Union[str, bool, int, None]] = {} + kwargs: dict[str, str | bool | int | None] = {} # START: new kwargs. Don't populate unless explicitly set. if use_bqstorage_api is not None: @@ -206,13 +202,13 @@ def read_gbq( def to_gbq( dataframe: DataFrame, destination_table: str, - project_id: Optional[str] = None, - chunksize: Optional[int] = None, + project_id: str | None = None, + chunksize: int | None = None, reauth: bool = False, if_exists: str = "fail", auth_local_webserver: bool = False, - table_schema: Optional[List[Dict[str, str]]] = None, - location: Optional[str] = None, + table_schema: list[dict[str, str]] | None = None, + location: str | None = None, progress_bar: bool = True, credentials=None, ) -> None: diff --git a/pandas/io/orc.py b/pandas/io/orc.py index df76156aac9eb..db14a07e4b91b 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -2,11 +2,7 @@ from __future__ import annotations import distutils -from typing import ( - TYPE_CHECKING, - List, - Optional, -) +from typing import TYPE_CHECKING from pandas._typing import FilePathOrBuffer @@ -17,7 +13,7 @@ def read_orc( - path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs + path: FilePathOrBuffer, columns: list[str] | None = None, **kwargs ) -> DataFrame: """ Load an ORC object from the file path, returning a DataFrame. diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 183d753ddd60b..fcb077eee0624 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -7,10 +7,6 @@ from typing import ( Any, AnyStr, - Dict, - List, - Optional, - Tuple, ) from warnings import catch_warnings @@ -78,7 +74,7 @@ def _get_path_or_handle( storage_options: StorageOptions = None, mode: str = "rb", is_dir: bool = False, -) -> Tuple[FilePathOrBuffer, Optional[IOHandles], Any]: +) -> tuple[FilePathOrBuffer, IOHandles | None, Any]: """File handling for PyArrow.""" path_or_handle = stringify_path(path) if is_fsspec_url(path_or_handle) and fs is None: @@ -162,15 +158,15 @@ def write( self, df: DataFrame, path: FilePathOrBuffer[AnyStr], - compression: Optional[str] = "snappy", - index: Optional[bool] = None, + compression: str | None = "snappy", + index: bool | None = None, storage_options: StorageOptions = None, - partition_cols: Optional[List[str]] = None, + partition_cols: list[str] | None = None, **kwargs, ): self.validate_dataframe(df) - from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)} + from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)} if index is not None: from_pandas_kwargs["preserve_index"] = index @@ -348,14 +344,14 @@ def read( @doc(storage_options=generic._shared_docs["storage_options"]) def to_parquet( df: DataFrame, - path: Optional[FilePathOrBuffer] = None, + path: FilePathOrBuffer | None = None, engine: str = "auto", - compression: Optional[str] = "snappy", - index: Optional[bool] = None, + compression: str | None = "snappy", + index: bool | None = None, storage_options: StorageOptions = None, - partition_cols: Optional[List[str]] = None, + partition_cols: list[str] | None = None, **kwargs, -) -> Optional[bytes]: +) -> bytes | None: """ Write a DataFrame to the parquet format. 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8a3e3ea556bea..a718ce1b6b9ec 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -18,14 +18,8 @@ TYPE_CHECKING, Any, Callable, - Dict, Hashable, - List, - Optional, Sequence, - Tuple, - Type, - Union, cast, ) import warnings @@ -268,15 +262,15 @@ def to_hdf( key: str, value: FrameOrSeries, mode: str = "a", - complevel: Optional[int] = None, - complib: Optional[str] = None, + complevel: int | None = None, + complib: str | None = None, append: bool = False, - format: Optional[str] = None, + format: str | None = None, index: bool = True, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + min_itemsize: int | dict[str, int] | None = None, nan_rep=None, - dropna: Optional[bool] = None, - data_columns: Optional[Union[bool, List[str]]] = None, + dropna: bool | None = None, + data_columns: bool | list[str] | None = None, errors: str = "strict", encoding: str = "UTF-8", ): @@ -325,11 +319,11 @@ def read_hdf( mode: str = "r", errors: str = "strict", where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, columns=None, iterator=False, - chunksize: Optional[int] = None, + chunksize: int | None = None, **kwargs, ): """ @@ -554,7 +548,7 @@ class HDFStore: >>> store.close() # only now, data is written to disk """ - _handle: Optional[File] + _handle: File | None _mode: str _complevel: int _fletcher32: bool @@ -563,7 +557,7 @@ def __init__( self, path, mode: str = "a", - complevel: Optional[int] = None, + complevel: int | None = None, complib=None, fletcher32: bool = False, **kwargs, @@ -651,7 +645,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def keys(self, include: str = "pandas") -> List[str]: + def keys(self, include: str = "pandas") -> list[str]: """ Return a list of keys corresponding to objects stored in HDFStore. @@ -881,8 +875,8 @@ def select_as_coordinates( self, key: str, where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ return the selection as an Index @@ -913,8 +907,8 @@ def select_column( self, key: str, column: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ return a single column from the table. 
This is generally only useful to @@ -1079,10 +1073,10 @@ def put( index=True, append=False, complib=None, - complevel: Optional[int] = None, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + complevel: int | None = None, + min_itemsize: int | dict[str, int] | None = None, nan_rep=None, - data_columns: Optional[List[str]] = None, + data_columns: list[str] | None = None, encoding=None, errors: str = "strict", track_times: bool = True, @@ -1205,14 +1199,14 @@ def append( index=True, append=True, complib=None, - complevel: Optional[int] = None, + complevel: int | None = None, columns=None, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + min_itemsize: int | dict[str, int] | None = None, nan_rep=None, chunksize=None, expectedrows=None, - dropna: Optional[bool] = None, - data_columns: Optional[List[str]] = None, + dropna: bool | None = None, + data_columns: list[str] | None = None, encoding=None, errors: str = "strict", ): @@ -1283,7 +1277,7 @@ def append( def append_to_multiple( self, - d: Dict, + d: dict, value, selector, data_columns=None, @@ -1334,7 +1328,7 @@ def append_to_multiple( # figure out how to split the value remain_key = None - remain_values: List = [] + remain_values: list = [] for k, v in d.items(): if v is None: if remain_key is not None: @@ -1382,8 +1376,8 @@ def create_table_index( self, key: str, columns=None, - optlevel: Optional[int] = None, - kind: Optional[str] = None, + optlevel: int | None = None, + kind: str | None = None, ): """ Create a pytables index on the table. @@ -1496,7 +1490,7 @@ def walk(self, where="/"): yield (g._v_pathname.rstrip("/"), groups, leaves) - def get_node(self, key: str) -> Optional[Node]: + def get_node(self, key: str) -> Node | None: """ return the node with the key or None if it does not exist """ self._check_if_open() if not key.startswith("/"): @@ -1512,7 +1506,7 @@ def get_node(self, key: str) -> Optional[Node]: assert isinstance(node, _table_mod.Node), type(node) return node - def get_storer(self, key: str) -> Union[GenericFixed, Table]: + def get_storer(self, key: str) -> GenericFixed | Table: """ return the storer object for a key, raise if not in the file """ group = self.get_node(key) if group is None: @@ -1529,7 +1523,7 @@ def copy( propindexes: bool = True, keys=None, complib=None, - complevel: Optional[int] = None, + complevel: int | None = None, fletcher32: bool = False, overwrite=True, ): @@ -1568,7 +1562,7 @@ def copy( data = self.select(k) if isinstance(s, Table): - index: Union[bool, List[str]] = False + index: bool | list[str] = False if propindexes: index = [a.name for a in s.axes if a.is_indexed] new_store.append( @@ -1643,12 +1637,12 @@ def _create_storer( self, group, format=None, - value: Optional[FrameOrSeries] = None, + value: FrameOrSeries | None = None, encoding: str = "UTF-8", errors: str = "strict", - ) -> Union[GenericFixed, Table]: + ) -> GenericFixed | Table: """ return a suitable class to operate """ - cls: Union[Type[GenericFixed], Type[Table]] + cls: type[GenericFixed] | type[Table] if value is not None and not isinstance(value, (Series, DataFrame)): raise TypeError("value must be None, Series, or DataFrame") @@ -1740,9 +1734,9 @@ def _write_to_group( index=True, append=False, complib=None, - complevel: Optional[int] = None, + complevel: int | None = None, fletcher32=None, - min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + min_itemsize: int | dict[str, int] | None = None, chunksize=None, expectedrows=None, dropna=False, @@ -1858,21 +1852,21 @@ class TableIterator: 
Whether to automatically close the store at the end of iteration. """ - chunksize: Optional[int] + chunksize: int | None store: HDFStore - s: Union[GenericFixed, Table] + s: GenericFixed | Table def __init__( self, store: HDFStore, - s: Union[GenericFixed, Table], + s: GenericFixed | Table, func, where, nrows, start=None, stop=None, iterator: bool = False, - chunksize: Optional[int] = None, + chunksize: int | None = None, auto_close: bool = False, ): self.store = store @@ -1977,7 +1971,7 @@ def __init__( values=None, kind=None, typ=None, - cname: Optional[str] = None, + cname: str | None = None, axis=None, pos=None, freq=None, @@ -2078,7 +2072,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): if self.freq is not None: kwargs["freq"] = _ensure_decoded(self.freq) - factory: Union[Type[Index], Type[DatetimeIndex]] = Index + factory: type[Index] | type[DatetimeIndex] = Index if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype): factory = DatetimeIndex @@ -2295,7 +2289,7 @@ def __init__( table=None, meta=None, metadata=None, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, data=None, ): super().__init__( @@ -2393,7 +2387,7 @@ def get_atom_string(cls, shape, itemsize): return _tables().StringCol(itemsize=itemsize, shape=shape[0]) @classmethod - def get_atom_coltype(cls, kind: str) -> Type[Col]: + def get_atom_coltype(cls, kind: str) -> type[Col]: """ return the PyTables column class for this column """ if kind.startswith("uint"): k4 = kind[4:] @@ -2598,7 +2592,7 @@ class Fixed: pandas_kind: str format_type: str = "fixed" # GH#30962 needed by dask - obj_type: Type[FrameOrSeriesUnion] + obj_type: type[FrameOrSeriesUnion] ndim: int encoding: str parent: HDFStore @@ -2626,7 +2620,7 @@ def is_old_version(self) -> bool: return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 @property - def version(self) -> Tuple[int, int, int]: + def version(self) -> tuple[int, int, int]: """ compute and set our version """ version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None)) try: @@ -2735,8 +2729,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): raise NotImplementedError( "cannot read on an abstract storer: subclasses should implement" @@ -2747,9 +2741,7 @@ def write(self, **kwargs): "cannot write on an abstract storer: subclasses should implement" ) - def delete( - self, where=None, start: Optional[int] = None, stop: Optional[int] = None - ): + def delete(self, where=None, start: int | None = None, stop: int | None = None): """ support fully deleting the node in its entirety (only) - where specification must be None @@ -2766,7 +2758,7 @@ class GenericFixed(Fixed): _index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"} _reverse_index_map = {v: k for k, v in _index_type_map.items()} - attributes: List[str] = [] + attributes: list[str] = [] # indexer helpers def _class_to_alias(self, cls) -> str: @@ -2858,9 +2850,7 @@ def get_attrs(self): def write(self, obj, **kwargs): self.set_attrs() - def read_array( - self, key: str, start: Optional[int] = None, stop: Optional[int] = None - ): + def read_array(self, key: str, start: int | None = None, stop: int | None = None): """ read an array for the specified node (off of group """ import tables @@ -2895,7 +2885,7 @@ def read_array( return ret def read_index( - self, key: str, start: Optional[int] = None, stop: Optional[int] = 
None + self, key: str, start: int | None = None, stop: int | None = None ) -> Index: variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) @@ -2957,13 +2947,13 @@ def write_multi_index(self, key: str, index: MultiIndex): self.write_array(label_key, level_codes) def read_multi_index( - self, key: str, start: Optional[int] = None, stop: Optional[int] = None + self, key: str, start: int | None = None, stop: int | None = None ) -> MultiIndex: nlevels = getattr(self.attrs, f"{key}_nlevels") levels = [] codes = [] - names: List[Hashable] = [] + names: list[Hashable] = [] for i in range(nlevels): level_key = f"{key}_level{i}" node = getattr(self.group, level_key) @@ -2980,7 +2970,7 @@ def read_multi_index( ) def read_index_node( - self, node: Node, start: Optional[int] = None, stop: Optional[int] = None + self, node: Node, start: int | None = None, stop: int | None = None ) -> Index: data = node[start:stop] # If the index was an empty array write_array_empty() will @@ -3026,7 +3016,7 @@ def write_array_empty(self, key: str, value: ArrayLike): node._v_attrs.value_type = str(value.dtype) node._v_attrs.shape = value.shape - def write_array(self, key: str, obj: FrameOrSeries, items: Optional[Index] = None): + def write_array(self, key: str, obj: FrameOrSeries, items: Index | None = None): # TODO: we only have a few tests that get here, the only EA # that gets passed is DatetimeArray, and we never have # both self._filters and EA @@ -3132,8 +3122,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): self.validate_read(columns, where) index = self.read_index("index", start=start, stop=stop) @@ -3153,7 +3143,7 @@ class BlockManagerFixed(GenericFixed): nblocks: int @property - def shape(self) -> Optional[Shape]: + def shape(self) -> Shape | None: try: ndim = self.ndim @@ -3183,8 +3173,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): # start, stop applied to rows, so 0th axis only self.validate_read(columns, where) @@ -3270,15 +3260,15 @@ class Table(Fixed): pandas_kind = "wide_table" format_type: str = "table" # GH#30962 needed by dask table_type: str - levels: Union[int, List[Hashable]] = 1 + levels: int | list[Hashable] = 1 is_table = True - index_axes: List[IndexCol] - non_index_axes: List[Tuple[int, Any]] - values_axes: List[DataCol] - data_columns: List - metadata: List - info: Dict + index_axes: list[IndexCol] + non_index_axes: list[tuple[int, Any]] + values_axes: list[DataCol] + data_columns: list + metadata: list + info: dict def __init__( self, @@ -3368,7 +3358,7 @@ def is_multi_index(self) -> bool: def validate_multiindex( self, obj: FrameOrSeriesUnion - ) -> Tuple[DataFrame, List[Hashable]]: + ) -> tuple[DataFrame, list[Hashable]]: """ validate that we can store the multi-index; reset and return the new object @@ -3438,7 +3428,7 @@ def data_orientation(self): ) ) - def queryables(self) -> Dict[str, Any]: + def queryables(self) -> dict[str, Any]: """ return a dict of the kinds allowable columns for this object """ # mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here axis_names = {0: "index", 1: "columns"} @@ -3459,7 +3449,7 @@ def index_cols(self): # Note: each `i.cname` below is assured to be a str. 
return [(i.axis, i.cname) for i in self.index_axes] - def values_cols(self) -> List[str]: + def values_cols(self) -> list[str]: """ return a list of my values cols """ return [i.cname for i in self.values_axes] @@ -3518,7 +3508,7 @@ def get_attrs(self): self.nan_rep = getattr(self.attrs, "nan_rep", None) self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) - self.levels: List[Hashable] = getattr(self.attrs, "levels", None) or [] + self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] @@ -3625,7 +3615,7 @@ def f(i, c): return _indexables - def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): + def create_index(self, columns=None, optlevel=None, kind: str | None = None): """ Create a pytables index on the specified columns. @@ -3710,8 +3700,8 @@ def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): ) def _read_axes( - self, where, start: Optional[int] = None, stop: Optional[int] = None - ) -> List[Tuple[ArrayLike, ArrayLike]]: + self, where, start: int | None = None, stop: int | None = None + ) -> list[tuple[ArrayLike, ArrayLike]]: """ Create the axes sniffed from the table. @@ -3852,7 +3842,7 @@ def _create_axes( ) # create according to the new data - new_non_index_axes: List = [] + new_non_index_axes: list = [] # nan_representation if nan_rep is None: @@ -3935,7 +3925,7 @@ def _create_axes( # make sure that we match up the existing columns # if we have an existing table - existing_col: Optional[DataCol] + existing_col: DataCol | None if table_exists and validate: try: @@ -4035,8 +4025,8 @@ def get_blk_items(mgr): mgr = frame._mgr mgr = cast(BlockManager, mgr) - blocks: List[Block] = list(mgr.blocks) - blk_items: List[Index] = get_blk_items(mgr) + blocks: list[Block] = list(mgr.blocks) + blk_items: list[Index] = get_blk_items(mgr) if len(data_columns): axis, axis_labels = new_non_index_axes[0] @@ -4060,7 +4050,7 @@ def get_blk_items(mgr): tuple(b_items.tolist()): (b, b_items) for b, b_items in zip(blocks, blk_items) } - new_blocks: List[Block] = [] + new_blocks: list[Block] = [] new_blk_items = [] for ea in values_axes: items = tuple(ea.values) @@ -4140,10 +4130,10 @@ def process_filter(field, filt): def create_description( self, complib, - complevel: Optional[int], + complevel: int | None, fletcher32: bool, - expectedrows: Optional[int], - ) -> Dict[str, Any]: + expectedrows: int | None, + ) -> dict[str, Any]: """ create the description of the table from the axes & values """ # provided expected rows if its passed if expectedrows is None: @@ -4169,7 +4159,7 @@ def create_description( return d def read_coordinates( - self, where=None, start: Optional[int] = None, stop: Optional[int] = None + self, where=None, start: int | None = None, stop: int | None = None ): """ select coordinates (row numbers) from a table; return the @@ -4198,8 +4188,8 @@ def read_column( self, column: str, where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ return a single column from the table, generally only indexables @@ -4251,8 +4241,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): """ read the indices and the indexing 
array, calculate offset rows and return @@ -4333,7 +4323,7 @@ def write( # add the rows table.write_data(chunksize, dropna=dropna) - def write_data(self, chunksize: Optional[int], dropna: bool = False): + def write_data(self, chunksize: int | None, dropna: bool = False): """ we form the data into a 2-d including indexes,values,mask write chunk-by-chunk """ @@ -4395,9 +4385,9 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False): def write_data_chunk( self, rows: np.ndarray, - indexes: List[np.ndarray], - mask: Optional[np.ndarray], - values: List[np.ndarray], + indexes: list[np.ndarray], + mask: np.ndarray | None, + values: list[np.ndarray], ): """ Parameters @@ -4436,9 +4426,7 @@ def write_data_chunk( self.table.append(rows) self.table.flush() - def delete( - self, where=None, start: Optional[int] = None, stop: Optional[int] = None - ): + def delete(self, where=None, start: int | None = None, stop: int | None = None): # delete all rows (and return the nrows) if where is None or not len(where): @@ -4505,7 +4493,7 @@ class AppendableFrameTable(AppendableTable): pandas_kind = "frame_table" table_type = "appendable_frame" ndim = 2 - obj_type: Type[FrameOrSeriesUnion] = DataFrame + obj_type: type[FrameOrSeriesUnion] = DataFrame @property def is_transposed(self) -> bool: @@ -4522,8 +4510,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): # validate the version @@ -4626,8 +4614,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ) -> Series: is_multi_index = self.is_multi_index @@ -4672,7 +4660,7 @@ class GenericTable(AppendableFrameTable): table_type = "generic_table" ndim = 2 obj_type = DataFrame - levels: List[Hashable] + levels: list[Hashable] @property def pandas_type(self) -> str: @@ -4706,7 +4694,7 @@ def indexables(self): name="index", axis=0, table=self.table, meta=meta, metadata=md ) - _indexables: List[Union[GenericIndexCol, GenericDataIndexableCol]] = [index_col] + _indexables: list[GenericIndexCol | GenericDataIndexableCol] = [index_col] for i, n in enumerate(d._v_names): assert isinstance(n, str) @@ -4759,8 +4747,8 @@ def read( self, where=None, columns=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): df = super().read(where=where, columns=columns, start=start, stop=stop) @@ -4789,7 +4777,7 @@ def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataF if other is not None: labels = ensure_index(other.unique()).intersection(labels, sort=False) if not labels.equals(ax): - slicer: List[Union[slice, Index]] = [slice(None, None)] * obj.ndim + slicer: list[slice | Index] = [slice(None, None)] * obj.ndim slicer[axis] = labels obj = obj.loc[tuple(slicer)] return obj @@ -4798,17 +4786,17 @@ def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataF # tz to/from coercion -def _get_tz(tz: tzinfo) -> Union[str, tzinfo]: +def _get_tz(tz: tzinfo) -> str | tzinfo: """ for a tz-aware type, return an encoded zone """ zone = timezones.get_timezone(tz) return zone def _set_tz( - values: Union[np.ndarray, Index], - tz: Optional[Union[str, tzinfo]], + values: np.ndarray | Index, + tz: str | tzinfo | None, coerce: bool = False, -) -> Union[np.ndarray, DatetimeIndex]: +) -> np.ndarray | DatetimeIndex: """ coerce the values to a DatetimeIndex if tz 
is set preserve the input shape if possible @@ -4905,10 +4893,8 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index return IndexCol(name, converted, kind, atom, index_name=index_name) -def _unconvert_index( - data, kind: str, encoding: str, errors: str -) -> Union[np.ndarray, Index]: - index: Union[Index, np.ndarray] +def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index: + index: Index | np.ndarray if kind == "datetime64": index = DatetimeIndex(data) @@ -4940,7 +4926,7 @@ def _maybe_convert_for_string_atom( nan_rep, encoding, errors, - columns: List[str], + columns: list[str], ): bvalues = block.values @@ -4962,7 +4948,7 @@ def _maybe_convert_for_string_atom( elif not (inferred_type == "string" or dtype_name == "object"): return bvalues - blocks: List[Block] = block.fillna(nan_rep, downcast=False) + blocks: list[Block] = block.fillna(nan_rep, downcast=False) # Note: because block is always object dtype, fillna goes # through a path such that the result is always a 1-element list assert len(blocks) == 1 @@ -5205,8 +5191,8 @@ def __init__( self, table: Table, where=None, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ): self.table = table self.where = where diff --git a/pandas/io/stata.py b/pandas/io/stata.py index c01a369bf0054..f1747f94a7ea8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -20,13 +20,8 @@ from typing import ( Any, AnyStr, - Dict, Hashable, - List, - Optional, Sequence, - Tuple, - Union, cast, ) import warnings @@ -652,12 +647,12 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"): self.value_labels = list(zip(np.arange(len(categories)), categories)) self.value_labels.sort(key=lambda x: x[0]) self.text_len = 0 - self.txt: List[bytes] = [] + self.txt: list[bytes] = [] self.n = 0 # Compute lengths and setup lists of offsets and labels - offsets: List[int] = [] - values: List[int] = [] + offsets: list[int] = [] + values: list[int] = [] for vl in self.value_labels: category = vl[1] if not isinstance(category, str): @@ -777,7 +772,7 @@ class StataMissingValue: """ # Construct a dictionary of missing values - MISSING_VALUES: Dict[float, str] = {} + MISSING_VALUES: dict[float, str] = {} bases = (101, 32741, 2147483621) for b in bases: # Conversion to long to avoid hash issues on 32 bit platforms #8968 @@ -813,7 +808,7 @@ class StataMissingValue: "float64": struct.unpack(" str: return self._str @property - def value(self) -> Union[int, float]: + def value(self) -> int | float: """ The binary representation of the missing value. 
@@ -857,7 +852,7 @@ def __eq__(self, other: Any) -> bool: ) @classmethod - def get_base_missing_value(cls, dtype: np.dtype) -> Union[int, float]: + def get_base_missing_value(cls, dtype: np.dtype) -> int | float: if dtype == np.int8: value = cls.BASE_MISSING_VALUES["int8"] elif dtype == np.int16: @@ -1043,17 +1038,17 @@ def __init__( path_or_buf: FilePathOrBuffer, convert_dates: bool = True, convert_categoricals: bool = True, - index_col: Optional[str] = None, + index_col: str | None = None, convert_missing: bool = False, preserve_dtypes: bool = True, - columns: Optional[Sequence[str]] = None, + columns: Sequence[str] | None = None, order_categoricals: bool = True, - chunksize: Optional[int] = None, + chunksize: int | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): super().__init__() - self.col_sizes: List[int] = [] + self.col_sizes: list[int] = [] # Arguments to the reader (can be temporarily overridden in # calls to read). @@ -1079,7 +1074,7 @@ def __init__( self._column_selector_set = False self._value_labels_read = False self._data_read = False - self._dtype: Optional[np.dtype] = None + self._dtype: np.dtype | None = None self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) @@ -1211,7 +1206,7 @@ def _read_new_header(self) -> None: # Get data type information, works for versions 117-119. def _get_dtypes( self, seek_vartypes: int - ) -> Tuple[List[Union[int, str]], List[Union[str, np.dtype]]]: + ) -> tuple[list[int | str], list[str | np.dtype]]: self.path_or_buf.seek(seek_vartypes) raw_typlist = [ @@ -1219,7 +1214,7 @@ def _get_dtypes( for _ in range(self.nvar) ] - def f(typ: int) -> Union[int, str]: + def f(typ: int) -> int | str: if typ <= 2045: return typ try: @@ -1229,7 +1224,7 @@ def f(typ: int) -> Union[int, str]: typlist = [f(x) for x in raw_typlist] - def g(typ: int) -> Union[str, np.dtype]: + def g(typ: int) -> str | np.dtype: if typ <= 2045: return str(typ) try: @@ -1243,13 +1238,13 @@ def g(typ: int) -> Union[str, np.dtype]: return typlist, dtyplist - def _get_varlist(self) -> List[str]: + def _get_varlist(self) -> list[str]: # 33 in order formats, 129 in formats 118 and 119 b = 33 if self.format_version < 118 else 129 return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] # Returns the format list - def _get_fmtlist(self) -> List[str]: + def _get_fmtlist(self) -> list[str]: if self.format_version >= 118: b = 57 elif self.format_version > 113: @@ -1262,7 +1257,7 @@ def _get_fmtlist(self) -> List[str]: return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] # Returns the label list - def _get_lbllist(self) -> List[str]: + def _get_lbllist(self) -> list[str]: if self.format_version >= 118: b = 129 elif self.format_version > 108: @@ -1271,7 +1266,7 @@ def _get_lbllist(self) -> List[str]: b = 9 return [self._decode(self.path_or_buf.read(b)) for _ in range(self.nvar)] - def _get_variable_labels(self) -> List[str]: + def _get_variable_labels(self) -> list[str]: if self.format_version >= 118: vlblist = [ self._decode(self.path_or_buf.read(321)) for _ in range(self.nvar) @@ -1430,7 +1425,7 @@ def _setup_dtype(self) -> np.dtype: return self._dtype - def _calcsize(self, fmt: Union[int, str]) -> int: + def _calcsize(self, fmt: int | str) -> int: if isinstance(fmt, int): return fmt return struct.calcsize(self.byteorder + fmt) @@ -1459,7 +1454,7 @@ def _read_value_labels(self) -> None: if self.format_version <= 108: # Value labels are not supported in version 108 and earlier. 
self._value_labels_read = True - self.value_label_dict: Dict[str, Dict[Union[float, int], str]] = {} + self.value_label_dict: dict[str, dict[float | int, str]] = {} return if self.format_version >= 117: @@ -1541,7 +1536,7 @@ def __next__(self) -> DataFrame: self._using_iterator = True return self.read(nrows=self._chunksize) - def get_chunk(self, size: Optional[int] = None) -> DataFrame: + def get_chunk(self, size: int | None = None) -> DataFrame: """ Reads lines from Stata file and returns as dataframe @@ -1561,14 +1556,14 @@ def get_chunk(self, size: Optional[int] = None) -> DataFrame: @Appender(_read_method_doc) def read( self, - nrows: Optional[int] = None, - convert_dates: Optional[bool] = None, - convert_categoricals: Optional[bool] = None, - index_col: Optional[str] = None, - convert_missing: Optional[bool] = None, - preserve_dtypes: Optional[bool] = None, - columns: Optional[Sequence[str]] = None, - order_categoricals: Optional[bool] = None, + nrows: int | None = None, + convert_dates: bool | None = None, + convert_categoricals: bool | None = None, + index_col: str | None = None, + convert_missing: bool | None = None, + preserve_dtypes: bool | None = None, + columns: Sequence[str] | None = None, + order_categoricals: bool | None = None, ) -> DataFrame: # Handle empty file or chunk. If reading incrementally raise # StopIteration. If reading the whole thing return an empty @@ -1816,7 +1811,7 @@ def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFra def _do_convert_categoricals( self, data: DataFrame, - value_label_dict: Dict[str, Dict[Union[float, int], str]], + value_label_dict: dict[str, dict[float | int, str]], lbllist: Sequence[str], order_categoricals: bool, ) -> DataFrame: @@ -1833,7 +1828,7 @@ def _do_convert_categoricals( column = data[col] key_matches = column.isin(keys) if self._using_iterator and key_matches.all(): - initial_categories: Optional[np.ndarray] = keys + initial_categories: np.ndarray | None = keys # If all categories are in the keys and we are iterating, # use the same keys for all chunks. If some are missing # value labels, then we will fall back to the categories @@ -1894,7 +1889,7 @@ def data_label(self) -> str: """ return self._data_label - def variable_labels(self) -> Dict[str, str]: + def variable_labels(self) -> dict[str, str]: """ Return variable labels as a dict, associating each variable name with corresponding label. @@ -1905,7 +1900,7 @@ def variable_labels(self) -> Dict[str, str]: """ return dict(zip(self.varlist, self._variable_labels)) - def value_labels(self) -> Dict[str, Dict[Union[float, int], str]]: + def value_labels(self) -> dict[str, dict[float | int, str]]: """ Return a dict, associating each variable name a dict, associating each value its corresponding label. 
@@ -1925,16 +1920,16 @@ def read_stata( filepath_or_buffer: FilePathOrBuffer, convert_dates: bool = True, convert_categoricals: bool = True, - index_col: Optional[str] = None, + index_col: str | None = None, convert_missing: bool = False, preserve_dtypes: bool = True, - columns: Optional[Sequence[str]] = None, + columns: Sequence[str] | None = None, order_categoricals: bool = True, - chunksize: Optional[int] = None, + chunksize: int | None = None, iterator: bool = False, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, -) -> Union[DataFrame, StataReader]: +) -> DataFrame | StataReader: reader = StataReader( filepath_or_buffer, @@ -2000,7 +1995,7 @@ def _convert_datetime_to_stata_type(fmt: str) -> np.dtype: raise NotImplementedError(f"Format {fmt} not implemented") -def _maybe_convert_to_int_keys(convert_dates: Dict, varlist: List[Hashable]) -> Dict: +def _maybe_convert_to_int_keys(convert_dates: dict, varlist: list[Hashable]) -> dict: new_dict = {} for key in convert_dates: if not convert_dates[key].startswith("%"): # make sure proper fmts @@ -2185,12 +2180,12 @@ def __init__( self, fname: FilePathOrBuffer, data: DataFrame, - convert_dates: Optional[Dict[Hashable, str]] = None, + convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, - byteorder: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, - data_label: Optional[str] = None, - variable_labels: Optional[Dict[Hashable, str]] = None, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): @@ -2201,7 +2196,7 @@ def __init__( self._data_label = data_label self._variable_labels = variable_labels self._compression = compression - self._output_file: Optional[Buffer] = None + self._output_file: Buffer | None = None # attach nobs, nvars, data, varlist, typlist self._prepare_pandas(data) self.storage_options = storage_options @@ -2211,7 +2206,7 @@ def __init__( self._byteorder = _set_endianness(byteorder) self._fname = fname self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} - self._converted_names: Dict[Hashable, str] = {} + self._converted_names: dict[Hashable, str] = {} def _write(self, to_write: str) -> None: """ @@ -2234,7 +2229,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame: """ is_cat = [is_categorical_dtype(data[col].dtype) for col in data] self._is_col_cat = is_cat - self._value_labels: List[StataValueLabel] = [] + self._value_labels: list[StataValueLabel] = [] if not any(is_cat): return data @@ -2333,7 +2328,7 @@ def _check_column_names(self, data: DataFrame) -> DataFrame: dates are exported, the variable name is propagated to the date conversion dictionary """ - converted_names: Dict[Hashable, str] = {} + converted_names: dict[Hashable, str] = {} columns = list(data.columns) original_columns = columns[:] @@ -2390,8 +2385,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame: return data def _set_formats_and_types(self, dtypes: Series) -> None: - self.fmtlist: List[str] = [] - self.typlist: List[int] = [] + self.fmtlist: list[str] = [] + self.typlist: list[int] = [] for col, dtype in dtypes.items(): self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col])) self.typlist.append(_dtype_to_stata_type(dtype, self.data[col])) @@ -2577,8 +2572,8 @@ def _write_value_labels(self) -> None: def _write_header( self, - 
data_label: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, ) -> None: byteorder = self._byteorder # ds_format - just use 114 @@ -2778,7 +2773,7 @@ def _dtype_to_stata_type_117(dtype: np.dtype, column: Series, force_strl: bool) raise NotImplementedError(f"Data type {dtype} not supported.") -def _pad_bytes_new(name: Union[str, bytes], length: int) -> bytes: +def _pad_bytes_new(name: str | bytes, length: int) -> bytes: """ Takes a bytes instance and pads it with null bytes until it's length chars. """ @@ -2821,7 +2816,7 @@ def __init__( df: DataFrame, columns: Sequence[str], version: int = 117, - byteorder: Optional[str] = None, + byteorder: str | None = None, ): if version not in (117, 118, 119): raise ValueError("Only dta versions 117, 118 and 119 supported") @@ -2849,11 +2844,11 @@ def __init__( self._gso_o_type = gso_o_type self._gso_v_type = gso_v_type - def _convert_key(self, key: Tuple[int, int]) -> int: + def _convert_key(self, key: tuple[int, int]) -> int: v, o = key return v + self._o_offet * o - def generate_table(self) -> Tuple[Dict[str, Tuple[int, int]], DataFrame]: + def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: """ Generates the GSO lookup table for the DataFrame @@ -2904,7 +2899,7 @@ def generate_table(self) -> Tuple[Dict[str, Tuple[int, int]], DataFrame]: return gso_table, gso_df - def generate_blob(self, gso_table: Dict[str, Tuple[int, int]]) -> bytes: + def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes: """ Generates the binary blob of GSOs that is written to the dta file. @@ -3061,18 +3056,18 @@ def __init__( self, fname: FilePathOrBuffer, data: DataFrame, - convert_dates: Optional[Dict[Hashable, str]] = None, + convert_dates: dict[Hashable, str] | None = None, write_index: bool = True, - byteorder: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, - data_label: Optional[str] = None, - variable_labels: Optional[Dict[Hashable, str]] = None, - convert_strl: Optional[Sequence[Hashable]] = None, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): # Copy to new list since convert_strl might be modified later - self._convert_strl: List[Hashable] = [] + self._convert_strl: list[Hashable] = [] if convert_strl is not None: self._convert_strl.extend(convert_strl) @@ -3088,11 +3083,11 @@ def __init__( compression=compression, storage_options=storage_options, ) - self._map: Dict[str, int] = {} + self._map: dict[str, int] = {} self._strl_blob = b"" @staticmethod - def _tag(val: Union[str, bytes], tag: str) -> bytes: + def _tag(val: str | bytes, tag: str) -> bytes: """Surround val with """ if isinstance(val, str): val = bytes(val, "utf-8") @@ -3105,8 +3100,8 @@ def _update_map(self, tag: str) -> None: def _write_header( self, - data_label: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, + data_label: str | None = None, + time_stamp: datetime.datetime | None = None, ) -> None: """Write the file header""" byteorder = self._byteorder @@ -3453,14 +3448,14 @@ def __init__( self, fname: FilePathOrBuffer, data: DataFrame, - convert_dates: Optional[Dict[Hashable, str]] = None, + convert_dates: dict[Hashable, str] | None = None, write_index: bool = 
True, - byteorder: Optional[str] = None, - time_stamp: Optional[datetime.datetime] = None, - data_label: Optional[str] = None, - variable_labels: Optional[Dict[Hashable, str]] = None, - convert_strl: Optional[Sequence[Hashable]] = None, - version: Optional[int] = None, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + convert_strl: Sequence[Hashable] | None = None, + version: int | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ):
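
Note for readers skimming the hunks above: every change in this patch is the same mechanical rewrite that pyupgrade performs once --keep-runtime-typing is dropped, i.e. typing aliases (Optional, Union, List, Dict, Tuple, Type) become builtin generics and PEP 604 unions. This stays safe on the older Python versions pandas supported at the time only because the annotations are never evaluated at runtime, which is the case when `from __future__ import annotations` (PEP 563) is in effect in the module. The snippet below is an illustrative sketch only, not part of the patch or of the pandas codebase; the function name and arguments are hypothetical.

    # Illustrative sketch (hypothetical names, not pandas code): the annotation
    # style this patch converts *from* and *to*.
    from __future__ import annotations  # annotations are stored as strings, never evaluated

    # Before (typing aliases, as removed throughout this patch):
    #   from typing import Dict, List, Optional, Union
    #   def summarize(values: List[int],
    #                 weights: Optional[Dict[str, float]] = None) -> Union[int, float]:

    # After (builtin generics + PEP 604 unions, as pyupgrade rewrites them):
    def summarize(values: list[int], weights: dict[str, float] | None = None) -> int | float:
        """Toy example: weighted or unweighted sum of ``values``."""
        if weights is None:
            return sum(values)
        # dicts preserve insertion order on 3.7+, so pairing by position is fine here
        return sum(v * w for v, w in zip(values, weights.values()))

    if __name__ == "__main__":
        print(summarize([1, 2, 3]))                                    # 6
        print(summarize([1, 2, 3], {"a": 0.5, "b": 0.25, "c": 0.25}))  # 1.75

Because only the annotations change, runtime behavior is untouched; this is why the diff is large but purely stylistic.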