From 9e57d6d212a4edf9227c137cf56ca92239dfb4f5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 21 Dec 2021 13:39:55 -0500 Subject: [PATCH 01/15] fix column_arrays for array manager --- pandas/core/internals/array_manager.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 09f16a2ddab67..06849bffff5ca 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -794,7 +794,14 @@ def column_arrays(self) -> list[ArrayLike]: """ Used in the JSON C code to access column arrays. """ - return self.arrays + + def convert_array(arr: ArrayLike) -> ArrayLike: + if isinstance(arr, ExtensionArray): + return arr.to_numpy() + else: + return arr + + return [convert_array(arr) for arr in self.arrays] def iset( self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False From 34dc181bbead0bc2928f909f229163b7e78a0302 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 19 Feb 2022 21:36:40 -0500 Subject: [PATCH 02/15] fix up typing to eventually support Interval typing --- pandas/_typing.py | 2 +- pandas/core/algorithms.py | 2 +- pandas/core/arrays/interval.py | 6 ++++ pandas/core/arrays/masked.py | 4 ++- pandas/core/arrays/string_arrow.py | 6 ++-- pandas/core/common.py | 2 +- pandas/core/indexes/interval.py | 7 ++-- pandas/core/tools/datetimes.py | 15 +++++++-- pandas/io/excel/_odfreader.py | 20 ++++++++---- pandas/io/parsers/python_parser.py | 52 +++++++++++++++--------------- 10 files changed, 72 insertions(+), 44 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index c0383fe50a7e7..0d311b3e82a2c 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -83,7 +83,7 @@ PythonScalar = Union[str, int, float, bool] DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] -Scalar = Union[PythonScalar, PandasScalar] +Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime] IntStrT = TypeVar("IntStrT", int, str) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8c10b62d83f9e..c57261c810663 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -759,7 +759,7 @@ def factorize( else: dtype = values.dtype values = _ensure_data(values) - na_value: Scalar + na_value: Scalar | None if original.dtype.kind in ["m", "M"]: # Note: factorize_array will cast NaT bc it has a __int__ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d23910c37b52b..fa43583bb2c34 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -7,6 +7,7 @@ ) import textwrap from typing import ( + Any, Sequence, TypeVar, Union, @@ -197,6 +198,11 @@ class IntervalArray(IntervalMixin, ExtensionArray): can_hold_na = True _na_value = _fill_value = np.nan + # To make mypy recognize the fields + _left: Any + _right: Any + _dtype: Any + # --------------------------------------------------------------------- # Constructors diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 21f44dbc6a1cd..1ea7d82e3e666 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -76,6 +76,8 @@ from pandas.core.ops import invalid_comparison if TYPE_CHECKING: + from lib import NoDefault + from libmissing import NAType from pandas import Series from pandas.core.arrays import BooleanArray from pandas._typing import ( @@ -336,7 +338,7 @@ def to_numpy( self, 
dtype: npt.DTypeLike | None = None, copy: bool = False, - na_value: Scalar = lib.no_default, + na_value: Scalar | NoDefault | NAType = lib.no_default, ) -> np.ndarray: """ Convert to a NumPy Array. diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 3503b54dd478a..002def4d31e72 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -762,7 +762,7 @@ def _str_replace( return type(self)(result) def _str_match( - self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None + self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None ): if pa_version_under4p0: return super()._str_match(pat, case, flags, na) @@ -771,7 +771,9 @@ def _str_match( pat = "^" + pat return self._str_contains(pat, case, flags, na, regex=True) - def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None): + def _str_fullmatch( + self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None + ): if pa_version_under4p0: return super()._str_fullmatch(pat, case, flags, na) diff --git a/pandas/core/common.py b/pandas/core/common.py index 94fb09ddc79b3..d81abf2f2bc5c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -517,7 +517,7 @@ def f(x): def convert_to_list_like( - values: Scalar | Iterable | AnyArrayLike, + values: Scalar | Iterable | AnyArrayLike | Hashable, ) -> list | AnyArrayLike: """ Convert list-like or scalar input to list-like. List, numpy and pandas array-like diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1e39c1db1a73b..badad69913d62 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -9,6 +9,7 @@ from typing import ( Any, Hashable, + Literal, ) import numpy as np @@ -191,10 +192,12 @@ class IntervalIndex(ExtensionIndex): _typ = "intervalindex" # annotate properties pinned via inherit_names - closed: str + closed: Literal["left", "right", "both", "neither"] is_non_overlapping_monotonic: bool closed_left: bool closed_right: bool + open_left: bool + open_right: bool _data: IntervalArray _values: IntervalArray @@ -543,7 +546,7 @@ def _maybe_convert_i8(self, key): return key_i8 - def _searchsorted_monotonic(self, label, side: str = "left"): + def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): if not self.is_non_overlapping_monotonic: raise KeyError( "can only get slices from an IntervalIndex if bounds are " diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 33ed64c7ae364..c493b3dc29c18 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -10,8 +10,8 @@ Hashable, List, Tuple, - TypeVar, Union, + cast, overload, ) import warnings @@ -79,13 +79,15 @@ from pandas._libs.tslibs.nattype import NaTType from pandas import Series + from pandas.core.arrays.base import ExtensionArray # --------------------------------------------------------------------- # types used in annotations ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"] Scalar = Union[int, float, str] -DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime) +DatetimeScalar = Union[Scalar, datetime] + DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible] start_caching_at = 50 @@ -638,7 +640,7 @@ def to_datetime( infer_datetime_format: bool = ..., origin=..., cache: bool = ..., -) -> DatetimeScalar | NaTType: +) -> Timestamp | NaTType: ... 
@@ -1061,6 +1063,13 @@ def to_datetime( result = convert_listlike(arg, format, name=arg.name) elif is_list_like(arg): try: + # error: Argument 1 to "_maybe_cache" has incompatible type + # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray, + # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...], + # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]" + arg = cast( + Union[list, tuple, ExtensionArray, np.ndarray, Series, Index], arg + ) cache_array = _maybe_cache(arg, format, cache, convert_listlike) except OutOfBoundsDatetime: # caching attempts to create a DatetimeIndex, which may raise diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 6adce02dc50f0..61496e533428c 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -2,6 +2,7 @@ import numpy as np +from pandas._libs.tslibs.nattype import NaTType from pandas._typing import ( FilePath, ReadBuffer, @@ -81,7 +82,9 @@ def get_sheet_by_name(self, name: str): self.close() raise ValueError(f"sheet {name} not found") - def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: + def get_sheet_data( + self, sheet, convert_float: bool + ) -> list[list[Scalar | NaTType]]: """ Parse an ODF Table into a list of lists """ @@ -99,12 +102,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: empty_rows = 0 max_row_len = 0 - table: list[list[Scalar]] = [] + table: list[list[Scalar | NaTType]] = [] for sheet_row in sheet_rows: sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names] empty_cells = 0 - table_row: list[Scalar] = [] + table_row: list[Scalar | NaTType] = [] for sheet_cell in sheet_cells: if sheet_cell.qname == table_cell_name: @@ -167,7 +170,7 @@ def _is_empty_row(self, row) -> bool: return True - def _get_cell_value(self, cell, convert_float: bool) -> Scalar: + def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: from odf.namespaces import OFFICENS if str(cell) == "#N/A": @@ -200,9 +203,12 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: cell_value = cell.attributes.get((OFFICENS, "date-value")) return pd.to_datetime(cell_value) elif cell_type == "time": - stamp = pd.to_datetime(str(cell)) - # error: Item "str" of "Union[float, str, NaTType]" has no attribute "time" - return stamp.time() # type: ignore[union-attr] + stamp: pd.Timestamp | NaTType = pd.to_datetime(str(cell)) + if not isinstance(stamp, NaTType): + return stamp + else: + self.close() + raise ValueError(f"Unrecognized time {str(cell)}") else: self.close() raise ValueError(f"Unrecognized type {cell_type}") diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 8a66a5c22caf5..23fa81698ac1d 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -113,7 +113,7 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds): # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. 
self._col_indices: list[int] | None = None - columns: list[list[Scalar | None]] + columns: list[list[Hashable | None]] ( columns, self.num_original_columns, @@ -352,11 +352,11 @@ def _convert_data( def _infer_columns( self, - ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]: + ) -> tuple[list[list[Hashable | None]], int, set[Hashable | None]]: names = self.names num_original_columns = 0 clear_buffer = True - unnamed_cols: set[Scalar | None] = set() + unnamed_cols: set[Hashable | None] = set() self._header_line = None if self.header is not None: @@ -371,7 +371,7 @@ def _infer_columns( have_mi_columns = False header = [header] - columns: list[list[Scalar | None]] = [] + columns: list[list[Hashable | None]] = [] for level, hr in enumerate(header): try: line = self._buffered_line() @@ -400,7 +400,7 @@ def _infer_columns( line = self.names[:] - this_columns: list[Scalar | None] = [] + this_columns: list[Hashable | None] = [] this_unnamed_cols = [] for i, c in enumerate(line): @@ -475,7 +475,7 @@ def _infer_columns( if clear_buffer: self._clear_buffer() - first_line: list[Scalar] | None + first_line: list[Hashable] | None if names is not None: # Read first row after header to check if data are longer try: @@ -551,10 +551,10 @@ def _infer_columns( def _handle_usecols( self, - columns: list[list[Scalar | None]], - usecols_key: list[Scalar | None], + columns: list[list[Hashable | None]], + usecols_key: list[Hashable | None], num_original_columns: int, - ) -> list[list[Scalar | None]]: + ) -> list[list[Hashable | None]]: """ Sets self._col_indices @@ -599,7 +599,7 @@ def _handle_usecols( self._col_indices = sorted(col_indices) return columns - def _buffered_line(self) -> list[Scalar]: + def _buffered_line(self) -> list[Hashable]: """ Return a line from buffer, filling buffer if required. """ @@ -608,7 +608,7 @@ def _buffered_line(self) -> list[Scalar]: else: return self._next_line() - def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]: + def _check_for_bom(self, first_row: list[Hashable]) -> list[Hashable]: """ Checks whether the file begins with the BOM character. If it does, remove it. In addition, if there is quoting @@ -659,10 +659,10 @@ def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]: # No quotation so just remove BOM from first element new_row = first_row_bom[1:] - new_row_list: list[Scalar] = [new_row] + new_row_list: list[Hashable] = [new_row] return new_row_list + first_row[1:] - def _is_line_empty(self, line: list[Scalar]) -> bool: + def _is_line_empty(self, line: list[Hashable]) -> bool: """ Check if a line is empty or not. @@ -677,7 +677,7 @@ def _is_line_empty(self, line: list[Scalar]) -> bool: """ return not line or all(not x for x in line) - def _next_line(self) -> list[Scalar]: + def _next_line(self) -> list[Hashable]: if isinstance(self.data, list): while self.skipfunc(self.pos): if self.pos >= len(self.data): @@ -756,7 +756,7 @@ def _alert_malformed(self, msg: str, row_num: int) -> None: base = f"Skipping line {row_num}: " sys.stderr.write(base + msg + "\n") - def _next_iter_line(self, row_num: int) -> list[Scalar] | None: + def _next_iter_line(self, row_num: int) -> list[Hashable] | None: """ Wrapper around iterating through `self.data` (CSV source). 
@@ -804,7 +804,7 @@ def _next_iter_line(self, row_num: int) -> list[Scalar] | None: self._alert_malformed(msg, row_num) return None - def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + def _check_comments(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: if self.comment is None: return lines ret = [] @@ -825,7 +825,7 @@ def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: ret.append(rl) return ret - def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + def _remove_empty_lines(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: """ Iterate through the lines and remove any that are either empty or contain only one whitespace value @@ -851,7 +851,7 @@ def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: ret.append(line) return ret - def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + def _check_thousands(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: if self.thousands is None: return lines @@ -860,8 +860,8 @@ def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: ) def _search_replace_num_columns( - self, lines: list[list[Scalar]], search: str, replace: str - ) -> list[list[Scalar]]: + self, lines: list[list[Hashable]], search: str, replace: str + ) -> list[list[Hashable]]: ret = [] for line in lines: rl = [] @@ -878,7 +878,7 @@ def _search_replace_num_columns( ret.append(rl) return ret - def _check_decimal(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + def _check_decimal(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: if self.decimal == parser_defaults["decimal"]: return lines @@ -908,7 +908,7 @@ def _get_index_name( orig_names = list(columns) columns = list(columns) - line: list[Scalar] | None + line: list[Hashable] | None if self._header_line is not None: line = self._header_line else: @@ -917,7 +917,7 @@ def _get_index_name( except StopIteration: line = None - next_line: list[Scalar] | None + next_line: list[Hashable] | None try: next_line = self._next_line() except StopIteration: @@ -964,7 +964,7 @@ def _get_index_name( return index_name, orig_names, columns - def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: + def _rows_to_cols(self, content: list[list[Hashable]]) -> list[np.ndarray]: col_len = self.num_original_columns if self._implicit_index: @@ -1051,7 +1051,7 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: ] return zipped_content - def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]: + def _get_lines(self, rows: int | None = None) -> list[list[Hashable]]: lines = self.buf new_rows = None @@ -1281,7 +1281,7 @@ def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None: self.infer_nrows, ) - def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: + def _remove_empty_lines(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: """ Returns the list of lines without the empty ones. With fixed-width fields, empty lines become arrays of empty strings. 
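The recurring move in the patch above is replacing loose `str`/`Scalar` annotations with narrower ones (`Literal[...]` for `closed`/`side`, explicit `| None`, `cast` before `_maybe_cache`) so that mypy can reject bad arguments at the call site instead of relying on runtime checks. As a standalone illustration of the `Literal` part only — a minimal toy sketch, not pandas code; the function below is invented for the example:

from __future__ import annotations

import bisect
from typing import Literal

# Stand-in for annotations such as
#   def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left")
# introduced by this patch: the checker now rejects side="middle" before the code runs.
SearchSide = Literal["left", "right"]

def searchsorted_monotonic(values: list[int], label: int, side: SearchSide = "left") -> int:
    if side == "left":
        return bisect.bisect_left(values, label)
    return bisect.bisect_right(values, label)

print(searchsorted_monotonic([1, 3, 5, 7], 5))           # 2
print(searchsorted_monotonic([1, 3, 5, 7], 5, "right"))  # 3
# searchsorted_monotonic([1, 3, 5, 7], 5, "middle")   <- flagged by mypy, no runtime test needed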
From a547800240a1d85789eb925c6d168e9eaf1501c6 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 20 Feb 2022 11:18:03 -0500 Subject: [PATCH 03/15] fix imports in core/tools/datetimes.py --- pandas/core/tools/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c493b3dc29c18..71c49624a9fea 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -66,6 +66,7 @@ ) from pandas.core import algorithms from pandas.core.algorithms import unique +from pandas.core.arrays.base import ExtensionArray from pandas.core.arrays.datetimes import ( maybe_convert_dtype, objects_to_datetime64ns, @@ -79,7 +80,6 @@ from pandas._libs.tslibs.nattype import NaTType from pandas import Series - from pandas.core.arrays.base import ExtensionArray # --------------------------------------------------------------------- # types used in annotations @@ -1068,7 +1068,7 @@ def to_datetime( # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...], # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]" arg = cast( - Union[list, tuple, ExtensionArray, np.ndarray, Series, Index], arg + Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg ) cache_array = _maybe_cache(arg, format, cache, convert_listlike) except OutOfBoundsDatetime: From 3a3bfeabf7b44bc65dfef46d086a666f518b548f Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 20 Feb 2022 16:58:26 -0500 Subject: [PATCH 04/15] fix time in odfreader --- pandas/io/excel/_odfreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 61496e533428c..53429119ada8d 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -205,7 +205,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: elif cell_type == "time": stamp: pd.Timestamp | NaTType = pd.to_datetime(str(cell)) if not isinstance(stamp, NaTType): - return stamp + return stamp.time() else: self.close() raise ValueError(f"Unrecognized time {str(cell)}") From 5edf5c9f6e1efbc11915d84b99e2dd3f984ef37b Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 20 Feb 2022 19:09:42 -0500 Subject: [PATCH 05/15] pandas/core/arrays/masked.py --- pandas/core/arrays/masked.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 1ea7d82e3e666..2021e373f35c9 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -76,8 +76,6 @@ from pandas.core.ops import invalid_comparison if TYPE_CHECKING: - from lib import NoDefault - from libmissing import NAType from pandas import Series from pandas.core.arrays import BooleanArray from pandas._typing import ( @@ -338,7 +336,7 @@ def to_numpy( self, dtype: npt.DTypeLike | None = None, copy: bool = False, - na_value: Scalar | NoDefault | NAType = lib.no_default, + na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default, ) -> np.ndarray: """ Convert to a NumPy Array. 
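The `na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default` annotation in the patch above works because pandas' "no default" sentinel has a dedicated type that can participate in a union. A self-contained sketch of that sentinel pattern, assuming nothing about pandas internals — the `_NoDefault`/`no_default` names below are defined locally for illustration, not imported from pandas:

from __future__ import annotations

import enum

class _NoDefault(enum.Enum):
    # A single-member enum gives the sentinel its own nominal type,
    # so `float | _NoDefault` is a legal, precise annotation.
    no_default = "NO_DEFAULT"

no_default = _NoDefault.no_default

def to_numpy(values: list[float | None], na_value: float | _NoDefault = no_default) -> list[float]:
    # If the caller did not pass na_value, fall back to NaN; otherwise honour the value given.
    fill = float("nan") if na_value is no_default else na_value
    return [fill if v is None else v for v in values]

print(to_numpy([1.0, None, 3.0]))                 # [1.0, nan, 3.0]
print(to_numpy([1.0, None, 3.0], na_value=-1.0))  # [1.0, -1.0, 3.0]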
From 7c4cd5c5e1e27422f4974c2e1661021c22156798 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 08:22:46 -0500 Subject: [PATCH 06/15] use cast instead of new code in odfreader --- pandas/io/excel/_odfreader.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 53429119ada8d..13ad9f254e086 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import cast + import numpy as np from pandas._libs.tslibs.nattype import NaTType @@ -203,12 +205,10 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: cell_value = cell.attributes.get((OFFICENS, "date-value")) return pd.to_datetime(cell_value) elif cell_type == "time": - stamp: pd.Timestamp | NaTType = pd.to_datetime(str(cell)) - if not isinstance(stamp, NaTType): - return stamp.time() - else: - self.close() - raise ValueError(f"Unrecognized time {str(cell)}") + # cast needed because `pd.to_datetime can return NaTType, + # but we know this is a valid time + stamp = cast(pd.Timestamp, pd.to_datetime(str(cell))) + return stamp.time() else: self.close() raise ValueError(f"Unrecognized type {cell_type}") From 649ef075a1b8494ace8a0fdbb5e3473fe66ebd52 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 12:29:51 -0500 Subject: [PATCH 07/15] fix odfreader --- pandas/io/excel/_odfreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 13ad9f254e086..6050ae4953377 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -208,7 +208,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: # cast needed because `pd.to_datetime can return NaTType, # but we know this is a valid time stamp = cast(pd.Timestamp, pd.to_datetime(str(cell))) - return stamp.time() + return stamp else: self.close() raise ValueError(f"Unrecognized type {cell_type}") From 1c1ba2cb3f187f6259afc0831575f0f7a767078d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 12:35:20 -0500 Subject: [PATCH 08/15] interval pyi --- pandas/_libs/interval.pyi | 134 ++++++++++++++++++++++++++++++++ pandas/core/indexes/interval.py | 10 ++- pandas/io/formats/style.py | 8 +- 3 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 pandas/_libs/interval.pyi diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi new file mode 100644 index 0000000000000..d1244aac2d44f --- /dev/null +++ b/pandas/_libs/interval.pyi @@ -0,0 +1,134 @@ +from __future__ import annotations + +import sys +from typing import ( + Any, + Generic, + Protocol, + TypeVar, + Union, + overload, +) + +import numpy as np + +from pandas._typing import ( + Timedelta, + Timestamp, +) + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +VALID_CLOSED: frozenset + +OrderableScalarT = TypeVar("OrderableScalarT", int, float) +OrderableTimesT = TypeVar("OrderableTimesT", Timestamp, Timedelta) +OrderableT = TypeVar("OrderableT", int, float, Timestamp, Timedelta) + +class IntervalMixinProtocol(Protocol): ... + +class _LengthDescriptor: + @overload + def __get__(self, instance: Interval[float], owner: Any) -> float: ... + @overload + def __get__(self, instance: Interval[int], owner: Any) -> int: ... + @overload + def __get__(self, instance: Interval[OrderableTimesT], owner: Any) -> Timedelta: ... 
+ +class IntervalMixin: + @property + def closed_left(self: IntervalMixinProtocol) -> bool: ... + @property + def closed_right(self: IntervalMixinProtocol) -> bool: ... + @property + def open_left(self: IntervalMixinProtocol) -> bool: ... + @property + def open_right(self: IntervalMixinProtocol) -> bool: ... + @property + def mid(self: IntervalMixinProtocol) -> float: ... + @property + def is_empty(self: IntervalMixinProtocol) -> bool: ... + def _check_closed_matches(self, other: IntervalMixin, name: str = ...): ... + +class Interval(IntervalMixin, Generic[OrderableT]): + @property + def left(self: Interval[OrderableT]) -> OrderableT: ... + @property + def right(self: Interval[OrderableT]) -> OrderableT: ... + @property + def closed(self) -> str: ... + def __init__( + self, + left: OrderableT, + right: OrderableT, + closed: Union[str, Literal["left", "right", "both", "neither"]] = ..., + ): ... + length: _LengthDescriptor + def __hash__(self) -> int: ... + @overload + def __contains__(self: Interval[OrderableTimesT], OrderableTimesT) -> bool: ... + @overload + def __contains__(self: Interval[int], key: Union[int, float]) -> bool: ... + @overload + def __contains__(self: Interval[float], key: Union[int, float]) -> bool: ... + def __repr__(self) -> str: ... + def __str__(self) -> str: ... + @overload + def __add__( + self: Interval[OrderableTimesT], y: Timedelta + ) -> Interval[OrderableTimesT]: ... + @overload + def __add__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __add__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __add__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload + def __sub__( + self: Interval[OrderableTimesT], y: Timedelta + ) -> Interval[OrderableTimesT]: ... + @overload + def __sub__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __sub__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __sub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload + def __mul__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __mul__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __mul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload + def __truediv__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __truediv__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __truediv__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload + def __floordiv__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __floordiv__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __floordiv__( + self: Interval[float], y: Union[int, float] + ) -> Interval[float]: ... + def overlaps(self: Interval[OrderableT], other: Interval[OrderableT]) -> bool: ... + +def intervals_to_interval_bounds(intervals: np.ndarray, validate_closed: int = ...): ... + +class IntervalTree(IntervalMixin): + def __init__( + self, + left: np.ndarray, + right: np.ndarray, + closed: Literal["left", "right", "both", "neither"] = ..., + ): ... + def get_indexer(self, target) -> np.ndarray: ... + def get_indexer_non_unique(self, target) -> np.ndarray: ... + _na_count: int + @property + def is_overlapping(self) -> bool: ... 
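The `_LengthDescriptor` in the stub above is the key trick: a `.pyi` file can give one attribute different static types depending on the instance's type parameter by declaring it as a descriptor whose `__get__` is overloaded. A runnable toy version of the same pattern, assuming invented class names (the real stub has `...` bodies and no runtime behaviour of its own):

from __future__ import annotations

from datetime import datetime, timedelta
from typing import Any, Generic, TypeVar, overload

_T = TypeVar("_T", int, float, datetime)

class _LengthDescriptor:
    # Overloading __get__ lets a checker infer: numeric span -> numeric length,
    # datetime span -> timedelta length, mirroring the interval.pyi stub above.
    @overload
    def __get__(self, obj: Span[int], owner: Any) -> int: ...
    @overload
    def __get__(self, obj: Span[float], owner: Any) -> float: ...
    @overload
    def __get__(self, obj: Span[datetime], owner: Any) -> timedelta: ...
    def __get__(self, obj, owner):
        return obj.right - obj.left

class Span(Generic[_T]):
    length = _LengthDescriptor()

    def __init__(self, left: _T, right: _T) -> None:
        self.left = left
        self.right = right

print(Span(1, 5).length)                                        # 4        (int for mypy)
print(Span(datetime(2022, 1, 1), datetime(2022, 1, 3)).length)  # 2 days   (timedelta for mypy)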
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index badad69913d62..464974300c629 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -320,9 +320,10 @@ def from_tuples( return cls._simple_new(arr, name=name) # -------------------------------------------------------------------- - + # error: Return type "IntervalTree" of "_engine" incompatible with return type + # "Union[IndexEngine, ExtensionEngine]" in supertype "Index" @cache_readonly - def _engine(self) -> IntervalTree: + def _engine(self) -> IntervalTree: # type: ignore[override] left = self._maybe_convert_i8(self.left) right = self._maybe_convert_i8(self.right) return IntervalTree(left, right, closed=self.closed) @@ -514,7 +515,10 @@ def _maybe_convert_i8(self, key): left = self._maybe_convert_i8(key.left) right = self._maybe_convert_i8(key.right) constructor = Interval if scalar else IntervalIndex.from_arrays - return constructor(left, right, closed=self.closed) + # error: "object" not callable + return constructor( + left, right, closed=self.closed + ) # type: ignore[operator] if scalar: # Timestamp/Timedelta diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9d0b213e44671..3faace708b6ec 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3725,14 +3725,10 @@ def _highlight_between( Return an array of css props based on condition of data values within given range. """ if np.iterable(left) and not isinstance(left, str): - left = _validate_apply_axis_arg( - left, "left", None, data # type: ignore[arg-type] - ) + left = _validate_apply_axis_arg(left, "left", None, data) if np.iterable(right) and not isinstance(right, str): - right = _validate_apply_axis_arg( - right, "right", None, data # type: ignore[arg-type] - ) + right = _validate_apply_axis_arg(right, "right", None, data) # get ops with correct boundary attribution if inclusive == "both": From 35c2af000db02081b8a2dca7671fd123552c9103 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 14:42:48 -0500 Subject: [PATCH 09/15] use cast in odfreader --- pandas/io/excel/_odfreader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 6050ae4953377..41146cf49abfb 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -208,7 +208,8 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: # cast needed because `pd.to_datetime can return NaTType, # but we know this is a valid time stamp = cast(pd.Timestamp, pd.to_datetime(str(cell))) - return stamp + # cast needed here because Scalar doesn't include datetime.time + return cast(Scalar, stamp.time()) else: self.close() raise ValueError(f"Unrecognized type {cell_type}") From 9769356d3254f7e14986c66bd0720498dc553df1 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 14:46:00 -0500 Subject: [PATCH 10/15] use cast in odfreader --- pandas/io/excel/_odfreader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 6050ae4953377..41146cf49abfb 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -208,7 +208,8 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: # cast needed because `pd.to_datetime can return NaTType, # but we know this is a valid time stamp = cast(pd.Timestamp, pd.to_datetime(str(cell))) - return stamp + # cast needed here because 
Scalar doesn't include datetime.time + return cast(Scalar, stamp.time()) else: self.close() raise ValueError(f"Unrecognized type {cell_type}") From 607b367ab0dade96529f01a2cae2b661393c2d1c Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 15:02:00 -0500 Subject: [PATCH 11/15] fixes for mid, length to mixin, change use of protocol --- pandas/_libs/interval.pyi | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi index d1244aac2d44f..35b23aad93438 100644 --- a/pandas/_libs/interval.pyi +++ b/pandas/_libs/interval.pyi @@ -38,19 +38,27 @@ class _LengthDescriptor: @overload def __get__(self, instance: Interval[OrderableTimesT], owner: Any) -> Timedelta: ... -class IntervalMixin: - @property - def closed_left(self: IntervalMixinProtocol) -> bool: ... +class _MidDescriptor: + @overload + def __get__(self, instance: Interval[OrderableScalarT], owner: Any) -> float: ... + @overload + def __get__(self, instance: Interval[Timedelta], owner: Any) -> Timedelta: ... + @overload + def __get__(self, instance: Interval[Timestamp], owner: Any) -> Timestamp: ... + +class IntervalMixin(IntervalMixinProtocol): @property - def closed_right(self: IntervalMixinProtocol) -> bool: ... + def closed_left(self) -> bool: ... @property - def open_left(self: IntervalMixinProtocol) -> bool: ... + def closed_right(self) -> bool: ... @property - def open_right(self: IntervalMixinProtocol) -> bool: ... + def open_left(self) -> bool: ... @property - def mid(self: IntervalMixinProtocol) -> float: ... + def open_right(self) -> bool: ... + mid: _MidDescriptor + length: _LengthDescriptor @property - def is_empty(self: IntervalMixinProtocol) -> bool: ... + def is_empty(self) -> bool: ... def _check_closed_matches(self, other: IntervalMixin, name: str = ...): ... class Interval(IntervalMixin, Generic[OrderableT]): @@ -64,9 +72,8 @@ class Interval(IntervalMixin, Generic[OrderableT]): self, left: OrderableT, right: OrderableT, - closed: Union[str, Literal["left", "right", "both", "neither"]] = ..., + closed: Literal["left", "right", "both", "neither"] = ..., ): ... - length: _LengthDescriptor def __hash__(self) -> int: ... @overload def __contains__(self: Interval[OrderableTimesT], OrderableTimesT) -> bool: ... From f9f08207daab360bb23e55edf330c1fb87e02497 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 21 Feb 2022 21:51:38 -0500 Subject: [PATCH 12/15] misc cleanup from twoertwein --- pandas/_libs/interval.pyi | 114 +++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi index 35b23aad93438..2ad2cf79afa0a 100644 --- a/pandas/_libs/interval.pyi +++ b/pandas/_libs/interval.pyi @@ -1,34 +1,28 @@ from __future__ import annotations -import sys from typing import ( Any, Generic, - Protocol, + Literal, + Tuple, TypeVar, Union, overload, ) import numpy as np +import numpy.typing as npt from pandas._typing import ( Timedelta, Timestamp, ) -if sys.version_info >= (3, 8): - from typing import Literal -else: - from typing_extensions import Literal +VALID_CLOSED: frozenset[str] -VALID_CLOSED: frozenset - -OrderableScalarT = TypeVar("OrderableScalarT", int, float) -OrderableTimesT = TypeVar("OrderableTimesT", Timestamp, Timedelta) -OrderableT = TypeVar("OrderableT", int, float, Timestamp, Timedelta) - -class IntervalMixinProtocol(Protocol): ... 
+_OrderableScalarT = TypeVar("_OrderableScalarT", int, float) +_OrderableTimesT = TypeVar("_OrderableTimesT", Timestamp, Timedelta) +_OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta) class _LengthDescriptor: @overload @@ -36,17 +30,27 @@ class _LengthDescriptor: @overload def __get__(self, instance: Interval[int], owner: Any) -> int: ... @overload - def __get__(self, instance: Interval[OrderableTimesT], owner: Any) -> Timedelta: ... + def __get__( + self, instance: Interval[_OrderableTimesT], owner: Any + ) -> Timedelta: ... + @overload + def __get__( + self, instance: IntervalTree[_OrderableT], owner: Any + ) -> _OrderableT: ... class _MidDescriptor: @overload - def __get__(self, instance: Interval[OrderableScalarT], owner: Any) -> float: ... + def __get__(self, instance: Interval[_OrderableScalarT], owner: Any) -> float: ... @overload - def __get__(self, instance: Interval[Timedelta], owner: Any) -> Timedelta: ... + def __get__( + self, instance: Interval[_OrderableTimesT], owner: Any + ) -> _OrderableTimesT: ... @overload - def __get__(self, instance: Interval[Timestamp], owner: Any) -> Timestamp: ... + def __get__( + self, instance: IntervalTree[_OrderableT], owner: Any + ) -> _OrderableT: ... -class IntervalMixin(IntervalMixinProtocol): +class IntervalMixin: @property def closed_left(self) -> bool: ... @property @@ -59,34 +63,34 @@ class IntervalMixin(IntervalMixinProtocol): length: _LengthDescriptor @property def is_empty(self) -> bool: ... - def _check_closed_matches(self, other: IntervalMixin, name: str = ...): ... + def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ... -class Interval(IntervalMixin, Generic[OrderableT]): +class Interval(IntervalMixin, Generic[_OrderableT]): @property - def left(self: Interval[OrderableT]) -> OrderableT: ... + def left(self: Interval[_OrderableT]) -> _OrderableT: ... @property - def right(self: Interval[OrderableT]) -> OrderableT: ... + def right(self: Interval[_OrderableT]) -> _OrderableT: ... @property - def closed(self) -> str: ... + def closed(self) -> Literal["left", "right", "both", "neither"]: ... def __init__( self, - left: OrderableT, - right: OrderableT, + left: _OrderableT, + right: _OrderableT, closed: Literal["left", "right", "both", "neither"] = ..., ): ... def __hash__(self) -> int: ... @overload - def __contains__(self: Interval[OrderableTimesT], OrderableTimesT) -> bool: ... - @overload - def __contains__(self: Interval[int], key: Union[int, float]) -> bool: ... + def __contains__(self: Interval[_OrderableTimesT], _OrderableTimesT) -> bool: ... @overload - def __contains__(self: Interval[float], key: Union[int, float]) -> bool: ... + def __contains__( + self: Interval[_OrderableScalarT], key: Union[int, float] + ) -> bool: ... def __repr__(self) -> str: ... def __str__(self) -> str: ... @overload def __add__( - self: Interval[OrderableTimesT], y: Timedelta - ) -> Interval[OrderableTimesT]: ... + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... @overload def __add__(self: Interval[int], y: int) -> Interval[int]: ... @overload @@ -94,9 +98,19 @@ class Interval(IntervalMixin, Generic[OrderableT]): @overload def __add__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... @overload + def __radd__( + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... + @overload + def __radd__(self: Interval[int], y: int) -> Interval[int]: ... 
+ @overload + def __radd__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __radd__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload def __sub__( - self: Interval[OrderableTimesT], y: Timedelta - ) -> Interval[OrderableTimesT]: ... + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... @overload def __sub__(self: Interval[int], y: int) -> Interval[int]: ... @overload @@ -104,12 +118,28 @@ class Interval(IntervalMixin, Generic[OrderableT]): @overload def __sub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... @overload + def __rsub__( + self: Interval[_OrderableTimesT], y: Timedelta + ) -> Interval[_OrderableTimesT]: ... + @overload + def __rsub__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __rsub__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __rsub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload def __mul__(self: Interval[int], y: int) -> Interval[int]: ... @overload def __mul__(self: Interval[int], y: float) -> Interval[float]: ... @overload def __mul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... @overload + def __rmul__(self: Interval[int], y: int) -> Interval[int]: ... + @overload + def __rmul__(self: Interval[int], y: float) -> Interval[float]: ... + @overload + def __rmul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ... + @overload def __truediv__(self: Interval[int], y: int) -> Interval[int]: ... @overload def __truediv__(self: Interval[int], y: float) -> Interval[float]: ... @@ -123,19 +153,27 @@ class Interval(IntervalMixin, Generic[OrderableT]): def __floordiv__( self: Interval[float], y: Union[int, float] ) -> Interval[float]: ... - def overlaps(self: Interval[OrderableT], other: Interval[OrderableT]) -> bool: ... + def overlaps(self: Interval[_OrderableT], other: Interval[_OrderableT]) -> bool: ... -def intervals_to_interval_bounds(intervals: np.ndarray, validate_closed: int = ...): ... +def intervals_to_interval_bounds( + intervals: np.ndarray, validate_closed: bool = ... +) -> Tuple[np.ndarray, np.ndarray, str]: ... -class IntervalTree(IntervalMixin): +class IntervalTree(IntervalMixin, Generic[_OrderableT]): def __init__( self, left: np.ndarray, right: np.ndarray, closed: Literal["left", "right", "both", "neither"] = ..., + leaf_size: int = ..., ): ... - def get_indexer(self, target) -> np.ndarray: ... - def get_indexer_non_unique(self, target) -> np.ndarray: ... + def get_indexer(self, target) -> npt.NDArray[np.intp]: ... + def get_indexer_non_unique( + self, target + ) -> Tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... _na_count: int @property def is_overlapping(self) -> bool: ... + @property + def is_monotonic_increasing(self) -> bool: ... + def clear_mapping(self) -> None: ... From 52f376f641a3cedf0d53d8cf013e87603ded09ae Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 22 Feb 2022 12:13:26 -0500 Subject: [PATCH 13/15] remove generic from IntervalTree --- pandas/_libs/interval.pyi | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi index 2ad2cf79afa0a..77eeda14e52e2 100644 --- a/pandas/_libs/interval.pyi +++ b/pandas/_libs/interval.pyi @@ -34,9 +34,7 @@ class _LengthDescriptor: self, instance: Interval[_OrderableTimesT], owner: Any ) -> Timedelta: ... 
@overload - def __get__( - self, instance: IntervalTree[_OrderableT], owner: Any - ) -> _OrderableT: ... + def __get__(self, instance: IntervalTree, owner: Any) -> np.ndarray: ... class _MidDescriptor: @overload @@ -46,9 +44,7 @@ class _MidDescriptor: self, instance: Interval[_OrderableTimesT], owner: Any ) -> _OrderableTimesT: ... @overload - def __get__( - self, instance: IntervalTree[_OrderableT], owner: Any - ) -> _OrderableT: ... + def __get__(self, instance: IntervalTree, owner: Any) -> np.ndarray: ... class IntervalMixin: @property @@ -159,7 +155,7 @@ def intervals_to_interval_bounds( intervals: np.ndarray, validate_closed: bool = ... ) -> Tuple[np.ndarray, np.ndarray, str]: ... -class IntervalTree(IntervalMixin, Generic[_OrderableT]): +class IntervalTree(IntervalMixin): def __init__( self, left: np.ndarray, From 4fcf5234da10d763298ab7f91a858a70c8ac4155 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 22 Feb 2022 23:32:45 -0500 Subject: [PATCH 14/15] clean up based on comments. simplify python_parser changes. Introduce IntervalClosedType --- pandas/_typing.py | 4 +++ pandas/core/arrays/interval.py | 26 +++++++------- pandas/core/common.py | 3 +- pandas/core/generic.py | 5 +-- pandas/core/indexes/datetimes.py | 7 ++-- pandas/core/indexes/interval.py | 10 ++++-- pandas/io/excel/_odfreader.py | 9 +++-- pandas/io/parsers/python_parser.py | 54 +++++++++++++++--------------- 8 files changed, 68 insertions(+), 50 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 0d311b3e82a2c..b897a4e8fe199 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -304,3 +304,7 @@ def closed(self) -> bool: # read_xml parsers XMLParsers = Literal["lxml", "etree"] + +# Interval closed type + +IntervalClosedType = Literal["left", "right", "both", "neither"] diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index fa43583bb2c34..bd6dfa8823cc7 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -7,7 +7,6 @@ ) import textwrap from typing import ( - Any, Sequence, TypeVar, Union, @@ -30,6 +29,7 @@ from pandas._typing import ( ArrayLike, Dtype, + IntervalClosedType, NpDtype, PositionalIndexer, ScalarIndexer, @@ -199,9 +199,9 @@ class IntervalArray(IntervalMixin, ExtensionArray): _na_value = _fill_value = np.nan # To make mypy recognize the fields - _left: Any - _right: Any - _dtype: Any + _left: np.ndarray + _right: np.ndarray + _dtype: IntervalDtype # --------------------------------------------------------------------- # Constructors @@ -663,11 +663,7 @@ def __getitem__( if is_scalar(left) and isna(left): return self._fill_value return Interval(left, right, self.closed) - # error: Argument 1 to "ndim" has incompatible type "Union[ndarray, - # ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes, - # generic], Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - if np.ndim(left) > 1: # type: ignore[arg-type] + if np.ndim(left) > 1: # GH#30588 multi-dimensional indexer disallowed raise ValueError("multi-dimensional indexing not allowed") return self._shallow_copy(left, right) @@ -1370,7 +1366,7 @@ def closed(self): ), } ) - def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT: + def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT: if closed not in VALID_CLOSED: msg = f"invalid option for 'closed': {closed}" raise ValueError(msg) @@ -1671,8 +1667,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: dtype 
= self._left.dtype if needs_i8_conversion(dtype): - new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) - new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) + # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" + new_left = type(self._left)._from_sequence( # type: ignore[attr-defined] + nc[:, 0], dtype=dtype + ) + # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" + new_right = type(self._right)._from_sequence( # type: ignore[attr-defined] + nc[:, 1], dtype=dtype + ) else: new_left = nc[:, 0].view(dtype) new_right = nc[:, 1].view(dtype) diff --git a/pandas/core/common.py b/pandas/core/common.py index d81abf2f2bc5c..62c2034505589 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -35,7 +35,6 @@ ArrayLike, NpDtype, RandomState, - Scalar, T, ) from pandas.util._exceptions import find_stack_level @@ -517,7 +516,7 @@ def f(x): def convert_to_list_like( - values: Scalar | Iterable | AnyArrayLike | Hashable, + values: Hashable | Iterable | AnyArrayLike, ) -> list | AnyArrayLike: """ Convert list-like or scalar input to list-like. List, numpy and pandas array-like diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f9dffaaa399f..40cfeb796828b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -46,6 +46,7 @@ FilePath, IndexKeyFunc, IndexLabel, + IntervalClosedType, JSONSerializable, Level, Manager, @@ -7776,7 +7777,7 @@ def between_time( end_time, include_start: bool_t | lib.NoDefault = lib.no_default, include_end: bool_t | lib.NoDefault = lib.no_default, - inclusive: str | None = None, + inclusive: IntervalClosedType | None = None, axis=None, ) -> NDFrameT: """ @@ -7881,7 +7882,7 @@ def between_time( left = True if isinstance(include_start, lib.NoDefault) else include_start right = True if isinstance(include_end, lib.NoDefault) else include_end - inc_dict = { + inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = { (True, True): "both", (True, False): "left", (False, True): "right", diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 004d860b20a6f..4acdc7e6c7556 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -33,6 +33,7 @@ from pandas._typing import ( Dtype, DtypeObj, + IntervalClosedType, npt, ) from pandas.util._decorators import ( @@ -884,8 +885,8 @@ def date_range( tz=None, normalize: bool = False, name: Hashable = None, - closed: str | None | lib.NoDefault = lib.no_default, - inclusive: str | None = None, + closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default, + inclusive: IntervalClosedType | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -1091,7 +1092,7 @@ def bdate_range( weekmask=None, holidays=None, closed: lib.NoDefault = lib.no_default, - inclusive: str | None = None, + inclusive: IntervalClosedType | None = None, **kwargs, ) -> DatetimeIndex: """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index badad69913d62..aea0326bed2fb 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -29,6 +29,7 @@ from pandas._typing import ( Dtype, DtypeObj, + IntervalClosedType, npt, ) from pandas.errors import InvalidIndexError @@ -192,7 +193,7 @@ class IntervalIndex(ExtensionIndex): _typ = "intervalindex" # annotate properties pinned via inherit_names - closed: Literal["left", "right", "both", "neither"] + closed: IntervalClosedType is_non_overlapping_monotonic: bool closed_left: bool closed_right: bool @@ -944,7 +945,12 
@@ def _is_type_compatible(a, b) -> bool: def interval_range( - start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right" + start=None, + end=None, + periods=None, + freq=None, + name: Hashable = None, + closed: IntervalClosedType = "right", ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 41146cf49abfb..384813b6ec65d 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -1,10 +1,12 @@ from __future__ import annotations -from typing import cast +from typing import ( + TYPE_CHECKING, + cast, +) import numpy as np -from pandas._libs.tslibs.nattype import NaTType from pandas._typing import ( FilePath, ReadBuffer, @@ -19,6 +21,9 @@ from pandas.io.excel._base import BaseExcelReader +if TYPE_CHECKING: + from pandas._libs.tslibs.nattype import NaTType + @doc(storage_options=_shared_docs["storage_options"]) class ODFReader(BaseExcelReader): diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 23fa81698ac1d..8ba58ad7be0bc 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -113,7 +113,7 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds): # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. self._col_indices: list[int] | None = None - columns: list[list[Hashable | None]] + columns: list[list[Scalar | None]] ( columns, self.num_original_columns, @@ -352,11 +352,11 @@ def _convert_data( def _infer_columns( self, - ) -> tuple[list[list[Hashable | None]], int, set[Hashable | None]]: + ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]: names = self.names num_original_columns = 0 clear_buffer = True - unnamed_cols: set[Hashable | None] = set() + unnamed_cols: set[Scalar | None] = set() self._header_line = None if self.header is not None: @@ -371,7 +371,7 @@ def _infer_columns( have_mi_columns = False header = [header] - columns: list[list[Hashable | None]] = [] + columns: list[list[Scalar | None]] = [] for level, hr in enumerate(header): try: line = self._buffered_line() @@ -400,7 +400,7 @@ def _infer_columns( line = self.names[:] - this_columns: list[Hashable | None] = [] + this_columns: list[Scalar | None] = [] this_unnamed_cols = [] for i, c in enumerate(line): @@ -475,7 +475,7 @@ def _infer_columns( if clear_buffer: self._clear_buffer() - first_line: list[Hashable] | None + first_line: list[Scalar] | None if names is not None: # Read first row after header to check if data are longer try: @@ -551,10 +551,10 @@ def _infer_columns( def _handle_usecols( self, - columns: list[list[Hashable | None]], - usecols_key: list[Hashable | None], + columns: list[list[Scalar | None]], + usecols_key: list[Scalar | None], num_original_columns: int, - ) -> list[list[Hashable | None]]: + ) -> list[list[Scalar | None]]: """ Sets self._col_indices @@ -599,7 +599,7 @@ def _handle_usecols( self._col_indices = sorted(col_indices) return columns - def _buffered_line(self) -> list[Hashable]: + def _buffered_line(self) -> list[Scalar]: """ Return a line from buffer, filling buffer if required. """ @@ -608,7 +608,7 @@ def _buffered_line(self) -> list[Hashable]: else: return self._next_line() - def _check_for_bom(self, first_row: list[Hashable]) -> list[Hashable]: + def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]: """ Checks whether the file begins with the BOM character. If it does, remove it. 
In addition, if there is quoting @@ -659,10 +659,10 @@ def _check_for_bom(self, first_row: list[Hashable]) -> list[Hashable]: # No quotation so just remove BOM from first element new_row = first_row_bom[1:] - new_row_list: list[Hashable] = [new_row] + new_row_list: list[Scalar] = [new_row] return new_row_list + first_row[1:] - def _is_line_empty(self, line: list[Hashable]) -> bool: + def _is_line_empty(self, line: list[Scalar]) -> bool: """ Check if a line is empty or not. @@ -677,7 +677,7 @@ def _is_line_empty(self, line: list[Hashable]) -> bool: """ return not line or all(not x for x in line) - def _next_line(self) -> list[Hashable]: + def _next_line(self) -> list[Scalar]: if isinstance(self.data, list): while self.skipfunc(self.pos): if self.pos >= len(self.data): @@ -756,7 +756,7 @@ def _alert_malformed(self, msg: str, row_num: int) -> None: base = f"Skipping line {row_num}: " sys.stderr.write(base + msg + "\n") - def _next_iter_line(self, row_num: int) -> list[Hashable] | None: + def _next_iter_line(self, row_num: int) -> list[Scalar] | None: """ Wrapper around iterating through `self.data` (CSV source). @@ -804,7 +804,7 @@ def _next_iter_line(self, row_num: int) -> list[Hashable] | None: self._alert_malformed(msg, row_num) return None - def _check_comments(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: + def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: if self.comment is None: return lines ret = [] @@ -825,7 +825,7 @@ def _check_comments(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: ret.append(rl) return ret - def _remove_empty_lines(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: """ Iterate through the lines and remove any that are either empty or contain only one whitespace value @@ -851,7 +851,7 @@ def _remove_empty_lines(self, lines: list[list[Hashable]]) -> list[list[Hashable ret.append(line) return ret - def _check_thousands(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: + def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: if self.thousands is None: return lines @@ -860,8 +860,8 @@ def _check_thousands(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: ) def _search_replace_num_columns( - self, lines: list[list[Hashable]], search: str, replace: str - ) -> list[list[Hashable]]: + self, lines: list[list[Scalar]], search: str, replace: str + ) -> list[list[Scalar]]: ret = [] for line in lines: rl = [] @@ -878,7 +878,7 @@ def _search_replace_num_columns( ret.append(rl) return ret - def _check_decimal(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: + def _check_decimal(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: if self.decimal == parser_defaults["decimal"]: return lines @@ -893,7 +893,7 @@ def _clear_buffer(self) -> None: def _get_index_name( self, columns: list[Hashable] - ) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]: + ) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]: """ Try several cases to get lines: @@ -908,7 +908,7 @@ def _get_index_name( orig_names = list(columns) columns = list(columns) - line: list[Hashable] | None + line: list[Scalar] | None if self._header_line is not None: line = self._header_line else: @@ -917,7 +917,7 @@ def _get_index_name( except StopIteration: line = None - next_line: list[Hashable] | None + next_line: list[Scalar] | None try: next_line = self._next_line() except StopIteration: @@ -964,7 
+964,7 @@ def _get_index_name( return index_name, orig_names, columns - def _rows_to_cols(self, content: list[list[Hashable]]) -> list[np.ndarray]: + def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: col_len = self.num_original_columns if self._implicit_index: @@ -1051,7 +1051,7 @@ def _rows_to_cols(self, content: list[list[Hashable]]) -> list[np.ndarray]: ] return zipped_content - def _get_lines(self, rows: int | None = None) -> list[list[Hashable]]: + def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]: lines = self.buf new_rows = None @@ -1281,7 +1281,7 @@ def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None: self.infer_nrows, ) - def _remove_empty_lines(self, lines: list[list[Hashable]]) -> list[list[Hashable]]: + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: """ Returns the list of lines without the empty ones. With fixed-width fields, empty lines become arrays of empty strings. From 9e11b8c81b0d7d3b86be2b546c0d35a6299244ab Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 27 Feb 2022 12:18:15 -0500 Subject: [PATCH 15/15] Feedback from twoertwein - Use IntervalClosedType - Consolidate __get__ in LengthDescriptor - Use tuple instead of Tuple --- pandas/_libs/interval.pyi | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi index 77eeda14e52e2..67cb604083c6b 100644 --- a/pandas/_libs/interval.pyi +++ b/pandas/_libs/interval.pyi @@ -3,8 +3,6 @@ from __future__ import annotations from typing import ( Any, Generic, - Literal, - Tuple, TypeVar, Union, overload, @@ -14,6 +12,7 @@ import numpy as np import numpy.typing as npt from pandas._typing import ( + IntervalClosedType, Timedelta, Timestamp, ) @@ -26,9 +25,9 @@ _OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta) class _LengthDescriptor: @overload - def __get__(self, instance: Interval[float], owner: Any) -> float: ... - @overload - def __get__(self, instance: Interval[int], owner: Any) -> int: ... + def __get__( + self, instance: Interval[_OrderableScalarT], owner: Any + ) -> _OrderableScalarT: ... @overload def __get__( self, instance: Interval[_OrderableTimesT], owner: Any @@ -67,12 +66,12 @@ class Interval(IntervalMixin, Generic[_OrderableT]): @property def right(self: Interval[_OrderableT]) -> _OrderableT: ... @property - def closed(self) -> Literal["left", "right", "both", "neither"]: ... + def closed(self) -> IntervalClosedType: ... def __init__( self, left: _OrderableT, right: _OrderableT, - closed: Literal["left", "right", "both", "neither"] = ..., + closed: IntervalClosedType = ..., ): ... def __hash__(self) -> int: ... @overload @@ -153,20 +152,20 @@ class Interval(IntervalMixin, Generic[_OrderableT]): def intervals_to_interval_bounds( intervals: np.ndarray, validate_closed: bool = ... -) -> Tuple[np.ndarray, np.ndarray, str]: ... +) -> tuple[np.ndarray, np.ndarray, str]: ... class IntervalTree(IntervalMixin): def __init__( self, left: np.ndarray, right: np.ndarray, - closed: Literal["left", "right", "both", "neither"] = ..., + closed: IntervalClosedType = ..., leaf_size: int = ..., ): ... def get_indexer(self, target) -> npt.NDArray[np.intp]: ... def get_indexer_non_unique( self, target - ) -> Tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... + ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ... _na_count: int @property def is_overlapping(self) -> bool: ...
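Taken together, the series leaves a type checker seeing `pd.Interval` as generic over its endpoints and every `closed`-style argument as a `Literal` string (`IntervalClosedType`). A quick check of the runtime behaviour those stubs describe, using only public pandas API; the inferred types noted in the comments are what the new stubs are intended to produce:

import pandas as pd

iv = pd.Interval(1, 5, closed="left")
print(iv + 2)             # [3, 7)        -> Interval[int] under the stubs
print(iv * 0.5)           # [0.5, 2.5)    -> Interval[float]
print(iv.length, iv.mid)  # 4 3.0

ts_iv = pd.Interval(pd.Timestamp("2022-01-01"), pd.Timestamp("2022-01-03"))
print(ts_iv.length)       # 2 days 00:00:00  -> Timedelta, via the overloaded _LengthDescriptor

# closed is typed as IntervalClosedType = Literal["left", "right", "both", "neither"],
# so a typo such as closed="lft" is reported by mypy instead of only raising at runtime.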