From 9e57d6d212a4edf9227c137cf56ca92239dfb4f5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 21 Dec 2021 13:39:55 -0500 Subject: [PATCH 1/7] fix column_arrays for array manager --- pandas/core/internals/array_manager.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 09f16a2ddab67..06849bffff5ca 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -794,7 +794,14 @@ def column_arrays(self) -> list[ArrayLike]: """ Used in the JSON C code to access column arrays. """ - return self.arrays + + def convert_array(arr: ArrayLike) -> ArrayLike: + if isinstance(arr, ExtensionArray): + return arr.to_numpy() + else: + return arr + + return [convert_array(arr) for arr in self.arrays] def iset( self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False From 84b119faea9b3409c5f1a1afe388b77bfff29e45 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 23 Feb 2022 07:24:01 -0500 Subject: [PATCH 2/7] remove dead code in arrays/interval.py --- pandas/core/arrays/interval.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d23910c37b52b..d809ad90ad1b5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1664,12 +1664,8 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype - if needs_i8_conversion(dtype): - new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) - new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) - else: - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: From 64fedddc8d03f8b6ebfad6ce5827c9d778f0a1b7 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 25 Feb 2022 14:51:00 -0500 Subject: [PATCH 3/7] Undo Revert "remove dead code in arrays/interval.py" This reverts commit 84b119faea9b3409c5f1a1afe388b77bfff29e45. --- pandas/core/arrays/interval.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d809ad90ad1b5..d23910c37b52b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1664,8 +1664,12 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + if needs_i8_conversion(dtype): + new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) + new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) + else: + new_left = nc[:, 0].view(dtype) + new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: From 7a0abcd844e843f6991d5cd001dff9f91cb33288 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 12 Mar 2022 13:39:41 -0500 Subject: [PATCH 4/7] Add type checks for pd.to_timedelta make Timedelta operations return Timedelta instead of timedelta --- pandas/_libs/tslibs/timedeltas.pyi | 78 ++++++++++++++++++++++++------ pandas/_typing.py | 1 + pandas/core/tools/datetimes.py | 15 +++--- pandas/core/tools/timedeltas.py | 45 ++++++++++++++++- pandas/io/sql.py | 7 ++- 5 files changed, 121 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 9377261979be4..56e38df064985 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -1,19 +1,65 @@ from datetime import timedelta from typing import ( ClassVar, + Literal, Type, TypeVar, overload, ) import numpy as np -import numpy.typing as npt from pandas._libs.tslibs import ( NaTType, Tick, ) +from pandas._typing import npt +# Copied from pandas/_libs/tslibs/timedeltas.pyx + +UnitChoices = Literal[ + "Y", + "y", + "M", + "W", + "w", + "D", + "d", + "days", + "day", + "hours", + "hour", + "hr", + "h", + "m", + "minute", + "min", + "minutes", + "t", + "s", + "seconds", + "sec", + "second", + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "l", + "us", + "microseconds", + "microsecond", + "µs", + "micro", + "micros", + "u", + "ns", + "nanoseconds", + "nano", + "nanos", + "nanosecond", + "n", +] _S = TypeVar("_S", bound=timedelta) def ints_to_pytimedelta( @@ -25,7 +71,7 @@ def array_to_timedelta64( unit: str | None = ..., errors: str = ..., ) -> np.ndarray: ... # np.ndarray[m8ns] -def parse_timedelta_unit(unit: str | None) -> str: ... +def parse_timedelta_unit(unit: str | None) -> UnitChoices: ... def delta_to_nanoseconds(delta: np.timedelta64 | timedelta | Tick) -> int: ... class Timedelta(timedelta): @@ -59,20 +105,20 @@ class Timedelta(timedelta): def ceil(self: _S, freq: str) -> _S: ... @property def resolution_string(self) -> str: ... - def __add__(self, other: timedelta) -> timedelta: ... - def __radd__(self, other: timedelta) -> timedelta: ... - def __sub__(self, other: timedelta) -> timedelta: ... - def __rsub__(self, other: timedelta) -> timedelta: ... - def __neg__(self) -> timedelta: ... - def __pos__(self) -> timedelta: ... - def __abs__(self) -> timedelta: ... - def __mul__(self, other: float) -> timedelta: ... - def __rmul__(self, other: float) -> timedelta: ... + def __add__(self, other: timedelta) -> Timedelta: ... + def __radd__(self, other: timedelta) -> Timedelta: ... + def __sub__(self, other: timedelta) -> Timedelta: ... + def __rsub__(self, other: timedelta) -> Timedelta: ... + def __neg__(self) -> Timedelta: ... + def __pos__(self) -> Timedelta: ... + def __abs__(self) -> Timedelta: ... + def __mul__(self, other: float) -> Timedelta: ... + def __rmul__(self, other: float) -> Timedelta: ... # error: Signature of "__floordiv__" incompatible with supertype "timedelta" @overload # type: ignore[override] def __floordiv__(self, other: timedelta) -> int: ... @overload - def __floordiv__(self, other: int | float) -> timedelta: ... + def __floordiv__(self, other: int | float) -> Timedelta: ... @overload def __floordiv__( self, other: npt.NDArray[np.timedelta64] @@ -90,11 +136,13 @@ class Timedelta(timedelta): @overload def __truediv__(self, other: timedelta) -> float: ... @overload - def __truediv__(self, other: float) -> timedelta: ... - def __mod__(self, other: timedelta) -> timedelta: ... - def __divmod__(self, other: timedelta) -> tuple[int, timedelta]: ... + def __truediv__(self, other: float) -> Timedelta: ... + def __mod__(self, other: timedelta) -> Timedelta: ... + def __divmod__(self, other: timedelta) -> tuple[int, Timedelta]: ... def __le__(self, other: timedelta) -> bool: ... def __lt__(self, other: timedelta) -> bool: ... def __ge__(self, other: timedelta) -> bool: ... def __gt__(self, other: timedelta) -> bool: ... def __hash__(self) -> int: ... + def isoformat(self) -> str: ... + def to_numpy(self) -> np.timedelta64: ... diff --git a/pandas/_typing.py b/pandas/_typing.py index cabf0e8275d08..2b42a0a3efb44 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -313,3 +313,4 @@ def closed(self) -> bool: # datetime and NaTType DatetimeNaTType = Union[datetime, "NaTType"] +DateTimeErrorChoices = Literal["ignore", "raise", "coerce"] diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3ff6e7f09b72a..46fd1cad97440 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -39,6 +39,7 @@ from pandas._typing import ( AnyArrayLike, ArrayLike, + DateTimeErrorChoices, Timezone, ) from pandas.util._exceptions import find_stack_level @@ -79,6 +80,7 @@ if TYPE_CHECKING: from pandas._libs.tslibs.nattype import NaTType + from pandas._libs.tslibs.timedeltas import UnitChoices from pandas import ( DataFrame, @@ -657,7 +659,7 @@ def _adjust_to_origin(arg, origin, unit): @overload def to_datetime( arg: DatetimeScalar, - errors: str = ..., + errors: DateTimeErrorChoices = ..., dayfirst: bool = ..., yearfirst: bool = ..., utc: bool | None = ..., @@ -674,7 +676,7 @@ def to_datetime( @overload def to_datetime( arg: Series | DictConvertible, - errors: str = ..., + errors: DateTimeErrorChoices = ..., dayfirst: bool = ..., yearfirst: bool = ..., utc: bool | None = ..., @@ -691,7 +693,7 @@ def to_datetime( @overload def to_datetime( arg: list | tuple | Index | ArrayLike, - errors: str = ..., + errors: DateTimeErrorChoices = ..., dayfirst: bool = ..., yearfirst: bool = ..., utc: bool | None = ..., @@ -707,7 +709,7 @@ def to_datetime( def to_datetime( arg: DatetimeScalarOrArrayConvertible | DictConvertible, - errors: str = "raise", + errors: DateTimeErrorChoices = "raise", dayfirst: bool = False, yearfirst: bool = False, utc: bool | None = None, @@ -1148,7 +1150,7 @@ def to_datetime( } -def _assemble_from_unit_mappings(arg, errors, tz): +def _assemble_from_unit_mappings(arg, errors: DateTimeErrorChoices, tz): """ assemble the unit specified fields from the arg (DataFrame) Return a Series for actual parsing @@ -1228,7 +1230,8 @@ def coerce(values): except (TypeError, ValueError) as err: raise ValueError(f"cannot assemble the datetimes: {err}") from err - for u in ["h", "m", "s", "ms", "us", "ns"]: + units: list[UnitChoices] = ["h", "m", "s", "ms", "us", "ns"] + for u in units: value = unit_rev.get(u) if value is not None and value in arg: try: diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 81b2be4e10e62..55c96e65028d8 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -3,6 +3,12 @@ """ from __future__ import annotations +from datetime import timedelta +from typing import ( + TYPE_CHECKING, + overload, +) + import numpy as np from pandas._libs import lib @@ -23,8 +29,39 @@ from pandas.core.arrays.timedeltas import sequence_to_td64ns +if TYPE_CHECKING: + from pandas._libs.tslibs.timedeltas import UnitChoices + from pandas._typing import ( + AnyArrayLike, + DateTimeErrorChoices, + ) + + from pandas import Series + + +@overload +def to_timedelta( + arg: str | int | float | timedelta, + unit: UnitChoices | None = ..., + errors: DateTimeErrorChoices = ..., +) -> Timedelta: + ... + + +@overload +def to_timedelta( + arg: list | tuple | range | AnyArrayLike, + unit: UnitChoices | None = ..., + errors: DateTimeErrorChoices = ..., +) -> Series: + ... + -def to_timedelta(arg, unit=None, errors="raise"): +def to_timedelta( + arg: str | int | float | timedelta | list | tuple | range | AnyArrayLike, + unit: UnitChoices | None = None, + errors: DateTimeErrorChoices = "raise", +) -> Timedelta | Series: """ Convert argument to timedelta. @@ -133,7 +170,11 @@ def to_timedelta(arg, unit=None, errors="raise"): return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name) elif isinstance(arg, np.ndarray) and arg.ndim == 0: # extract array scalar and process below - arg = lib.item_from_zerodim(arg) + # error: Incompatible types in assignment (expression has type "object", + # variable has type "Union[str, int, float, timedelta, List[Any], + # Tuple[Any, ...], Union[Union[ExtensionArray, ndarray[Any, Any]], Index, + # Series]]") [assignment] + arg = lib.item_from_zerodim(arg) # type: ignore[assignment] elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1: return _convert_listlike(arg, unit=unit, errors=errors) elif getattr(arg, "ndim", 1) > 1: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e004e9c1ecbcc..0bb27334e1651 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -25,7 +25,10 @@ import numpy as np import pandas._libs.lib as lib -from pandas._typing import DtypeArg +from pandas._typing import ( + DateTimeErrorChoices, + DtypeArg, +) from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError from pandas.util._exceptions import find_stack_level @@ -86,7 +89,7 @@ def _handle_date_column( # read_sql like functions. # Format can take on custom to_datetime argument values such as # {"errors": "coerce"} or {"dayfirst": True} - error = format.pop("errors", None) or "ignore" + error: DateTimeErrorChoices = format.pop("errors", None) or "ignore" return to_datetime(col, errors=error, **format) else: # Allow passing of formatting string for integers From 7b31def4f2e191cdbd98b1346e5298756c0e1671 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 12 Mar 2022 18:47:01 -0500 Subject: [PATCH 5/7] add TimeDeltaIndex return for listlike --- pandas/core/tools/timedeltas.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 55c96e65028d8..67fca9300bb5a 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -32,11 +32,15 @@ if TYPE_CHECKING: from pandas._libs.tslibs.timedeltas import UnitChoices from pandas._typing import ( - AnyArrayLike, + ArrayLike, DateTimeErrorChoices, ) - from pandas import Series + from pandas import ( + Index, + Series, + TimeDeltaIndex, + ) @overload @@ -50,18 +54,36 @@ def to_timedelta( @overload def to_timedelta( - arg: list | tuple | range | AnyArrayLike, + arg: Series, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., ) -> Series: ... +@overload +def to_timedelta( + arg: list | tuple | range | ArrayLike | Index, + unit: UnitChoices | None = ..., + errors: DateTimeErrorChoices = ..., +) -> TimeDeltaIndex: + ... + + def to_timedelta( - arg: str | int | float | timedelta | list | tuple | range | AnyArrayLike, + arg: str + | int + | float + | timedelta + | list + | tuple + | range + | ArrayLike + | Index + | Series, unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", -) -> Timedelta | Series: +) -> Timedelta | TimeDeltaIndex | Series: """ Convert argument to timedelta. From bf5c16ea28fcefebc49ba6d38240dfd400cb4d0f Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 13 Mar 2022 22:32:16 -0400 Subject: [PATCH 6/7] add comments about cross referencing the code --- pandas/_libs/tslibs/timedeltas.pyi | 4 ++-- pandas/_libs/tslibs/timedeltas.pyx | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 56e38df064985..28c2f7db62158 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -15,8 +15,8 @@ from pandas._libs.tslibs import ( ) from pandas._typing import npt -# Copied from pandas/_libs/tslibs/timedeltas.pyx - +# This should be kept consistent with the keys in the dict timedelta_abbrevs +# in pandas/_libs/tslibs/timedeltas.pyx UnitChoices = Literal[ "Y", "y", diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 8eaf86b3d193f..e6b27b4459aae 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -81,6 +81,7 @@ Components = collections.namedtuple( ], ) +# This should be kept consistent with UnitChoices in pandas/_libs/tslibs/timedeltas.pyi cdef dict timedelta_abbrevs = { "Y": "Y", "y": "Y", From 6edf1ed2eb9e1efd89d6b705de7e20926b4ee40e Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sun, 13 Mar 2022 23:10:11 -0400 Subject: [PATCH 7/7] correct capitalization of TimedeltaIndex --- pandas/core/tools/timedeltas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 67fca9300bb5a..720d02f0cf59e 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -39,7 +39,7 @@ from pandas import ( Index, Series, - TimeDeltaIndex, + TimedeltaIndex, ) @@ -66,7 +66,7 @@ def to_timedelta( arg: list | tuple | range | ArrayLike | Index, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., -) -> TimeDeltaIndex: +) -> TimedeltaIndex: ... @@ -83,7 +83,7 @@ def to_timedelta( | Series, unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", -) -> Timedelta | TimeDeltaIndex | Series: +) -> Timedelta | TimedeltaIndex | Series: """ Convert argument to timedelta.