Skip to content

TYP: Subset of "Improved the type stubs in the _libs directory to help with type checking" #44251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 14, 2021
6 changes: 3 additions & 3 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -516,9 +516,9 @@ def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):

Returns
-------
tuple of tuples
left : (ndarray, object, array)
right : (ndarray, object, array)
tuple of
left : ndarray
right : ndarray
closed: str
"""
cdef:
Expand Down
15 changes: 15 additions & 0 deletions pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np
from numpy import typing as npt

# Type-stub declarations only: every body is elided with ``...``, and a
# ``= ...`` default means "this parameter has a default" without stating it.

# Declared as an opaque class; this stub exposes no attributes or methods.
class NAType: ...

# Module-level name annotated as being of type ``NAType``.
NA: NAType

# Scalar-level predicates: each accepts arbitrary objects and returns ``bool``.
def is_matching_na(
left: object, right: object, nan_matches_none: bool = ...
) -> bool: ...
def isposinf_scalar(val: object) -> bool: ...
def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...

# Array-level checks: each takes an ndarray and returns a boolean ndarray.
# NOTE(review): presumably an element-wise mask shaped like the input --
# confirm against the Cython implementation.
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
46 changes: 23 additions & 23 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,33 @@ class PeriodDtypeBase:
def resolution(self) -> Resolution: ...

class FreqGroup(Enum):
FR_ANN: int = ...
FR_QTR: int = ...
FR_MTH: int = ...
FR_WK: int = ...
FR_BUS: int = ...
FR_DAY: int = ...
FR_HR: int = ...
FR_MIN: int = ...
FR_SEC: int = ...
FR_MS: int = ...
FR_US: int = ...
FR_NS: int = ...
FR_UND: int = ...
FR_ANN: int
FR_QTR: int
FR_MTH: int
FR_WK: int
FR_BUS: int
FR_DAY: int
FR_HR: int
FR_MIN: int
FR_SEC: int
FR_MS: int
FR_US: int
FR_NS: int
FR_UND: int
@staticmethod
def get_freq_group(code: int) -> FreqGroup: ...

class Resolution(Enum):
RESO_NS: int = ...
RESO_US: int = ...
RESO_MS: int = ...
RESO_SEC: int = ...
RESO_MIN: int = ...
RESO_HR: int = ...
RESO_DAY: int = ...
RESO_MTH: int = ...
RESO_QTR: int = ...
RESO_YR: int = ...
RESO_NS: int
RESO_US: int
RESO_MS: int
RESO_SEC: int
RESO_MIN: int
RESO_HR: int
RESO_DAY: int
RESO_MTH: int
RESO_QTR: int
RESO_YR: int
def __lt__(self, other: Resolution) -> bool: ...
def __ge__(self, other: Resolution) -> bool: ...
@property
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/nattype.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ NaT: NaTType
iNaT: int
nat_strings: set[str]

def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ...

class NaTType(datetime):
value: np.int64
def asm8(self) -> np.datetime64: ...
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/np_datetime.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
class OutOfBoundsDatetime(ValueError): ...
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3573,7 +3573,7 @@ cpdef to_offset(freq):

Parameters
----------
freq : str, tuple, datetime.timedelta, DateOffset or None
freq : str, datetime.timedelta, BaseOffset or None

Returns
-------
Expand All @@ -3586,7 +3586,7 @@ cpdef to_offset(freq):

See Also
--------
DateOffset : Standard kind of date increment used for a date range.
BaseOffset : Standard kind of date increment used for a date range.

Examples
--------
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/timestamps.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,14 @@ import numpy as np

from pandas._libs.tslibs import (
BaseOffset,
NaT,
NaTType,
Period,
Timedelta,
)

_S = TypeVar("_S")

def integer_op_not_supported(obj) -> None: ...
def integer_op_not_supported(obj) -> TypeError: ...

class Timestamp(datetime):
min: ClassVar[Timestamp]
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,9 @@ def _cmp_method(self, other, op):

# ------------------------------------------------------------------------
# String methods interface
_str_na_value = StringDtype.na_value
# error: Incompatible types in assignment (expression has type "NAType",
# base class "PandasArray" defined the type as "float")
_str_na_value = StringDtype.na_value # type: ignore[assignment]

def _str_map(
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,10 @@ def maybe_upcast(
# We get a copy in all cases _except_ (values.dtype == new_dtype and not copy)
upcast_values = values.astype(new_dtype, copy=copy)

# error: Incompatible return value type (got "Tuple[ndarray[Any, dtype[Any]],
# Union[Union[str, int, float, bool], Union[Period, Timestamp, Timedelta, Any]]]",
# expected "Tuple[NumpyArrayT, Union[Union[str, int, float, bool], Union[Period,
# Timestamp, Timedelta, Any]]]")
return upcast_values, fill_value # type: ignore[return-value]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably be annotated as np.ndarray rather than NumpyArrayT, since the dtype might change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dtype can change, but the Python type (i.e. np.ndarray or a subclass) may be preserved. I'm not sure, but the numpy type annotations don't declare that the Python object type is preserved, so maybe not. This was added for ma.MaskedArray handling in the constructors, so it could well be that the numpy types are not precise enough, creating this false positive.



Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,15 +876,15 @@ def freq(self):

@classmethod
def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset:
if isinstance(freq, str):
if isinstance(freq, str): # note: freq is already of type str!
if freq.startswith("period[") or freq.startswith("Period["):
m = cls._match.search(freq)
if m is not None:
freq = m.group("freq")

freq = to_offset(freq)
if freq is not None:
return freq
freq_offset = to_offset(freq)
if freq_offset is not None:
return freq_offset

raise ValueError("could not construct PeriodDtype")

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
if inf_as_na and is_categorical_dtype(dtype):
result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
else:
result = values.isna()
# error: Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
# type "ndarray[Any, dtype[bool_]]")
result = values.isna() # type: ignore[assignment]
elif is_string_or_object_np_dtype(values.dtype):
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
elif needs_i8_conversion(dtype):
Expand Down
30 changes: 18 additions & 12 deletions pandas/core/ops/mask_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@


def kleene_or(
left: bool | np.ndarray,
right: bool | np.ndarray,
left: bool | np.ndarray | libmissing.NAType,
right: bool | np.ndarray | libmissing.NAType,
left_mask: np.ndarray | None,
right_mask: np.ndarray | None,
):
Expand All @@ -37,12 +37,13 @@ def kleene_or(
The result of the logical or, and the new mask.
"""
# To reduce the number of cases, we ensure that `left` & `left_mask`
# always come from an array, not a scalar. This is safe, since because
# always come from an array, not a scalar. This is safe, since
# A | B == B | A
if left_mask is None:
return kleene_or(right, left, right_mask, left_mask)

assert isinstance(left, np.ndarray)
if not isinstance(left, np.ndarray):
raise TypeError("Either `left` or `right` need to be a np.ndarray.")

raise_for_nan(right, method="or")

Expand Down Expand Up @@ -73,8 +74,8 @@ def kleene_or(


def kleene_xor(
left: bool | np.ndarray,
right: bool | np.ndarray,
left: bool | np.ndarray | libmissing.NAType,
right: bool | np.ndarray | libmissing.NAType,
left_mask: np.ndarray | None,
right_mask: np.ndarray | None,
):
Expand All @@ -99,16 +100,20 @@ def kleene_xor(
result, mask: ndarray[bool]
The result of the logical xor, and the new mask.
"""
# To reduce the number of cases, we ensure that `left` & `left_mask`
# always come from an array, not a scalar. This is safe, since
# A ^ B == B ^ A
if left_mask is None:
return kleene_xor(right, left, right_mask, left_mask)

if not isinstance(left, np.ndarray):
raise TypeError("Either `left` or `right` need to be a np.ndarray.")

raise_for_nan(right, method="xor")
if right is libmissing.NA:
result = np.zeros_like(left)
else:
# error: Incompatible types in assignment (expression has type
# "Union[bool, Any]", variable has type "ndarray")
result = left ^ right # type: ignore[assignment]
result = left ^ right

if right_mask is None:
if right is libmissing.NA:
Expand Down Expand Up @@ -146,12 +151,13 @@ def kleene_and(
The result of the logical and, and the new mask.
"""
# To reduce the number of cases, we ensure that `left` & `left_mask`
# always come from an array, not a scalar. This is safe, since because
# A | B == B | A
# always come from an array, not a scalar. This is safe, since
# A & B == B & A
if left_mask is None:
return kleene_and(right, left, right_mask, left_mask)

assert isinstance(left, np.ndarray)
if not isinstance(left, np.ndarray):
raise TypeError("Either `left` or `right` need to be a np.ndarray.")
raise_for_nan(right, method="and")

if right is libmissing.NA:
Expand Down
20 changes: 10 additions & 10 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2012,30 +2012,30 @@ def _adjust_dates_anchored(
if closed == "right":
if foffset > 0:
# roll back
fresult = first.value - foffset
fresult_int = first.value - foffset
else:
fresult = first.value - freq.nanos
fresult_int = first.value - freq.nanos

if loffset > 0:
# roll forward
lresult = last.value + (freq.nanos - loffset)
lresult_int = last.value + (freq.nanos - loffset)
else:
# already the end of the road
lresult = last.value
lresult_int = last.value
else: # closed == 'left'
if foffset > 0:
fresult = first.value - foffset
fresult_int = first.value - foffset
else:
# start of the road
fresult = first.value
fresult_int = first.value

if loffset > 0:
# roll forward
lresult = last.value + (freq.nanos - loffset)
lresult_int = last.value + (freq.nanos - loffset)
else:
lresult = last.value + freq.nanos
fresult = Timestamp(fresult)
lresult = Timestamp(lresult)
lresult_int = last.value + freq.nanos
fresult = Timestamp(fresult_int)
lresult = Timestamp(lresult_int)
if first_tzinfo is not None:
fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
if last_tzinfo is not None:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def rep(x, r):
return result

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if not case:
flags |= re.IGNORECASE
Expand All @@ -208,7 +208,7 @@ def _str_fullmatch(
pat: str | re.Pattern,
case: bool = True,
flags: int = 0,
na: Scalar = None,
na: Scalar | None = None,
):
if not case:
flags |= re.IGNORECASE
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/arrays/boolean/test_logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.core.ops.mask_ops import (
kleene_and,
kleene_or,
kleene_xor,
)
from pandas.tests.extension.base import BaseOpsUtil


Expand Down Expand Up @@ -239,3 +244,11 @@ def test_no_masked_assumptions(self, other, all_logical_operators):
result = getattr(a, all_logical_operators)(other)
expected = getattr(b, all_logical_operators)(other)
tm.assert_extension_array_equal(result, expected)


@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
def test_error_both_scalar(operation):
    """Kleene ops raise ``TypeError`` when neither operand is an array."""
    # Both masks are deliberately non-None: when ``left_mask`` is None the
    # ops swap their operands and call themselves again, which would recurse
    # forever for two scalar operands.
    left_mask = np.zeros(1)
    right_mask = np.zeros(1)
    expected_msg = r"Either `left` or `right` need to be a np\.ndarray."
    with pytest.raises(TypeError, match=expected_msg):
        operation(True, True, left_mask, right_mask)