Skip to content

Commit 949885e

Browse files
authored
Change check() function to test actual types in Series and Index (#546)
* fix check function to check actual type, not using __iter__()
* change np.int32 to np.int_
* for bitwise use np.integer
* change tables reference in pyproject.toml
* for nightly, use np.integer rather than np.int64
* make timedelta division type result dependent on pandas version
* fix mypy issue of assigning timedeltadiv return type
1 parent 00ab656 commit 949885e

10 files changed

+352
-303
lines changed

pandas-stubs/core/series.pyi

+12-24
Original file line numberDiff line numberDiff line change
@@ -1289,12 +1289,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
12891289
# ignore needed for mypy as we want different results based on the arguments
12901290
@overload # type: ignore[override]
12911291
def __and__( # type: ignore[misc]
1292-
self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
1292+
self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
12931293
) -> Series[bool]: ...
12941294
@overload
1295-
def __and__(
1296-
self, other: int | list[int] | np_ndarray_anyint | Series[int]
1297-
) -> Series[int]: ...
1295+
def __and__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
12981296
# def __array__(self, dtype: Optional[_bool] = ...) -> _np_ndarray
12991297
def __div__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
13001298
def __eq__(self, other: object) -> Series[_bool]: ... # type: ignore[override]
@@ -1323,22 +1321,18 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
13231321
# ignore needed for mypy as we want different results based on the arguments
13241322
@overload # type: ignore[override]
13251323
def __or__( # type: ignore[misc]
1326-
self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
1324+
self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
13271325
) -> Series[bool]: ...
13281326
@overload
1329-
def __or__(
1330-
self, other: int | list[int] | np_ndarray_anyint | Series[int]
1331-
) -> Series[int]: ...
1327+
def __or__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
13321328
def __radd__(self, other: num | _str | _ListLike | Series[S1]) -> Series[S1]: ...
13331329
# ignore needed for mypy as we want different results based on the arguments
13341330
@overload # type: ignore[override]
13351331
def __rand__( # type: ignore[misc]
1336-
self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
1332+
self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
13371333
) -> Series[bool]: ...
13381334
@overload
1339-
def __rand__(
1340-
self, other: int | list[int] | np_ndarray_anyint | Series[int]
1341-
) -> Series[int]: ...
1335+
def __rand__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ... # type: ignore[misc]
13421336
def __rdiv__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
13431337
def __rdivmod__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ... # type: ignore[override]
13441338
def __rfloordiv__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
@@ -1349,12 +1343,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
13491343
# ignore needed for mypy as we want different results based on the arguments
13501344
@overload # type: ignore[override]
13511345
def __ror__( # type: ignore[misc]
1352-
self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
1346+
self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
13531347
) -> Series[bool]: ...
13541348
@overload
1355-
def __ror__(
1356-
self, other: int | list[int] | np_ndarray_anyint | Series[int]
1357-
) -> Series[int]: ...
1349+
def __ror__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ... # type: ignore[misc]
13581350
def __rsub__(self, other: num | _ListLike | Series[S1]) -> Series: ...
13591351
@overload
13601352
def __rtruediv__(self, other: TimedeltaSeries) -> Series[float]: ...
@@ -1363,12 +1355,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
13631355
# ignore needed for mypy as we want different results based on the arguments
13641356
@overload # type: ignore[override]
13651357
def __rxor__( # type: ignore[misc]
1366-
self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
1358+
self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
13671359
) -> Series[bool]: ...
13681360
@overload
1369-
def __rxor__(
1370-
self, other: int | list[int] | np_ndarray_anyint | Series[int]
1371-
) -> Series[int]: ...
1361+
def __rxor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ... # type: ignore[misc]
13721362
@overload
13731363
def __sub__(
13741364
self, other: Timestamp | datetime | TimestampSeries
@@ -1389,12 +1379,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
13891379
# ignore needed for mypy as we want different results based on the arguments
13901380
@overload # type: ignore[override]
13911381
def __xor__( # type: ignore[misc]
1392-
self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
1382+
self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
13931383
) -> Series[bool]: ...
13941384
@overload
1395-
def __xor__(
1396-
self, other: int | list[int] | np_ndarray_anyint | Series[int]
1397-
) -> Series[int]: ...
1385+
def __xor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
13981386
def __invert__(self) -> Series[bool]: ...
13991387
# properties
14001388
# @property

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pre-commit = ">=2.19.0"
4949
black = ">=22.12.0"
5050
isort = ">=5.10.1"
5151
openpyxl = ">=3.0.10"
52-
tables = { version = ">=3.7.0", python = "<3.11" }
52+
tables = { version = ">=3.7.0" }
5353
lxml = { version = ">=4.7.1,<4.9.0", python = "<3.11" }
5454
pyreadstat = ">=1.2.0"
5555
xlrd = ">=2.0.1"

tests/__init__.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,18 @@
2828
)
2929
# This is only needed temporarily due to no wheels being available for lxml on 3.11
3030

31-
pytables_skip = pytest.mark.skipif(
32-
sys.version_info >= (3, 11), reason="pytables is not available for 3.11 yet"
33-
)
34-
# This is only needed temporarily due to no wheels being available for pytables on 3.11
35-
3631

3732
def check(actual: T, klass: type, dtype: type | None = None, attr: str = "left") -> T:
3833
if not isinstance(actual, klass):
3934
raise RuntimeError(f"Expected type '{klass}' but got '{type(actual)}'")
4035
if dtype is None:
4136
return actual # type: ignore[return-value]
4237

43-
if hasattr(actual, "__iter__"):
38+
if isinstance(actual, pd.Series):
39+
value = actual.iloc[0]
40+
elif isinstance(actual, pd.Index):
41+
value = actual[0] # type: ignore[assignment]
42+
elif hasattr(actual, "__iter__"):
4443
value = next(iter(actual)) # pyright: ignore[reportGeneralTypeIssues]
4544
else:
4645
assert hasattr(actual, attr)

tests/test_frame.py

+21-17
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,13 @@ def test_types_init() -> None:
7272

7373
def test_types_all() -> None:
7474
df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])
75-
check(assert_type(df.all(), "pd.Series[bool]"), pd.Series, bool)
75+
check(assert_type(df.all(), "pd.Series[bool]"), pd.Series, np.bool_)
7676
check(assert_type(df.all(axis=None), bool), np.bool_)
7777

7878

7979
def test_types_any() -> None:
8080
df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])
81-
check(assert_type(df.any(), "pd.Series[bool]"), pd.Series, bool)
81+
check(assert_type(df.any(), "pd.Series[bool]"), pd.Series, np.bool_)
8282
check(assert_type(df.any(axis=None), bool), np.bool_)
8383

8484

@@ -496,7 +496,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
496496
check(assert_type(df.apply(gethead, args=(4,)), pd.DataFrame), pd.DataFrame)
497497

498498
# Check various return types for default result_type (None) with default axis (0)
499-
check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, int)
499+
check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, np.int64)
500500
check(assert_type(df.apply(returns_series), pd.DataFrame), pd.DataFrame)
501501
check(assert_type(df.apply(returns_listlike_of_3), pd.DataFrame), pd.DataFrame)
502502
check(assert_type(df.apply(returns_dict), pd.Series), pd.Series)
@@ -507,7 +507,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
507507
# to pass a result_type of "expand" to a scalar return
508508
assert_type(df.apply(returns_scalar, result_type="expand"), "pd.Series[int]"),
509509
pd.Series,
510-
int,
510+
np.int64,
511511
)
512512
check(
513513
assert_type(df.apply(returns_series, result_type="expand"), pd.DataFrame),
@@ -530,7 +530,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
530530
# to pass a result_type of "reduce" to a scalar return
531531
assert_type(df.apply(returns_scalar, result_type="reduce"), "pd.Series[int]"),
532532
pd.Series,
533-
int,
533+
np.int64,
534534
)
535535
check(
536536
# Note that technically it does not make sense
@@ -548,7 +548,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
548548

549549
# Check various return types for default result_type (None) with axis=1
550550
check(
551-
assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"), pd.Series, int
551+
assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"),
552+
pd.Series,
553+
np.int64,
552554
)
553555
check(assert_type(df.apply(returns_series, axis=1), pd.DataFrame), pd.DataFrame)
554556
check(assert_type(df.apply(returns_listlike_of_3, axis=1), pd.Series), pd.Series)
@@ -562,7 +564,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
562564
df.apply(returns_scalar, axis=1, result_type="expand"), "pd.Series[int]"
563565
),
564566
pd.Series,
565-
int,
567+
np.int64,
566568
)
567569
check(
568570
assert_type(
@@ -589,7 +591,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
589591
df.apply(returns_scalar, axis=1, result_type="reduce"), "pd.Series[int]"
590592
),
591593
pd.Series,
592-
int,
594+
np.int64,
593595
)
594596
check(
595597
# Note that technically it does not make sense
@@ -668,33 +670,35 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
668670
# Test various other positional/keyword argument combinations
669671
# to ensure all overloads are supported
670672
check(
671-
assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"), pd.Series, int
673+
assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"),
674+
pd.Series,
675+
np.int64,
672676
)
673677
check(
674678
assert_type(
675679
df.apply(returns_scalar, axis=0, result_type=None), "pd.Series[int]"
676680
),
677681
pd.Series,
678-
int,
682+
np.int64,
679683
)
680684
check(
681685
assert_type(df.apply(returns_scalar, 0, False, None), "pd.Series[int]"),
682686
pd.Series,
683-
int,
687+
np.int64,
684688
)
685689
check(
686690
assert_type(
687691
df.apply(returns_scalar, 0, False, result_type=None), "pd.Series[int]"
688692
),
689693
pd.Series,
690-
int,
694+
np.int64,
691695
)
692696
check(
693697
assert_type(
694698
df.apply(returns_scalar, 0, raw=False, result_type=None), "pd.Series[int]"
695699
),
696700
pd.Series,
697-
int,
701+
np.int64,
698702
)
699703

700704

@@ -863,7 +867,7 @@ def test_types_groupby_methods() -> None:
863867
check(
864868
assert_type(df.groupby("col1").value_counts(normalize=False), "pd.Series[int]"),
865869
pd.Series,
866-
int,
870+
np.int64,
867871
)
868872
check(
869873
assert_type(
@@ -948,12 +952,12 @@ def test_types_groupby_any() -> None:
948952
check(
949953
assert_type(df.groupby("col1")["col2"].any(), "pd.Series[bool]"),
950954
pd.Series,
951-
bool,
955+
np.bool_,
952956
)
953957
check(
954958
assert_type(df.groupby("col1")["col2"].any(), "pd.Series[bool]"),
955959
pd.Series,
956-
bool,
960+
np.bool_,
957961
)
958962

959963

@@ -2277,7 +2281,7 @@ def test_series_groupby_and_value_counts() -> None:
22772281
)
22782282
c1 = df.groupby("Animal")["Max Speed"].value_counts()
22792283
c2 = df.groupby("Animal")["Max Speed"].value_counts(normalize=True)
2280-
check(assert_type(c1, "pd.Series[int]"), pd.Series, int)
2284+
check(assert_type(c1, "pd.Series[int]"), pd.Series, np.int64)
22812285
check(assert_type(c2, "pd.Series[float]"), pd.Series, float)
22822286

22832287

tests/test_indexes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def test_column_getitem() -> None:
8787

8888
column = df.columns[0]
8989
check(assert_type(column, Scalar), str)
90-
check(assert_type(df[column], pd.Series), pd.Series, int)
90+
check(assert_type(df[column], pd.Series), pd.Series, np.int64)
9191

9292

9393
def test_column_contains() -> None:
@@ -843,7 +843,7 @@ def test_getitem() -> None:
843843
iri = pd.RangeIndex(0, 10)
844844
check(assert_type(iri, pd.RangeIndex), pd.RangeIndex, int)
845845
check(assert_type(iri[0], int), int)
846-
check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, int)
846+
check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, np.int64)
847847

848848
mi = pd.MultiIndex.from_product([["a", "b"], ["c", "d"]], names=["ab", "cd"])
849849
check(assert_type(mi, pd.MultiIndex), pd.MultiIndex)

tests/test_io.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,7 @@
6666
from pandas.io.sas.sas_xport import XportReader
6767
from pandas.io.stata import StataReader
6868

69-
from . import (
70-
lxml_skip,
71-
pytables_skip,
72-
)
69+
from . import lxml_skip
7370

7471
DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]})
7572
CWD = os.path.split(os.path.abspath(__file__))[0]
@@ -290,14 +287,12 @@ def test_sas_xport() -> None:
290287
pass
291288

292289

293-
@pytables_skip
294290
def test_hdf():
295291
with ensure_clean() as path:
296292
check(assert_type(DF.to_hdf(path, "df"), None), type(None))
297293
check(assert_type(read_hdf(path), Union[DataFrame, Series]), DataFrame)
298294

299295

300-
@pytables_skip
301296
def test_hdfstore():
302297
with ensure_clean() as path:
303298
store = HDFStore(path, model="w")
@@ -341,7 +336,6 @@ def test_hdfstore():
341336
store.close()
342337

343338

344-
@pytables_skip
345339
def test_read_hdf_iterator():
346340
with ensure_clean() as path:
347341
check(assert_type(DF.to_hdf(path, "df", format="table"), None), type(None))
@@ -356,7 +350,6 @@ def test_read_hdf_iterator():
356350
ti.close()
357351

358352

359-
@pytables_skip
360353
def test_hdf_context_manager():
361354
with ensure_clean() as path:
362355
check(assert_type(DF.to_hdf(path, "df", format="table"), None), type(None))
@@ -365,7 +358,6 @@ def test_hdf_context_manager():
365358
check(assert_type(store.get("df"), Union[DataFrame, Series]), DataFrame)
366359

367360

368-
@pytables_skip
369361
def test_hdf_series():
370362
s = DF["a"]
371363
with ensure_clean() as path:

tests/test_pandas.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,10 @@ def test_types_json_normalize() -> None:
240240
def test_isna() -> None:
241241
# https://github.com/pandas-dev/pandas-stubs/issues/264
242242
s1 = pd.Series([1, np.nan, 3.2])
243-
check(assert_type(pd.isna(s1), "pd.Series[bool]"), pd.Series, bool)
243+
check(assert_type(pd.isna(s1), "pd.Series[bool]"), pd.Series, np.bool_)
244244

245245
s2 = pd.Series([1, 3.2])
246-
check(assert_type(pd.notna(s2), "pd.Series[bool]"), pd.Series, bool)
246+
check(assert_type(pd.notna(s2), "pd.Series[bool]"), pd.Series, np.bool_)
247247

248248
df1 = pd.DataFrame({"a": [1, 2, 1, 2], "b": [1, 1, 2, np.nan]})
249249
check(assert_type(pd.isna(df1), "pd.DataFrame"), pd.DataFrame)
@@ -890,7 +890,7 @@ def test_cut() -> None:
890890
check(assert_type(s0r, pd.Series), pd.Series, pd.Interval)
891891
check(assert_type(s1r, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp)
892892
s0rlf, s1rlf = pd.cut(s1, bins=10, labels=False, retbins=True)
893-
check(assert_type(s0rlf, pd.Series), pd.Series, int)
893+
check(assert_type(s0rlf, pd.Series), pd.Series, np.int64)
894894
check(assert_type(s1rlf, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp)
895895
s0rls, s1rls = pd.cut(s1, bins=4, labels=["1", "2", "3", "4"], retbins=True)
896896
check(assert_type(s0rls, pd.Series), pd.Series, str)

0 commit comments

Comments
 (0)