Change check() function to test actual types in Series and Index (#546)

Dr-Irv · web-flow · commit 949885e984f9 · 2023-02-20T21:05:25.000-05:00
* fix check() to test the actual element type of a Series (via .iloc[0]) or Index (via [0]) before falling back to __iter__()

* change np.int32 to np.int_

* for bitwise use np.integer

* remove the python < 3.11 restriction on the tables dependency in pyproject.toml

* for nightly, use np.integer rather than np.int64

* make timedelta division type result dependent on pandas version

* fix mypy issue of assigning timedeltadiv return type
diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi
@@ -1289,12 +1289,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __and__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __and__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __and__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
     # def __array__(self, dtype: Optional[_bool] = ...) -> _np_ndarray
     def __div__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
     def __eq__(self, other: object) -> Series[_bool]: ...  # type: ignore[override]
@@ -1323,22 +1321,18 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __or__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __or__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __or__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
     def __radd__(self, other: num | _str | _ListLike | Series[S1]) -> Series[S1]: ...
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __rand__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __rand__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __rand__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...  # type: ignore[misc]
     def __rdiv__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
     def __rdivmod__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...  # type: ignore[override]
     def __rfloordiv__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
@@ -1349,12 +1343,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __ror__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __ror__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __ror__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...  # type: ignore[misc]
     def __rsub__(self, other: num | _ListLike | Series[S1]) -> Series: ...
     @overload
     def __rtruediv__(self, other: TimedeltaSeries) -> Series[float]: ...
@@ -1363,12 +1355,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __rxor__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __rxor__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __rxor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...  # type: ignore[misc]
     @overload
     def __sub__(
         self, other: Timestamp | datetime | TimestampSeries
@@ -1389,12 +1379,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __xor__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __xor__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __xor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
     def __invert__(self) -> Series[bool]: ...
     # properties
     # @property
diff --git a/pyproject.toml b/pyproject.toml
@@ -49,7 +49,7 @@ pre-commit = ">=2.19.0"
 black = ">=22.12.0"
 isort = ">=5.10.1"
 openpyxl = ">=3.0.10"
-tables = { version = ">=3.7.0", python = "<3.11" }
+tables = { version = ">=3.7.0" }
 lxml = { version = ">=4.7.1,<4.9.0", python = "<3.11" }
 pyreadstat = ">=1.2.0"
 xlrd = ">=2.0.1"
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -28,19 +28,18 @@
 )
 # This is only needed temporarily due to no wheels being available for lxml on 3.11
 
-pytables_skip = pytest.mark.skipif(
-    sys.version_info >= (3, 11), reason="pytables is not available for 3.11 yet"
-)
-# This is only needed temporarily due to no wheels being available for pytables on 3.11
-
 
 def check(actual: T, klass: type, dtype: type | None = None, attr: str = "left") -> T:
     if not isinstance(actual, klass):
         raise RuntimeError(f"Expected type '{klass}' but got '{type(actual)}'")
     if dtype is None:
         return actual  # type: ignore[return-value]
 
-    if hasattr(actual, "__iter__"):
+    if isinstance(actual, pd.Series):
+        value = actual.iloc[0]
+    elif isinstance(actual, pd.Index):
+        value = actual[0]  # type: ignore[assignment]
+    elif hasattr(actual, "__iter__"):
         value = next(iter(actual))  # pyright: ignore[reportGeneralTypeIssues]
     else:
         assert hasattr(actual, attr)
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -72,13 +72,13 @@ def test_types_init() -> None:
 
 def test_types_all() -> None:
     df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])
-    check(assert_type(df.all(), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(df.all(), "pd.Series[bool]"), pd.Series, np.bool_)
     check(assert_type(df.all(axis=None), bool), np.bool_)
 
 
 def test_types_any() -> None:
     df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])
-    check(assert_type(df.any(), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(df.any(), "pd.Series[bool]"), pd.Series, np.bool_)
     check(assert_type(df.any(axis=None), bool), np.bool_)
 
 
@@ -496,7 +496,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
     check(assert_type(df.apply(gethead, args=(4,)), pd.DataFrame), pd.DataFrame)
 
     # Check various return types for default result_type (None) with default axis (0)
-    check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, int)
+    check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, np.int64)
     check(assert_type(df.apply(returns_series), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.apply(returns_listlike_of_3), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.apply(returns_dict), pd.Series), pd.Series)
@@ -507,7 +507,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
         # to pass a result_type of "expand" to a scalar return
         assert_type(df.apply(returns_scalar, result_type="expand"), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(df.apply(returns_series, result_type="expand"), pd.DataFrame),
@@ -530,7 +530,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
         # to pass a result_type of "reduce" to a scalar return
         assert_type(df.apply(returns_scalar, result_type="reduce"), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         # Note that technically it does not make sense
@@ -548,7 +548,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
 
     # Check various return types for default result_type (None) with axis=1
     check(
-        assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"), pd.Series, int
+        assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"),
+        pd.Series,
+        np.int64,
     )
     check(assert_type(df.apply(returns_series, axis=1), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.apply(returns_listlike_of_3, axis=1), pd.Series), pd.Series)
@@ -562,7 +564,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
             df.apply(returns_scalar, axis=1, result_type="expand"), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
@@ -589,7 +591,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
             df.apply(returns_scalar, axis=1, result_type="reduce"), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         # Note that technically it does not make sense
@@ -668,33 +670,35 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
     # Test various other positional/keyword argument combinations
     # to ensure all overloads are supported
     check(
-        assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"), pd.Series, int
+        assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"),
+        pd.Series,
+        np.int64,
     )
     check(
         assert_type(
             df.apply(returns_scalar, axis=0, result_type=None), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(df.apply(returns_scalar, 0, False, None), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
             df.apply(returns_scalar, 0, False, result_type=None), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
             df.apply(returns_scalar, 0, raw=False, result_type=None), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
 
 
@@ -863,7 +867,7 @@ def test_types_groupby_methods() -> None:
     check(
         assert_type(df.groupby("col1").value_counts(normalize=False), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
@@ -948,12 +952,12 @@ def test_types_groupby_any() -> None:
     check(
         assert_type(df.groupby("col1")["col2"].any(), "pd.Series[bool]"),
         pd.Series,
-        bool,
+        np.bool_,
     )
     check(
         assert_type(df.groupby("col1")["col2"].any(), "pd.Series[bool]"),
         pd.Series,
-        bool,
+        np.bool_,
     )
 
 
@@ -2277,7 +2281,7 @@ def test_series_groupby_and_value_counts() -> None:
     )
     c1 = df.groupby("Animal")["Max Speed"].value_counts()
     c2 = df.groupby("Animal")["Max Speed"].value_counts(normalize=True)
-    check(assert_type(c1, "pd.Series[int]"), pd.Series, int)
+    check(assert_type(c1, "pd.Series[int]"), pd.Series, np.int64)
     check(assert_type(c2, "pd.Series[float]"), pd.Series, float)
 
 
diff --git a/tests/test_indexes.py b/tests/test_indexes.py
@@ -87,7 +87,7 @@ def test_column_getitem() -> None:
 
     column = df.columns[0]
     check(assert_type(column, Scalar), str)
-    check(assert_type(df[column], pd.Series), pd.Series, int)
+    check(assert_type(df[column], pd.Series), pd.Series, np.int64)
 
 
 def test_column_contains() -> None:
@@ -843,7 +843,7 @@ def test_getitem() -> None:
     iri = pd.RangeIndex(0, 10)
     check(assert_type(iri, pd.RangeIndex), pd.RangeIndex, int)
     check(assert_type(iri[0], int), int)
-    check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, int)
+    check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, np.int64)
 
     mi = pd.MultiIndex.from_product([["a", "b"], ["c", "d"]], names=["ab", "cd"])
     check(assert_type(mi, pd.MultiIndex), pd.MultiIndex)
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -66,10 +66,7 @@
 from pandas.io.sas.sas_xport import XportReader
 from pandas.io.stata import StataReader
 
-from . import (
-    lxml_skip,
-    pytables_skip,
-)
+from . import lxml_skip
 
 DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]})
 CWD = os.path.split(os.path.abspath(__file__))[0]
@@ -290,14 +287,12 @@ def test_sas_xport() -> None:
         pass
 
 
-@pytables_skip
 def test_hdf():
     with ensure_clean() as path:
         check(assert_type(DF.to_hdf(path, "df"), None), type(None))
         check(assert_type(read_hdf(path), Union[DataFrame, Series]), DataFrame)
 
 
-@pytables_skip
 def test_hdfstore():
     with ensure_clean() as path:
         store = HDFStore(path, model="w")
@@ -341,7 +336,6 @@ def test_hdfstore():
         store.close()
 
 
-@pytables_skip
 def test_read_hdf_iterator():
     with ensure_clean() as path:
         check(assert_type(DF.to_hdf(path, "df", format="table"), None), type(None))
@@ -356,7 +350,6 @@ def test_read_hdf_iterator():
         ti.close()
 
 
-@pytables_skip
 def test_hdf_context_manager():
     with ensure_clean() as path:
         check(assert_type(DF.to_hdf(path, "df", format="table"), None), type(None))
@@ -365,7 +358,6 @@ def test_hdf_context_manager():
             check(assert_type(store.get("df"), Union[DataFrame, Series]), DataFrame)
 
 
-@pytables_skip
 def test_hdf_series():
     s = DF["a"]
     with ensure_clean() as path:
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -240,10 +240,10 @@ def test_types_json_normalize() -> None:
 def test_isna() -> None:
     # https://github.com/pandas-dev/pandas-stubs/issues/264
     s1 = pd.Series([1, np.nan, 3.2])
-    check(assert_type(pd.isna(s1), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(pd.isna(s1), "pd.Series[bool]"), pd.Series, np.bool_)
 
     s2 = pd.Series([1, 3.2])
-    check(assert_type(pd.notna(s2), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(pd.notna(s2), "pd.Series[bool]"), pd.Series, np.bool_)
 
     df1 = pd.DataFrame({"a": [1, 2, 1, 2], "b": [1, 1, 2, np.nan]})
     check(assert_type(pd.isna(df1), "pd.DataFrame"), pd.DataFrame)
@@ -890,7 +890,7 @@ def test_cut() -> None:
     check(assert_type(s0r, pd.Series), pd.Series, pd.Interval)
     check(assert_type(s1r, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp)
     s0rlf, s1rlf = pd.cut(s1, bins=10, labels=False, retbins=True)
-    check(assert_type(s0rlf, pd.Series), pd.Series, int)
+    check(assert_type(s0rlf, pd.Series), pd.Series, np.int64)
     check(assert_type(s1rlf, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp)
     s0rls, s1rls = pd.cut(s1, bins=4, labels=["1", "2", "3", "4"], retbins=True)
     check(assert_type(s0rls, pd.Series), pd.Series, str)
diff --git a/tests/test_scalars.py b/tests/test_scalars.py
diff --git a/tests/test_series.py b/tests/test_series.py
diff --git a/tests/test_timefuncs.py b/tests/test_timefuncs.py