TYP: Autotyping #48191

Merged (13 commits, Sep 8, 2022)
18 changes: 18 additions & 0 deletions .libcst.codemod.yaml
@@ -0,0 +1,18 @@
# String that LibCST should look for in code which indicates that the
# module is generated code.
generated_code_marker: '@generated'
# Command line and arguments for invoking a code formatter. Anything
# specified here must be capable of taking code via stdin and returning
# formatted code via stdout.
formatter: ['black', '-']
# List of regex patterns which LibCST will evaluate against filenames to
# determine if the module should be touched.
blacklist_patterns: []
# List of modules that contain codemods inside of them.
modules:
- 'libcst.codemod.commands'
- 'autotyping'
# Absolute or relative path of the repository root, used for providing
# full-repo metadata. Relative paths should be specified with this file
# location as the base.
repo_root: '.'
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
@@ -252,3 +252,14 @@ repos:
/(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$
|/tests/
|/_testing/
- id: autotyping
name: autotyping
entry: python -m libcst.tool codemod autotyping.AutotypeCommand --safe
Member

Nit: I would slightly prefer --safe to be expanded into the actual flags it enables (unless there's no way to enable all the transformations at once), so it's more obvious which autotyping transformations are applied.

Member Author

Okay, will change that.

Is it okay to add the .libcst.codemod.yaml? It is needed. If we don't want to clutter the main folder, I could create scripts/run_autotyping.py and create this file only temporarily.

Member

Is it okay to add the .libcst.codemod.yaml?

Yeah sure that would be okay!

types_or: [python, pyi]
files: ^pandas
exclude: ^(pandas/tests|pandas/io/clipboard)
language: python
additional_dependencies:
- autotyping==22.9.0
- black==22.6.0
- libcst==0.4.7
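
As a rough, non-authoritative sketch of the two ideas discussed in the thread above (expanding --safe into explicit flags, and wrapping the codemod in a scripts/run_autotyping.py helper), something like the following could work. The flag names --none-return, --scalar-return, and --annotate-magics are taken from autotyping's documented options and are assumed here to be what --safe enables; the wrapper itself is hypothetical and is not part of this PR, which calls libcst.tool directly in the hook entry above.

# Hypothetical scripts/run_autotyping.py: a thin wrapper around the autotyping
# codemod so the invocation (and the expanded flag list) lives in one place.
# Flag names are assumptions based on autotyping's documentation.
import subprocess
import sys


def main(paths: list[str]) -> int:
    cmd = [
        sys.executable,
        "-m",
        "libcst.tool",
        "codemod",
        "autotyping.AutotypeCommand",
        "--none-return",      # add "-> None" to functions that never return a value
        "--scalar-return",    # annotate returns of plain str/int/bool/etc. literals
        "--annotate-magics",  # annotate well-known dunder methods (__len__, __exit__, ...)
        *paths,
    ]
    # Run the codemod and propagate its exit code to pre-commit.
    return subprocess.run(cmd).returncode


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:] or ["pandas"]))
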
2 changes: 1 addition & 1 deletion pandas/_libs/arrays.pyi
@@ -10,7 +10,7 @@ from pandas._typing import (
class NDArrayBacked:
_dtype: DtypeObj
_ndarray: np.ndarray
def __init__(self, values: np.ndarray, dtype: DtypeObj): ...
def __init__(self, values: np.ndarray, dtype: DtypeObj) -> None: ...
@classmethod
def _simple_new(cls, values: np.ndarray, dtype: DtypeObj): ...
def _from_backing_data(self, values: np.ndarray): ...
32 changes: 16 additions & 16 deletions pandas/_libs/hashtable.pyi
@@ -13,7 +13,7 @@ def unique_label_indices(

class Factorizer:
count: int
def __init__(self, size_hint: int): ...
def __init__(self, size_hint: int) -> None: ...
def get_count(self) -> int: ...

class ObjectFactorizer(Factorizer):
@@ -39,80 +39,80 @@ class Int64Factorizer(Factorizer):
) -> npt.NDArray[np.intp]: ...

class Int64Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int64]: ...

class Int32Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int32]: ...

class Int16Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int16]: ...

class Int8Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.int8]: ...

class UInt64Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint64]: ...

class UInt32Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint32]: ...

class UInt16Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint16]: ...

class UInt8Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.uint8]: ...

class Float64Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.float64]: ...

class Float32Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.float32]: ...

class Complex128Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.complex128]: ...

class Complex64Vector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.complex64]: ...

class StringVector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.object_]: ...

class ObjectVector:
def __init__(self, *args): ...
def __init__(self, *args) -> None: ...
def __len__(self) -> int: ...
def to_array(self) -> npt.NDArray[np.object_]: ...

class HashTable:
# NB: The base HashTable class does _not_ actually have these methods;
# we are putting them here for the sake of mypy to avoid
# reproducing them in each subclass below.
def __init__(self, size_hint: int = ...): ...
def __init__(self, size_hint: int = ...) -> None: ...
def __len__(self) -> int: ...
def __contains__(self, key: Hashable) -> bool: ...
def sizeof(self, deep: bool = ...) -> int: ...
8 changes: 5 additions & 3 deletions pandas/_libs/index.pyi
@@ -7,8 +7,9 @@ from pandas.core.arrays import ExtensionArray

class IndexEngine:
over_size_threshold: bool
def __init__(self, values: np.ndarray): ...
def __init__(self, values: np.ndarray) -> None: ...
def __contains__(self, val: object) -> bool: ...

# -> int | slice | np.ndarray[bool]
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
def sizeof(self, deep: bool = ...) -> int: ...
@@ -49,12 +49,13 @@ class BoolEngine(UInt8Engine): ...
class BaseMultiIndexCodesEngine:
levels: list[np.ndarray]
offsets: np.ndarray # ndarray[uint64_t, ndim=1]

def __init__(
self,
levels: list[np.ndarray], # all entries hashable
labels: list[np.ndarray], # all entries integer-dtyped
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
): ...
) -> None: ...
def get_indexer(
self,
target: npt.NDArray[np.object_],
@@ -69,7 +71,7 @@ class BaseMultiIndexCodesEngine:
) -> npt.NDArray[np.intp]: ...

class ExtensionEngine:
def __init__(self, values: ExtensionArray): ...
def __init__(self, values: ExtensionArray) -> None: ...
def __contains__(self, val: object) -> bool: ...
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
8 changes: 5 additions & 3 deletions pandas/_libs/internals.pyi
@@ -34,7 +34,7 @@ def update_blklocs_and_blknos(
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
@final
class BlockPlacement:
def __init__(self, val: int | slice | np.ndarray): ...
def __init__(self, val: int | slice | np.ndarray) -> None: ...
@property
def indexer(self) -> np.ndarray | slice: ...
@property
@@ -57,7 +57,9 @@ class SharedBlock:
_mgr_locs: BlockPlacement
ndim: int
values: ArrayLike
def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ...
def __init__(
self, values: ArrayLike, placement: BlockPlacement, ndim: int
) -> None: ...

class NumpyBlock(SharedBlock):
values: np.ndarray
@@ -80,6 +82,6 @@ class BlockManager:
_blklocs: np.ndarray
def __init__(
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=...
): ...
) -> None: ...
def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
def _rebuild_blknos_and_blklocs(self) -> None: ...
2 changes: 1 addition & 1 deletion pandas/_libs/parsers.pyi
@@ -62,7 +62,7 @@ class TextReader:
float_precision: Literal["round_trip", "legacy", "high"] | None = ...,
skip_blank_lines: bool = ...,
encoding_errors: bytes | str = ...,
): ...
) -> None: ...
def set_noconvert(self, i: int) -> None: ...
def remove_noconvert(self, i: int) -> None: ...
def close(self) -> None: ...
8 changes: 5 additions & 3 deletions pandas/_libs/sparse.pyi
@@ -12,7 +12,7 @@ _SparseIndexT = TypeVar("_SparseIndexT", bound=SparseIndex)
class SparseIndex:
length: int
npoints: int
def __init__(self): ...
def __init__(self) -> None: ...
@property
def ngaps(self) -> int: ...
@property
@@ -31,13 +31,15 @@ class IntIndex(SparseIndex):
indices: npt.NDArray[np.int32]
def __init__(
self, length: int, indices: Sequence[int], check_integrity: bool = ...
): ...
) -> None: ...

class BlockIndex(SparseIndex):
nblocks: int
blocs: np.ndarray
blengths: np.ndarray
def __init__(self, length: int, blocs: np.ndarray, blengths: np.ndarray): ...
def __init__(
self, length: int, blocs: np.ndarray, blengths: np.ndarray
) -> None: ...

def make_mask_object_ndarray(
arr: npt.NDArray[np.object_], fill_value
10 changes: 8 additions & 2 deletions pandas/_testing/contexts.py
@@ -5,6 +5,7 @@
from pathlib import Path
from shutil import rmtree
import tempfile
from types import TracebackType
from typing import (
IO,
Any,
@@ -65,7 +66,7 @@ def set_timezone(tz: str) -> Iterator[None]:
import os
import time

def setTZ(tz):
def setTZ(tz) -> None:
if tz is None:
try:
del os.environ["TZ"]
@@ -237,6 +238,11 @@ def __enter__(self) -> None:
self.start_state = np.random.get_state()
np.random.seed(self.seed)

def __exit__(self, exc_type, exc_value, traceback) -> None:
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
traceback: TracebackType | None,
) -> None:

np.random.set_state(self.start_state)
2 changes: 1 addition & 1 deletion pandas/_version.py
@@ -334,7 +334,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
return pieces


def plus_or_dot(pieces):
def plus_or_dot(pieces) -> str:
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
4 changes: 2 additions & 2 deletions pandas/compat/pickle_compat.py
@@ -210,7 +210,7 @@ def find_class(self, module, name):
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce


def load_newobj(self):
def load_newobj(self) -> None:
args = self.stack.pop()
cls = self.stack[-1]

@@ -234,7 +234,7 @@ def load_newobj(self):
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj


def load_newobj_ex(self):
def load_newobj_ex(self) -> None:
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()
11 changes: 7 additions & 4 deletions pandas/conftest.py
@@ -29,7 +29,10 @@
from decimal import Decimal
import operator
import os
from typing import Callable
from typing import (
Callable,
Iterator,
)

from dateutil.tz import (
tzlocal,
@@ -132,7 +135,7 @@ def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
item.add_marker(pytest.mark.filterwarnings(f"ignore:{message}"))


def pytest_collection_modifyitems(items, config):
def pytest_collection_modifyitems(items, config) -> None:
skip_slow = config.getoption("--skip-slow")
only_slow = config.getoption("--only-slow")
skip_network = config.getoption("--skip-network")
@@ -512,10 +515,10 @@ def __init__(self, underlying_dict) -> None:
def __getitem__(self, key):
return self._data.__getitem__(key)

def __iter__(self):
def __iter__(self) -> Iterator:
return self._data.__iter__()

def __len__(self):
def __len__(self) -> int:
return self._data.__len__()

return TestNonDictMapping
2 changes: 1 addition & 1 deletion pandas/core/accessor.py
@@ -59,7 +59,7 @@ def _delegate_method(self, name, *args, **kwargs):
@classmethod
def _add_delegate_accessors(
cls, delegate, accessors, typ: str, overwrite: bool = False
):
) -> None:
"""
Add accessors to cls from the delegate class.

8 changes: 4 additions & 4 deletions pandas/core/array_algos/take.py
@@ -337,7 +337,7 @@ def _get_take_nd_function(

if func is None:

def func(arr, indexer, out, fill_value=np.nan):
def func(arr, indexer, out, fill_value=np.nan) -> None:
indexer = ensure_platform_int(indexer)
_take_nd_object(
arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info
@@ -349,7 +349,7 @@ def func(arr, indexer, out, fill_value=np.nan):
def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None):
def wrapper(
arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
):
) -> None:
if arr_dtype is not None:
arr = arr.view(arr_dtype)
if out_dtype is not None:
@@ -364,7 +364,7 @@ def wrapper(
def _convert_wrapper(f, conv_dtype):
def wrapper(
arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
):
) -> None:
if conv_dtype == object:
# GH#39755 avoid casting dt64/td64 to integers
arr = ensure_wrapped_if_datetimelike(arr)
@@ -506,7 +506,7 @@ def _take_nd_object(
axis: int,
fill_value,
mask_info,
):
) -> None:
if mask_info is not None:
mask, needs_masking = mask_info
else: