diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d927be76843e1..7988012498db7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -206,3 +206,8 @@ repos: files: ^pandas/core/ exclude: ^pandas/core/api\.py$ types: [python] + - id: no-bool-in-core-generic + name: Use bool_t instead of bool in pandas/core/generic.py + entry: python scripts/no_bool_in_generic.py + language: python + files: ^pandas/core/generic\.py$ diff --git a/LICENSES/PYUPGRADE_LICENSE b/LICENSES/PYUPGRADE_LICENSE new file mode 100644 index 0000000000000..522fbe20b8991 --- /dev/null +++ b/LICENSES/PYUPGRADE_LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2017 Anthony Sottile + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b3262c61a0597..8e20eeb16c7a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -232,7 +232,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): def __init__( self, data: Manager, - copy: bool = False, + copy: bool_t = False, attrs: Mapping[Hashable, Any] | None = None, ): # copy kwarg is retained for mypy compat, is not used @@ -249,7 +249,7 @@ def __init__( @classmethod def _init_mgr( - cls, mgr, axes, dtype: Dtype | None = None, copy: bool = False + cls, mgr, axes, dtype: Dtype | None = None, copy: bool_t = False ) -> Manager: """ passed a manager and a axes dict """ for a, axe in axes.items(): @@ -377,8 +377,8 @@ def flags(self) -> Flags: def set_flags( self: FrameOrSeries, *, - copy: bool = False, - allows_duplicate_labels: bool | None = None, + copy: bool_t = False, + allows_duplicate_labels: bool_t | None = None, ) -> FrameOrSeries: """ Return a new object with updated flags. @@ -467,7 +467,7 @@ def _data(self): _stat_axis_name = "index" _AXIS_ORDERS: list[str] _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0} - _AXIS_REVERSED: bool + _AXIS_REVERSED: bool_t _info_axis_number: int _info_axis_name: str _AXIS_LEN: int @@ -494,7 +494,7 @@ def _construct_axes_dict(self, axes=None, **kwargs): @final @classmethod def _construct_axes_from_arguments( - cls, args, kwargs, require_all: bool = False, sentinel=None + cls, args, kwargs, require_all: bool_t = False, sentinel=None ): """ Construct and returns axes if supplied in args/kwargs. @@ -714,11 +714,11 @@ def set_axis(self: FrameOrSeries, labels, *, inplace: Literal[True]) -> None: @overload def set_axis( - self: FrameOrSeries, labels, axis: Axis = ..., inplace: bool = ... + self: FrameOrSeries, labels, axis: Axis = ..., inplace: bool_t = ... ) -> FrameOrSeries | None: ... 
- def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): + def set_axis(self, labels, axis: Axis = 0, inplace: bool_t = False): """ Assign desired index to given axis. @@ -749,7 +749,7 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return self._set_axis_nocheck(labels, axis, inplace) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool): + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t): # NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy. if inplace: setattr(self, self._get_axis_name(axis), labels) @@ -995,8 +995,8 @@ def rename( index: Renamer | None = None, columns: Renamer | None = None, axis: Axis | None = None, - copy: bool = True, - inplace: bool = False, + copy: bool_t = True, + inplace: bool_t = False, level: Level | None = None, errors: str = "ignore", ) -> FrameOrSeries | None: @@ -1402,13 +1402,13 @@ def _set_axis_name(self, name, axis=0, inplace=False): # Comparison Methods @final - def _indexed_same(self, other) -> bool: + def _indexed_same(self, other) -> bool_t: return all( self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS ) @final - def equals(self, other: object) -> bool: + def equals(self, other: object) -> bool_t: """ Test whether two objects contain the same elements. 
@@ -5071,7 +5071,7 @@ def filter(
             return self.reindex(**{name: [r for r in items if r in labels]})
         elif like:
 
-            def f(x) -> bool:
+            def f(x) -> bool_t:
                 assert like is not None  # needed for mypy
                 return like in ensure_str(x)
 
@@ -5079,7 +5079,7 @@ def f(x) -> bool:
             return self.loc(axis=axis)[values]
         elif regex:
 
-            def f(x) -> bool:
+            def f(x) -> bool_t:
                 return matcher.search(ensure_str(x)) is not None
 
             matcher = re.compile(regex)
diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py
new file mode 100644
index 0000000000000..f80eff56b2729
--- /dev/null
+++ b/scripts/no_bool_in_generic.py
@@ -0,0 +1,116 @@
+"""
+Check that pandas/core/generic.py doesn't use bool as a type annotation.
+
+There is already the method `bool`, so the alias `bool_t` should be used instead.
+
+This is meant to be run as a pre-commit hook - to run it manually, you can do:
+
+    pre-commit run no-bool-in-core-generic --all-files
+
+The function `visit` is adapted from a function by the same name in pyupgrade:
+https://github.com/asottile/pyupgrade/blob/5495a248f2165941c5d3b82ac3226ba7ad1fa59d/pyupgrade/_data.py#L70-L113
+"""
+
+import argparse
+import ast
+import collections
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+)
+
+
+def visit(tree: ast.Module) -> Dict[int, List[int]]:
+    """
+    Step through tree, recording when nodes are in annotations.
+
+    Returns a mapping from line number to the ``col_offset`` of every ``bool``
+    name found inside an annotation on that line.
+    """
+    in_annotation = False
+    nodes: List[Tuple[bool, ast.AST]] = [(in_annotation, tree)]
+    to_replace = collections.defaultdict(list)
+
+    while nodes:
+        in_annotation, node = nodes.pop()
+
+        if isinstance(node, ast.Name) and in_annotation and node.id == "bool":
+            to_replace[node.lineno].append(node.col_offset)
+
+        for name in reversed(node._fields):
+            value = getattr(node, name)
+            # Everything nested under an annotation is itself an annotation.
+            if name in {"annotation", "returns"}:
+                next_in_annotation = True
+            else:
+                next_in_annotation = in_annotation
+            if isinstance(value, ast.AST):
+                nodes.append((next_in_annotation, value))
+            elif isinstance(value, list):
+                for child in reversed(value):
+                    if isinstance(child, ast.AST):
+                        nodes.append((next_in_annotation, child))
+
+    return to_replace
+
+
+def replace_bool_with_bool_t(to_replace, content: str) -> str:
+    """
+    Rewrite every recorded ``bool`` annotation in `content` to ``bool_t``.
+
+    `to_replace` maps 1-based line numbers to the column offsets collected by
+    `visit`. Everything else, including a trailing newline, is left untouched.
+    """
+    new_lines = []
+
+    # split("\n") / "\n".join round-trips exactly, whereas splitlines() would
+    # silently drop the final newline of the file.
+    for n, line in enumerate(content.split("\n"), start=1):
+        if n in to_replace:
+            # ast reports col_offset in UTF-8 bytes, so slice the encoded line.
+            encoded = line.encode("utf-8")
+            # Go right to left so that earlier offsets stay valid.
+            for col_offset in sorted(to_replace[n], reverse=True):
+                encoded = encoded[:col_offset] + b"bool_t" + encoded[col_offset + 4 :]
+            line = encoded.decode("utf-8")
+        new_lines.append(line)
+    return "\n".join(new_lines)
+
+
+def check_for_bool_in_generic(content: str) -> Tuple[bool, str]:
+    """
+    Return ``(mutated, new_content)`` for the source code in `content`.
+
+    `mutated` is True when at least one ``bool`` annotation was replaced.
+    """
+    tree = ast.parse(content)
+    to_replace = visit(tree)
+
+    if not to_replace:
+        mutated = False
+        return mutated, content
+
+    mutated = True
+    return mutated, replace_bool_with_bool_t(to_replace, content)
+
+
+def main(argv: Optional[Sequence[str]] = None) -> None:
+    """Rewrite, in place, each file given on the command line."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("paths", nargs="*")
+    args = parser.parse_args(argv)
+
+    for path in args.paths:
+        with open(path, encoding="utf-8") as fd:
+            content = fd.read()
+        mutated, new_content = check_for_bool_in_generic(content)
+        if mutated:
+            with open(path, "w", encoding="utf-8") as fd:
+                fd.write(new_content)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/tests/test_no_bool_in_generic.py b/scripts/tests/test_no_bool_in_generic.py
new file mode 100644
index 0000000000000..0bc91c5d1cf1e
--- /dev/null
+++ b/scripts/tests/test_no_bool_in_generic.py
@@ -0,0 +1,28 @@
+from scripts.no_bool_in_generic import check_for_bool_in_generic
+
+BAD_FILE = "def foo(a: bool) -> bool:\n return bool(0)"
+GOOD_FILE = "def foo(a: bool_t) -> bool_t:\n return bool(0)"
+
+
+def test_bad_file_with_replace():
+    content = BAD_FILE
+    mutated, result = check_for_bool_in_generic(content)
+    expected = GOOD_FILE
+    assert result == expected
+    assert mutated
+
+
+def test_good_file_with_replace():
+    content = GOOD_FILE
+    mutated, result = check_for_bool_in_generic(content)
+    expected = content
+    assert result == expected
+    assert not mutated
+
+
+def test_trailing_newline_is_preserved():
+    content = BAD_FILE + "\n"
+    mutated, result = check_for_bool_in_generic(content)
+    expected = GOOD_FILE + "\n"
+    assert result == expected
+    assert mutated