diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a840ebfb9b52..82043f79643e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -70,12 +70,9 @@ repos: rev: 6.0.0 hooks: - id: flake8 - # Need to patch os.remove rule in pandas-dev-flaker - exclude: ^ci/fix_wheels.py additional_dependencies: &flake8_dependencies - flake8==6.0.0 - flake8-bugbear==22.7.1 - - pandas-dev-flaker==0.5.0 - repo: https://github.com/pycqa/pylint rev: v2.15.9 hooks: @@ -183,6 +180,21 @@ repos: types: [rst] args: [--filename=*.rst] additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9] + - id: inconsistent-namespace-usage + name: 'Check for inconsistent use of pandas namespace' + entry: python scripts/check_for_inconsistent_pandas_namespace.py + exclude: ^pandas/core/interchange/ + language: python + types: [python] + - id: no-os-remove + name: Check code for instances of os.remove + entry: os\.remove + language: pygrep + types: [python] + files: ^pandas/tests/ + exclude: | + (?x)^ + pandas/tests/io/pytables/test_store\.py$ - id: unwanted-patterns name: Unwanted patterns language: pygrep @@ -192,6 +204,20 @@ repos: \#\ type:\ (?!ignore) |\#\ type:\s?ignore(?!\[) + + # foo.__class__ instead of type(foo) + |\.__class__ + + # np.bool/np.object instead of np.bool_/np.object_ + |np\.bool[^_8`] + |np\.object[^_8`] + + # imports from collections.abc instead of `from collections import abc` + |from\ collections\.abc\ import + + # Numpy + |from\ numpy\ import\ random + |from\ numpy\.random\ import + # Incorrect code-block / IPython directives |\.\.\ code-block\ :: |\.\.\ ipython\ :: @@ -200,7 +226,17 @@ repos: # Check for deprecated messages without sphinx directive |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.) + + # {foo!r} instead of {repr(foo)} + |!r} + + # builtin filter function + |(?<!def)[\(\s]filter\( - id: cython-casting name: Check Cython casting is `<type>obj`, not `<type> obj` language: pygrep @@ -231,6 +267,58 @@ repos: files: ^pandas/tests/extension/base types: [python] exclude: ^pandas/tests/extension/base/base\.py + - id: unwanted-patterns-in-tests + name: Unwanted patterns in tests + language: pygrep + entry: | + (?x) + # pytest.xfail instead of pytest.mark.xfail + pytest\.xfail + + # imports from pandas._testing instead of `import pandas._testing as tm` + |from\ pandas\._testing\ import + |from\ pandas\ import\ _testing\ as\ tm + + # No direct imports from conftest + |conftest\ import + |import\ conftest + + # pandas.testing instead of tm + |pd\.testing\. + + # pd.api.types instead of from pandas.api.types import ... + |(pd|pandas)\.api\.types\.
+ + # np.testing, np.array_equal + |(numpy|np)(\.testing|\.array_equal) + + # unittest.mock (use pytest builtin monkeypatch fixture instead) + |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch) + + # pytest raises without context + |\s\ pytest.raises + + # pytest.warns (use tm.assert_produces_warning instead) + |pytest\.warns + files: ^pandas/tests/ + types_or: [python, cython, rst] + - id: unwanted-patterns-in-ea-tests + name: Unwanted patterns in EA tests + language: pygrep + entry: | + (?x) + tm.assert_(series|frame)_equal + files: ^pandas/tests/extension/base/ + exclude: ^pandas/tests/extension/base/base\.py$ + types_or: [python, cython, rst] + - id: unwanted-patterns-in-cython + name: Unwanted patterns in Cython code + language: pygrep + entry: | + (?x) + # `<type>obj` as opposed to `<type> obj` + [a-zA-Z0-9*]>[ ] + types: [cython] - id: pip-to-conda name: Generate pip dependency from conda language: python @@ -251,6 +339,38 @@ repos: language: python types: [rst] files: ^doc/source/(development|reference)/ + - id: unwanted-patterns-bare-pytest-raises + name: Check for use of bare pytest raises + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises" + types: [python] + files: ^pandas/tests/ + exclude: ^pandas/tests/extension/ + - id: unwanted-patterns-private-function-across-module + name: Check for use of private functions across modules + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" + types: [python] + exclude: ^(asv_bench|pandas/tests|doc)/ + - id: unwanted-patterns-private-import-across-module + name: Check for import of private attributes across modules + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" + types: [python] + exclude: | + (?x) + ^(asv_bench|pandas/tests|doc)/ + |scripts/validate_min_versions_in_sync\.py$ + - id: unwanted-patterns-strings-to-concatenate + name: Check for use of not concatenated strings + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" + types_or: [python, cython] + - id: unwanted-patterns-strings-with-misplaced-whitespace + name: Check for strings with misplaced spaces + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" + types_or: [python, cython] - id: use-pd_array-in-core name: Import pandas.array as pd_array in core language: python diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index d3168bde0a783..97d91111e833a 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -70,7 +70,7 @@ class BaseIO: def remove(self, f): """Remove created files""" try: - os.remove(f) # noqa: PDF008 + os.remove(f) except OSError: # On Windows, attempting to remove a file that is in use # causes an exception to be raised diff --git a/doc/scripts/eval_performance.py b/doc/scripts/eval_performance.py index 85d9ce4ad01e9..f6087e02a9330 100644 --- a/doc/scripts/eval_performance.py +++ b/doc/scripts/eval_performance.py @@ -6,8 +6,8 @@ from pandas import DataFrame setup_common = """from pandas import DataFrame -from numpy.random import randn -df = DataFrame(randn(%d, 3), columns=list('abc')) +import numpy as np +df = DataFrame(np.random.randn(%d, 3), columns=list('abc')) %s""" setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'" diff --git
a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index b05f026bbbb44..449b6de36cd24 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -43,7 +43,7 @@ Pre-commit ---------- Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks -like ``black``, ``flake8`` (including a `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_ plugin), +like ``black``, ``flake8``, ``isort``, and ``cpplint``, and more, using `pre-commit hooks <https://pre-commit.com/>`_ Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore, it is helpful to run the check yourself before submitting code. This diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5895a06792ffb..9dbe450261e54 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -320,7 +320,7 @@ Null-values are no longer coerced to NaN-value in value_counts and mode ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, -``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This +``NaT`` and other null-values to a NaN-value for ``np.object_``-dtype. This behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`). diff --git a/environment.yml b/environment.yml index b6b8f7d6af1ba..9eaf52977d6d2 100644 --- a/environment.yml +++ b/environment.yml @@ -90,7 +90,6 @@ dependencies: - gitdb - natsort # DataFrame.sort_values doctest - numpydoc - - pandas-dev-flaker=0.5.0 - pydata-sphinx-theme<0.11 - pytest-cython # doctest - sphinx diff --git a/pandas/__init__.py b/pandas/__init__.py index 951cb38656d0b..048d20f0de72f 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -135,7 +135,7 @@ ) from pandas import api, arrays, errors, io, plotting, tseries -from pandas import testing # noqa:PDF015 +from pandas import testing from pandas.util._print_versions import show_versions from pandas.io.api import ( diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bc7b876cb5de8..b56cf2a23a45f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1482,7 +1482,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str: return val if values.descr.type_num != NPY_OBJECT: - # i.e. values.dtype != np.object + # i.e.
values.dtype != np.object_ # This should not be reached values = values.astype(object) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 43020ae471f10..eb2905751a9b4 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -886,7 +886,7 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager """ import pytest - return pytest.raises(expected_exception, match=None) # noqa: PDF010 + return pytest.raises(expected_exception, match=None) cython_table = pd.core.common._cython_table.items() diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 085a2a80ca8ec..b59b9632913e4 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -14,7 +14,6 @@ import sys from pandas._typing import F -import pandas.compat._compressors from pandas.compat._constants import ( IS64, PY39, @@ -22,6 +21,7 @@ PY311, PYPY, ) +import pandas.compat.compressors from pandas.compat.numpy import ( is_numpy_dev, np_version_under1p21, @@ -131,7 +131,7 @@ def is_ci_environment() -> bool: return os.environ.get("PANDAS_CI", "0") == "1" -def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]: +def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]: """ Importing the `LZMAFile` class from the `lzma` module. @@ -145,13 +145,13 @@ def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]: RuntimeError If the `lzma` module was not imported correctly, or didn't exist. """ - if not pandas.compat._compressors.has_lzma: + if not pandas.compat.compressors.has_lzma: raise RuntimeError( "lzma module not available. " "A Python re-install with the proper dependencies, " "might be required to solve this issue." ) - return pandas.compat._compressors.LZMAFile + return pandas.compat.compressors.LZMAFile __all__ = [ diff --git a/pandas/compat/_compressors.py b/pandas/compat/compressors.py similarity index 100% rename from pandas/compat/_compressors.py rename to pandas/compat/compressors.py diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 97262b1f4bb21..fb081d0e63c96 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -1,8 +1,10 @@ from __future__ import annotations -from collections.abc import Callable # noqa: PDF001 import re -from typing import Union +from typing import ( + Callable, + Union, +) import numpy as np diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3517f3ee9183d..a89b28603113b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -133,11 +133,11 @@ from pandas.core import ( algorithms as algos, arraylike, + common, indexing, nanops, sample, ) -from pandas.core import common # noqa: PDF018 from pandas.core.array_algos.replace import should_use_regex from pandas.core.arrays import ExtensionArray from pandas.core.base import PandasObject @@ -158,7 +158,7 @@ SingleArrayManager, ) from pandas.core.internals.construction import mgr_to_mgr -from pandas.core.internals.managers import _using_copy_on_write +from pandas.core.internals.managers import using_copy_on_write from pandas.core.methods.describe import describe_ndframe from pandas.core.missing import ( clean_fill_method, @@ -10102,7 +10102,7 @@ def truncate( if isinstance(ax, MultiIndex): setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) - if copy or (copy is None and not _using_copy_on_write()): + if copy or (copy is None and not using_copy_on_write()): result = result.copy(deep=copy) return result diff --git 
a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py index 9139cb41e3af7..0de9b130f0aab 100644 --- a/pandas/core/interchange/dataframe.py +++ b/pandas/core/interchange/dataframe.py @@ -7,8 +7,10 @@ from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg if TYPE_CHECKING: - import pandas as pd - from pandas import Index + from pandas import ( + DataFrame, + Index, + ) class PandasDataFrameXchg(DataFrameXchg): @@ -21,7 +23,7 @@ class PandasDataFrameXchg(DataFrameXchg): """ def __init__( - self, df: pd.DataFrame, nan_as_null: bool = False, allow_copy: bool = True + self, df: DataFrame, nan_as_null: bool = False, allow_copy: bool = True ) -> None: """ Constructor - an instance of this (private) class is returned from diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 53f347ec4d372..c4e869a3f6a45 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -15,7 +15,7 @@ import numpy as np -from pandas._config.config import _global_config +from pandas._config import config from pandas._libs import ( algos as libalgos, @@ -376,7 +376,7 @@ def setitem(self: T, indexer, value) -> T: if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: raise ValueError(f"Cannot set values with ndim > {self.ndim}") - if _using_copy_on_write() and not self._has_no_reference(0): + if using_copy_on_write() and not self._has_no_reference(0): # if being referenced -> perform Copy-on-Write and clear the reference # this method is only called if there is a single block -> hardcoded 0 self = self.copy() @@ -385,7 +385,7 @@ def setitem(self: T, indexer, value) -> T: def putmask(self, mask, new, align: bool = True): if ( - _using_copy_on_write() + using_copy_on_write() and self.refs is not None and not all(ref is None for ref in self.refs) ): @@ -429,7 +429,7 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T: limit = libalgos.validate_limit(None, limit=limit) if inplace: # TODO(CoW) can be optimized to only copy those blocks that have refs - if _using_copy_on_write() and any( + if using_copy_on_write() and any( not self._has_no_reference_block(i) for i in range(len(self.blocks)) ): self = self.copy() @@ -613,7 +613,7 @@ def copy(self: T, deep: bool | None | Literal["all"] = True) -> T: BlockManager """ if deep is None: - if _using_copy_on_write(): + if using_copy_on_write(): # use shallow copy deep = False else: @@ -701,7 +701,7 @@ def reindex_indexer( pandas-indexer with -1's only. """ if copy is None: - if _using_copy_on_write(): + if using_copy_on_write(): # use shallow copy copy = False else: @@ -879,7 +879,7 @@ def _slice_take_blocks_ax0( # we may try to only slice taker = blklocs[mgr_locs.indexer] max_len = max(len(mgr_locs), taker.max() + 1) - if only_slice or _using_copy_on_write(): + if only_slice or using_copy_on_write(): taker = lib.maybe_indices_to_slice(taker, max_len) if isinstance(taker, slice): @@ -1039,7 +1039,7 @@ def from_blocks( """ Constructor for BlockManager and SingleBlockManager with same signature. 
""" - parent = parent if _using_copy_on_write() else None + parent = parent if using_copy_on_write() else None return cls(blocks, axes, refs, parent, verify_integrity=False) # ---------------------------------------------------------------- @@ -1222,7 +1222,7 @@ def value_getitem(placement): blk_locs = blklocs[val_locs.indexer] if inplace and blk.should_store(value): # Updating inplace -> check if we need to do Copy-on-Write - if _using_copy_on_write() and not self._has_no_reference_block(blkno_l): + if using_copy_on_write() and not self._has_no_reference_block(blkno_l): blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True) self._clear_reference_block(blkno_l) else: @@ -1320,7 +1320,7 @@ def _iset_single( if inplace and blk.should_store(value): copy = False - if _using_copy_on_write() and not self._has_no_reference_block(blkno): + if using_copy_on_write() and not self._has_no_reference_block(blkno): # perform Copy-on-Write and clear the reference copy = True self._clear_reference_block(blkno) @@ -1344,7 +1344,7 @@ def column_setitem( This is a method on the BlockManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ - if _using_copy_on_write() and not self._has_no_reference(loc): + if using_copy_on_write() and not self._has_no_reference(loc): # otherwise perform Copy-on-Write and clear the reference blkno = self.blknos[loc] blocks = list(self.blocks) @@ -1839,7 +1839,7 @@ def __init__( self.axes = [axis] self.blocks = (block,) self.refs = refs - self.parent = parent if _using_copy_on_write() else None + self.parent = parent if using_copy_on_write() else None @classmethod def from_blocks( @@ -1876,7 +1876,7 @@ def to_2d_mgr(self, columns: Index) -> BlockManager: new_blk = type(blk)(arr, placement=bp, ndim=2) axes = [columns, self.axes[0]] refs: list[weakref.ref | None] = [weakref.ref(blk)] - parent = self if _using_copy_on_write() else None + parent = self if using_copy_on_write() else None return BlockManager( [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False ) @@ -2020,7 +2020,7 @@ def setitem_inplace(self, indexer, value) -> None: in place, not returning a new Manager (and Block), and thus never changing the dtype. 
""" - if _using_copy_on_write() and not self._has_no_reference(0): + if using_copy_on_write() and not self._has_no_reference(0): self.blocks = (self._block.copy(),) self.refs = None self.parent = None @@ -2354,8 +2354,8 @@ def _preprocess_slice_or_indexer( return "fancy", indexer, len(indexer) -_mode_options = _global_config["mode"] +_mode_options = config._global_config["mode"] -def _using_copy_on_write(): +def using_copy_on_write(): return _mode_options["copy_on_write"] diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index b5618207ab9d8..c96e5a1abcf86 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -1,10 +1,10 @@ from __future__ import annotations import abc -from collections.abc import Callable # noqa: PDF001 import re from typing import ( TYPE_CHECKING, + Callable, Literal, ) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 2d77cd0da816f..508ac122d67af 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,12 +1,12 @@ from __future__ import annotations -from collections.abc import Callable # noqa: PDF001 import functools import re import sys import textwrap from typing import ( TYPE_CHECKING, + Callable, Literal, ) import unicodedata diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index a6c32133803d4..c0a7b2b7cc361 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -26,7 +26,7 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core import common # noqa: PDF018 +from pandas.core import common from pandas.core.indexers.objects import ( BaseIndexer, ExponentialMovingWindowIndexer, diff --git a/pandas/io/common.py b/pandas/io/common.py index 4dae46c8f39f6..6deaf40f00c69 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -55,8 +55,8 @@ WriteBuffer, ) from pandas.compat import get_lzma_file -from pandas.compat._compressors import BZ2File as _BZ2File from pandas.compat._optional import import_optional_dependency +from pandas.compat.compressors import BZ2File as _BZ2File from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -478,7 +478,7 @@ def file_path_to_url(path: str) -> str: return urljoin("file:", pathname2url(path)) -_extension_to_compression = { +extension_to_compression = { ".tar": "tar", ".tar.gz": "tar", ".tar.bz2": "tar", @@ -489,7 +489,7 @@ def file_path_to_url(path: str) -> str: ".xz": "xz", ".zst": "zstd", } -_supported_compressions = set(_extension_to_compression.values()) +_supported_compressions = set(extension_to_compression.values()) def get_compression_method( @@ -565,7 +565,7 @@ def infer_compression( return None # Infer compression from the filename/URL extension - for extension, compression in _extension_to_compression.items(): + for extension, compression in extension_to_compression.items(): if filepath_or_buffer.lower().endswith(extension): return compression return None diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f2780d5fa6832..88974f3ab4afa 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -57,7 +57,7 @@ from pandas.io.common import ( IOHandles, - _extension_to_compression, + extension_to_compression, file_exists, get_handle, is_fsspec_url, @@ -854,7 +854,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): elif ( isinstance(filepath_or_buffer, str) and filepath_or_buffer.lower().endswith( - (".json",) + tuple(f".json{c}" for c in _extension_to_compression) + (".json",) + 
tuple(f".json{c}" for c in extension_to_compression) ) and not file_exists(filepath_or_buffer) ): diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 1368a407fa494..6ffa3356cc9de 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -46,10 +46,7 @@ if TYPE_CHECKING: from xml.etree.ElementTree import Element - from lxml.etree import ( - _Element, - _XSLTResultTree, - ) + from lxml import etree from pandas import DataFrame @@ -417,7 +414,7 @@ def _validate_names(self) -> None: def _parse_doc( self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] - ) -> Element | _Element: + ) -> Element | etree._Element: """ Build tree from path_or_buffer. @@ -625,7 +622,7 @@ def _validate_names(self) -> None: def _parse_doc( self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] - ) -> _Element: + ) -> etree._Element: from lxml.etree import ( XMLParser, fromstring, @@ -656,7 +653,7 @@ def _parse_doc( return document - def _transform_doc(self) -> _XSLTResultTree: + def _transform_doc(self) -> etree._XSLTResultTree: """ Transform original tree using stylesheet. diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 995b1668046d2..e448e1bce9146 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -250,7 +250,7 @@ class TestTesting(Base): ] def test_testing(self): - from pandas import testing # noqa: PDF015 + from pandas import testing self.check(testing, self.funcs) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 8e9112b531fad..b484dc39cf23b 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -67,7 +67,7 @@ def test_ufuncs_unary(ufunc): def test_ufunc_numeric(): - # np.sqrt on np.bool returns float16, which we upcast to Float32 + # np.sqrt on np.bool_ returns float16, which we upcast to Float32 # bc we do not have Float16 arr = pd.array([True, False, None], dtype="boolean") diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 32ab0336aa93f..46fb614d96633 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -707,13 +707,13 @@ def test_applymap_subset_multiindex(self, slice_): and isinstance(slice_[-1][-1], list) and "C" in slice_[-1][-1] ): - ctx = pytest.raises(KeyError, match="C") # noqa: PDF010 + ctx = pytest.raises(KeyError, match="C") elif ( isinstance(slice_[0], tuple) and isinstance(slice_[0][1], list) and 3 in slice_[0][1] ): - ctx = pytest.raises(KeyError, match="3") # noqa: PDF010 + ctx = pytest.raises(KeyError, match="3") else: ctx = contextlib.nullcontext() diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 53be06cd491ef..5c7c4f9ce0b75 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -349,7 +349,7 @@ def test_index_types(setup_path): _check_roundtrip(ser, func, path=setup_path) -def test_timeseries_preepoch(setup_path): +def test_timeseries_preepoch(setup_path, request): dr = bdate_range("1/1/1940", "1/1/1960") ts = Series(np.random.randn(len(dr)), index=dr) @@ -357,9 +357,10 @@ def test_timeseries_preepoch(setup_path): _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) except OverflowError: if is_platform_windows(): - pytest.xfail("known failure on some windows platforms") - else: - raise + request.node.add_marker( + pytest.mark.xfail(reason="known failure on some windows
platforms") + ) + raise @pytest.mark.parametrize( diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 1263d61b55cd5..2664c7df59223 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -911,7 +911,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): os.close(fd) except (OSError, ValueError): pass - os.remove(new_f) # noqa: PDF008 + os.remove(new_f) # new table df = tm.makeDataFrame() diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index f74d268690a5b..2172a4bf839fb 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -317,7 +317,7 @@ def test_read_expands_user_home_dir( ), ], ) - @pytest.mark.filterwarnings( # pytables np.object usage + @pytest.mark.filterwarnings( # pytables np.object_ usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) def test_read_fspath_all(self, reader, module, path, datapath): @@ -372,7 +372,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module): expected = f_path.read() assert result == expected - @pytest.mark.filterwarnings( # pytables np.object usage + @pytest.mark.filterwarnings( # pytables np.object_ usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) def test_write_fspath_hdf5(self): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 782753177f245..fc15ff3488ce9 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -19,7 +19,7 @@ import pandas.io.common as icom _compression_to_extension = { - value: key for key, value in icom._extension_to_compression.items() + value: key for key, value in icom.extension_to_compression.items() } diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 3dafe6fe61b35..07a028a19d7f9 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -37,8 +37,8 @@ get_lzma_file, is_platform_little_endian, ) -from pandas.compat._compressors import flatten_buffer from pandas.compat._optional import import_optional_dependency +from pandas.compat.compressors import flatten_buffer import pandas.util._test_decorators as td import pandas as pd @@ -255,7 +255,7 @@ def get_random_path(): class TestCompression: - _extension_to_compression = icom._extension_to_compression + _extension_to_compression = icom.extension_to_compression def compress_file(self, src_path, dest_path, compression): if compression is None: diff --git a/requirements-dev.txt b/requirements-dev.txt index 4f2a80d932fd0..428c2cb92d040 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -65,7 +65,6 @@ gitpython gitdb natsort numpydoc -pandas-dev-flaker==0.5.0 pydata-sphinx-theme<0.11 pytest-cython sphinx diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py new file mode 100644 index 0000000000000..3c21821e794a9 --- /dev/null +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -0,0 +1,142 @@ +""" +Check that test suite file doesn't use the pandas namespace inconsistently. + +We check for cases of ``Series`` and ``pd.Series`` appearing in the same file +(likewise for other pandas objects). + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run inconsistent-namespace-usage --all-files + +To automatically fixup a given file, you can pass `--replace`, e.g. 
+ + python scripts/check_for_inconsistent_pandas_namespace.py test_me.py --replace + +though note that you may need to manually fixup some imports and that you will also +need the additional dependency `tokenize-rt` (which is left out of the pre-commit +hook so that it uses the same virtualenv as the other local ones). + +The general structure is similar to that of some plugins from +https://github.com/asottile/pyupgrade . +""" + +import argparse +import ast +import sys +from typing import ( + MutableMapping, + NamedTuple, + Optional, + Sequence, + Set, +) + +ERROR_MESSAGE = ( + "{path}:{lineno}:{col_offset}: " + "Found both '{prefix}.{name}' and '{name}' in {path}" +) + + +class OffsetWithNamespace(NamedTuple): + lineno: int + col_offset: int + namespace: str + + +class Visitor(ast.NodeVisitor): + def __init__(self) -> None: + self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {} + self.imported_from_pandas: Set[str] = set() + + def visit_Attribute(self, node: ast.Attribute) -> None: + if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}: + offset_with_namespace = OffsetWithNamespace( + node.lineno, node.col_offset, node.value.id + ) + self.pandas_namespace[offset_with_namespace] = node.attr + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + if node.module is not None and "pandas" in node.module: + self.imported_from_pandas.update(name.name for name in node.names) + self.generic_visit(node) + + +def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str: + from tokenize_rt import ( + reversed_enumerate, + src_to_tokens, + tokens_to_src, + ) + + tokens = src_to_tokens(content) + for n, i in reversed_enumerate(tokens): + offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src) + if ( + offset_with_namespace in visitor.pandas_namespace + and visitor.pandas_namespace[offset_with_namespace] + in visitor.imported_from_pandas + ): + # Replace `pd` + tokens[n] = i._replace(src="") + # Replace `.` + tokens[n + 1] = tokens[n + 1]._replace(src="") + + new_src: str = tokens_to_src(tokens) + return new_src + + +def check_for_inconsistent_pandas_namespace( + content: str, path: str, *, replace: bool +) -> Optional[str]: + tree = ast.parse(content) + + visitor = Visitor() + visitor.visit(tree) + + inconsistencies = visitor.imported_from_pandas.intersection( + visitor.pandas_namespace.values() + ) + + if not inconsistencies: + # No inconsistent namespace usage, nothing to replace.
+ return None + + if not replace: + inconsistency = inconsistencies.pop() + lineno, col_offset, prefix = next( + key for key, val in visitor.pandas_namespace.items() if val == inconsistency + ) + msg = ERROR_MESSAGE.format( + lineno=lineno, + col_offset=col_offset, + prefix=prefix, + name=inconsistency, + path=path, + ) + sys.stdout.write(msg) + sys.exit(1) + + return replace_inconsistent_pandas_namespace(visitor, content) + + +def main(argv: Optional[Sequence[str]] = None) -> None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + parser.add_argument("--replace", action="store_true") + args = parser.parse_args(argv) + + for path in args.paths: + with open(path, encoding="utf-8") as fd: + content = fd.read() + new_content = check_for_inconsistent_pandas_namespace( + content, path, replace=args.replace + ) + if not args.replace or new_content is None: + continue + with open(path, "w", encoding="utf-8") as fd: + fd.write(new_content) + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_flake8_versions.py b/scripts/sync_flake8_versions.py index 8852634c5d796..0d513d5937dbe 100644 --- a/scripts/sync_flake8_versions.py +++ b/scripts/sync_flake8_versions.py @@ -1,5 +1,5 @@ """ -Check that the flake8 (and pandas-dev-flaker) pins are the same in: +Check that the flake8 pins are the same in: - environment.yml - .pre-commit-config.yaml, in the flake8 hook @@ -103,17 +103,13 @@ def get_revisions( precommit_config: YamlMapping, environment: YamlMapping ) -> tuple[Revisions, Revisions]: flake8_revisions = Revisions(name="flake8") - pandas_dev_flaker_revisions = Revisions(name="pandas-dev-flaker") repos = precommit_config["repos"] flake8_repo, flake8_hook = _get_repo_hook(repos, "flake8") flake8_revisions.pre_commit = Revision("flake8", "==", flake8_repo["rev"]) flake8_additional_dependencies = [] for dep in _process_dependencies(flake8_hook.get("additional_dependencies", [])): - if dep.name == "pandas-dev-flaker": - pandas_dev_flaker_revisions.pre_commit = dep - else: - flake8_additional_dependencies.append(dep) + flake8_additional_dependencies.append(dep) environment_dependencies = environment["dependencies"] environment_additional_dependencies = [] @@ -121,8 +117,6 @@ def get_revisions( if dep.name == "flake8": flake8_revisions.environment = dep environment_additional_dependencies.append(dep) - elif dep.name == "pandas-dev-flaker": - pandas_dev_flaker_revisions.environment = dep else: environment_additional_dependencies.append(dep) @@ -131,8 +125,7 @@ def get_revisions( environment_additional_dependencies, ) - for revisions in flake8_revisions, pandas_dev_flaker_revisions: - _validate_revisions(revisions) + _validate_revisions(flake8_revisions) if __name__ == "__main__": diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py new file mode 100644 index 0000000000000..eb995158d8cb4 --- /dev/null +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -0,0 +1,61 @@ +import pytest + +from ..check_for_inconsistent_pandas_namespace import ( + check_for_inconsistent_pandas_namespace, +) + +BAD_FILE_0 = ( + "from pandas import Categorical\n" + "cat_0 = Categorical()\n" + "cat_1 = pd.Categorical()" +) +BAD_FILE_1 = ( + "from pandas import Categorical\n" + "cat_0 = pd.Categorical()\n" + "cat_1 = Categorical()" +) +BAD_FILE_2 = ( + "from pandas import Categorical\n" + "cat_0 = pandas.Categorical()\n" + "cat_1 = Categorical()" +) +GOOD_FILE_0 = ( + "from pandas import Categorical\ncat_0 = 
Categorical()\ncat_1 = Categorical()" +) +GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" +GOOD_FILE_2 = "from array import array\nimport pandas as pd\narr = pd.array([])" +PATH = "t.py" + + +@pytest.mark.parametrize( + "content, expected", + [ + (BAD_FILE_0, "t.py:3:8: Found both 'pd.Categorical' and 'Categorical' in t.py"), + (BAD_FILE_1, "t.py:2:8: Found both 'pd.Categorical' and 'Categorical' in t.py"), + ( + BAD_FILE_2, + "t.py:2:8: Found both 'pandas.Categorical' and 'Categorical' in t.py", + ), + ], +) +def test_inconsistent_usage(content, expected, capsys): + with pytest.raises(SystemExit): + check_for_inconsistent_pandas_namespace(content, PATH, replace=False) + result, _ = capsys.readouterr() + assert result == expected + + +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1, GOOD_FILE_2]) +@pytest.mark.parametrize("replace", [True, False]) +def test_consistent_usage(content, replace): + # should not raise + check_for_inconsistent_pandas_namespace(content, PATH, replace=replace) + + +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1, BAD_FILE_2]) +def test_inconsistent_usage_with_replace(content): + result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True) + expected = ( + "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()" + ) + assert result == expected diff --git a/scripts/tests/test_sync_flake8_versions.py b/scripts/tests/test_sync_flake8_versions.py index 743ece34e0b56..2349a4f5d8d1c 100644 --- a/scripts/tests/test_sync_flake8_versions.py +++ b/scripts/tests/test_sync_flake8_versions.py @@ -87,7 +87,6 @@ def test_get_revisions_no_failure(capsys): { "id": "flake8", "additional_dependencies": [ - "pandas-dev-flaker==0.4.0", "flake8-bugs==1.1.1", ], } @@ -101,7 +100,6 @@ def test_get_revisions_no_failure(capsys): "id": "yesqa", "additional_dependencies": [ "flake8==0.1.1", - "pandas-dev-flaker==0.4.0", "flake8-bugs==1.1.1", ], } @@ -116,7 +114,6 @@ def test_get_revisions_no_failure(capsys): { "pip": [ "git+https://github.com/pydata/pydata-sphinx-theme.git@master", - "pandas-dev-flaker==0.4.0", ] }, ] diff --git a/scripts/tests/test_validate_unwanted_patterns.py b/scripts/tests/test_validate_unwanted_patterns.py new file mode 100644 index 0000000000000..ef93fd1d21981 --- /dev/null +++ b/scripts/tests/test_validate_unwanted_patterns.py @@ -0,0 +1,419 @@ +import io + +import pytest + +from .. import validate_unwanted_patterns + + +class TestBarePytestRaises: + @pytest.mark.parametrize( + "data", + [ + ( + """ + with pytest.raises(ValueError, match="foo"): + pass + """ + ), + ( + """ + # with pytest.raises(ValueError, match="foo"): + # pass + """ + ), + ( + """ + # with pytest.raises(ValueError): + # pass + """ + ), + ( + """ + with pytest.raises( + ValueError, + match="foo" + ): + pass + """ + ), + ], + ) + def test_pytest_raises(self, data): + fd = io.StringIO(data.strip()) + result = list(validate_unwanted_patterns.bare_pytest_raises(fd)) + assert result == [] + + @pytest.mark.parametrize( + "data, expected", + [ + ( + ( + """ + with pytest.raises(ValueError): + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ( + ( + """ + with pytest.raises(ValueError, match="foo"): + with pytest.raises(ValueError): + pass + pass + """ + ), + [ + ( + 2, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." 
+ ), + ), + ], + ), + ( + ( + """ + with pytest.raises(ValueError): + with pytest.raises(ValueError, match="foo"): + pass + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ( + ( + """ + with pytest.raises( + ValueError + ): + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ( + ( + """ + with pytest.raises( + ValueError, + # match = "foo" + ): + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ], + ) + def test_pytest_raises_raises(self, data, expected): + fd = io.StringIO(data.strip()) + result = list(validate_unwanted_patterns.bare_pytest_raises(fd)) + assert result == expected + + +@pytest.mark.parametrize( + "data, expected", + [ + ( + 'msg = ("bar " "baz")', + [ + ( + 1, + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ) + ], + ), + ( + 'msg = ("foo " "bar " "baz")', + [ + ( + 1, + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ), + ( + 1, + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ), + ], + ), + ], +) +def test_strings_to_concatenate(data, expected): + fd = io.StringIO(data.strip()) + result = list(validate_unwanted_patterns.strings_to_concatenate(fd)) + assert result == expected + + +class TestStringsWithWrongPlacedWhitespace: + @pytest.mark.parametrize( + "data", + [ + ( + """ + msg = ( + "foo\n" + " bar" + ) + """ + ), + ( + """ + msg = ( + "foo" + " bar" + "baz" + ) + """ + ), + ( + """ + msg = ( + f"foo" + " bar" + ) + """ + ), + ( + """ + msg = ( + "foo" + f" bar" + ) + """ + ), + ( + """ + msg = ( + "foo" + rf" bar" + ) + """ + ), + ], + ) + def test_strings_with_wrong_placed_whitespace(self, data): + fd = io.StringIO(data.strip()) + result = list( + validate_unwanted_patterns.strings_with_wrong_placed_whitespace(fd) + ) + assert result == [] + + @pytest.mark.parametrize( + "data, expected", + [ + ( + ( + """ + msg = ( + "foo" + " bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + f"foo" + " bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + "foo" + f" bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + f"foo" + f" bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + "foo" + rf" bar" + " baz" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ), + ( + 4, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ), + ], + ), + ( + ( + """ + msg = ( + "foo" + " bar" + rf" baz" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ), + ( + 4, + ( + "String has a space at the beginning instead " + "of the end of the previous string." 
+ ), + ), + ], + ), + ( + ( + """ + msg = ( + "foo" + rf" bar" + rf" baz" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ), + ( + 4, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ), + ], + ), + ], + ) + def test_strings_with_wrong_placed_whitespace_raises(self, data, expected): + fd = io.StringIO(data.strip()) + result = list( + validate_unwanted_patterns.strings_with_wrong_placed_whitespace(fd) + ) + assert result == expected diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py new file mode 100755 index 0000000000000..f78b9b8ced863 --- /dev/null +++ b/scripts/validate_unwanted_patterns.py @@ -0,0 +1,487 @@ +#!/usr/bin/env python3 +""" +Unwanted patterns test cases. + +The reason this file exists despite the fact that we already have +`ci/code_checks.sh` +(see https://github.com/pandas-dev/pandas/blob/master/ci/code_checks.sh) + +is that some of the test cases are more complex/impossible to validate via regex. +So this file is somewhat of an extension to `ci/code_checks.sh`. +""" + +import argparse +import ast +import sys +import token +import tokenize +from typing import ( + IO, + Callable, + Iterable, + List, + Set, + Tuple, +) + +PRIVATE_IMPORTS_TO_IGNORE: Set[str] = { + "_extension_array_shared_docs", + "_index_shared_docs", + "_interval_shared_docs", + "_merge_doc", + "_shared_docs", + "_apply_docs", + "_new_Index", + "_new_PeriodIndex", + "_doc_template", + "_agg_template", + "_pipe_template", + "__main__", + "_transform_template", + "_flex_comp_doc_FRAME", + "_op_descriptions", + "_IntegerDtype", + "_use_inf_as_na", + "_get_plot_backend", + "_matplotlib", + "_arrow_utils", + "_registry", + "_get_offset", # TODO: remove after get_offset deprecation enforced + "_test_parse_iso8601", + "_json_normalize", # TODO: remove after deprecation is enforced + "_testing", + "_test_decorators", + "__version__", # check np.__version__ in compat.numpy.function +} + + +def _get_literal_string_prefix_len(token_string: str) -> int: + """ + Getting the length of the literal string prefix. + + Parameters + ---------- + token_string : str + String to check. + + Returns + ------- + int + Length of the literal string prefix. + + Examples + -------- + >>> example_string = "'Hello world'" + >>> _get_literal_string_prefix_len(example_string) + 0 + >>> example_string = "r'Hello world'" + >>> _get_literal_string_prefix_len(example_string) + 1 + """ + try: + return min( + token_string.find(quote) + for quote in (r"'", r'"') + if token_string.find(quote) >= 0 + ) + except ValueError: + return 0 + + +def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Test Case for bare pytest raises. + + For example, this is wrong: + + >>> with pytest.raises(ValueError): + ... # Some code that raises ValueError + + And this is what we want instead: + + >>> with pytest.raises(ValueError, match="foo"): + ... # Some code that raises ValueError + + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + + Yields + ------ + line_number : int + Line number of the bare pytest raises. + msg : str + Explanation of the error.
+ + Notes + ----- + GH #23922 + """ + contents = file_obj.read() + tree = ast.parse(contents) + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + + try: + if not (node.func.value.id == "pytest" and node.func.attr == "raises"): + continue + except AttributeError: + continue + + if not node.keywords: + yield ( + node.lineno, + "Bare pytests raise have been found. " + "Please pass in the argument 'match' as well the exception.", + ) + else: + # Means that there are arguments that are being passed in, + # now we validate that `match` is one of the passed in arguments + if not any(keyword.arg == "match" for keyword in node.keywords): + yield ( + node.lineno, + "Bare pytests raise have been found. " + "Please pass in the argument 'match' as well the exception.", + ) + + +PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"} # no known alternative + + +def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Checking that a private function is not used across modules. + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + Yields + ------ + line_number : int + Line number of the private function that is used across modules. + msg : str + Explanation of the error. + """ + contents = file_obj.read() + tree = ast.parse(contents) + + imported_modules: Set[str] = set() + + for node in ast.walk(tree): + if isinstance(node, (ast.Import, ast.ImportFrom)): + for module in node.names: + module_fqdn = module.name if module.asname is None else module.asname + imported_modules.add(module_fqdn) + + if not isinstance(node, ast.Call): + continue + + try: + module_name = node.func.value.id + function_name = node.func.attr + except AttributeError: + continue + + # Exception section # + + # (Debatable) Class case + if module_name[0].isupper(): + continue + # (Debatable) Dunder methods case + elif function_name.startswith("__") and function_name.endswith("__"): + continue + elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED: + continue + + if module_name in imported_modules and function_name.startswith("_"): + yield (node.lineno, f"Private function '{module_name}.{function_name}'") + + +def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Checking that a private function is not imported across modules. + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + Yields + ------ + line_number : int + Line number of the import statement that imports the private function. + msg : str + Explanation of the error. + """ + contents = file_obj.read() + tree = ast.parse(contents) + + for node in ast.walk(tree): + if not isinstance(node, (ast.Import, ast.ImportFrom)): + continue + + for module in node.names: + module_name = module.name.split(".")[-1] + if module_name in PRIVATE_IMPORTS_TO_IGNORE: + continue + + if module_name.startswith("_"): + yield (node.lineno, f"Import of internal function {repr(module_name)}") + + +def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + This test case is necessary because 'Black' (https://github.com/psf/black) + formats strings over multiple lines. + + For example, when this: + + >>> foo = ( + ... "bar " + ... "baz" + ... ) + + becomes this: + + >>> foo = ("bar " "baz") + + 'Black' does not consider this an + issue (see https://github.com/psf/black/issues/1051), + so we check it here instead.
+ + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + + Yields + ------ + line_number : int + Line number of unconcatenated string. + msg : str + Explanation of the error. + + Notes + ----- + GH #30454 + """ + tokens: List = list(tokenize.generate_tokens(file_obj.readline)) + + for current_token, next_token in zip(tokens, tokens[1:]): + if current_token.type == next_token.type == token.STRING: + yield ( + current_token.start[0], + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ) + + +def strings_with_wrong_placed_whitespace( + file_obj: IO[str], +) -> Iterable[Tuple[int, str]]: + """ + Test case for leading spaces in concatenated strings. + + For example: + + >>> rule = ( + ... "We want the space at the end of the line, " + ... "not at the beginning" + ... ) + + Instead of: + + >>> rule = ( + ... "We want the space at the end of the line," + ... " not at the beginning" + ... ) + + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + + Yields + ------ + line_number : int + Line number of the string with misplaced whitespace. + msg : str + Explanation of the error. + """ + + def has_wrong_whitespace(first_line: str, second_line: str) -> bool: + """ + Checking if the two lines are matching the unwanted pattern. + + Parameters + ---------- + first_line : str + First line to check. + second_line : str + Second line to check. + + Returns + ------- + bool + True if the two received strings match an unwanted pattern. + + Notes + ----- + The unwanted pattern that we are trying to catch is if the spaces in + a string that is concatenated over multiple lines are placed at the + end of each string, unless this string is ending with a + newline character (\n). + + For example, this is bad: + + >>> rule = ( + ... "We want the space at the end of the line," + ... " not at the beginning" + ... ) + + And what we want is: + + >>> rule = ( + ... "We want the space at the end of the line, " + ... "not at the beginning" + ... ) + + And if the string is ending with a new line character (\n) we + do not want any trailing whitespaces after it. + + For example, this is bad: + + >>> rule = ( + ... "We want the space at the beginning of " + ... "the line if the previous line is ending with a \n " + ... "not at the end, like always" + ... ) + + And what we do want is: + + >>> rule = ( + ... "We want the space at the beginning of " + ... "the line if the previous line is ending with a \n" + ... " not at the end, like always" + ...
) """ + if first_line.endswith(r"\n"): + return False + elif first_line.startswith("  ") or second_line.startswith("  "): + return False + elif first_line.endswith("  ") or second_line.endswith("  "): + return False + elif (not first_line.endswith(" ")) and second_line.startswith(" "): + return True + return False + + tokens: List = list(tokenize.generate_tokens(file_obj.readline)) + + for first_token, second_token, third_token in zip(tokens, tokens[1:], tokens[2:]): + # Checking if we are in a block of concatenated strings + if ( + first_token.type == third_token.type == token.STRING + and second_token.type == token.NL + ): + # Stripping the quotes, with the string literal prefix + first_string: str = first_token.string[ + _get_literal_string_prefix_len(first_token.string) + 1 : -1 + ] + second_string: str = third_token.string[ + _get_literal_string_prefix_len(third_token.string) + 1 : -1 + ] + + if has_wrong_whitespace(first_string, second_string): + yield ( + third_token.start[0], + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + + +def main( + function: Callable[[IO[str]], Iterable[Tuple[int, str]]], + source_path: List[str], + output_format: str, +) -> bool: + """ + Main entry point of the script. + + Parameters + ---------- + function : Callable + Function to execute for the specified validation type. + source_path : list of str + Source paths of the files to check. + output_format : str + Output format of the error message. + + Returns + ------- + bool + True if any unwanted patterns were found, False otherwise.
+ """ + is_failed: bool = False + + for file_path in source_path: + with open(file_path, encoding="utf-8") as file_obj: + for line_number, msg in function(file_obj): + is_failed = True + print( + output_format.format( + source_path=file_path, line_number=line_number, msg=msg + ) + ) + + return is_failed + + +if __name__ == "__main__": + available_validation_types: List[str] = [ + "bare_pytest_raises", + "private_function_across_module", + "private_import_across_module", + "strings_to_concatenate", + "strings_with_wrong_placed_whitespace", + ] + + parser = argparse.ArgumentParser(description="Unwanted patterns checker.") + + parser.add_argument("paths", nargs="*", help="Source paths of files to check.") + parser.add_argument( + "--format", + "-f", + default="{source_path}:{line_number}:{msg}", + help="Output format of the error message.", + ) + parser.add_argument( + "--validation-type", + "-vt", + choices=available_validation_types, + required=True, + help="Validation test case to check.", + ) + + args = parser.parse_args() + + sys.exit( + main( + function=globals().get(args.validation_type), + source_path=args.paths, + output_format=args.format, + ) + ) diff --git a/setup.cfg b/setup.cfg index ef84dd7f9ce85..562ae70fd73ef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,10 +35,6 @@ ignore = B023 # Functions defined inside a loop must not use variables redefined in the loop B301, - # single-letter variables - PDF023, - # "use 'pandas._testing' instead" in non-test code - PDF025, # If test must be a simple comparison against sys.platform or sys.version_info Y002, # Use "_typeshed.Self" instead of class-bound TypeVar @@ -59,18 +55,6 @@ exclude = versioneer.py, # exclude asv benchmark environments from linting env -per-file-ignores = - # private import across modules - pandas/tests/*:PDF020 - # pytest.raises without match= - pandas/tests/extension/*:PDF009 - # os.remove - doc/make.py:PDF008 - # import from pandas._testing - pandas/testing.py:PDF014 - # can't use fixtures in asv - asv_bench/*:PDF016 - [flake8-rst] max-line-length = 84