diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cef3d6aea5d27..da0162ce7e160 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -236,6 +236,14 @@ repos:
         entry: python scripts/validate_min_versions_in_sync.py
         language: python
         files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
+    -   id: validate-errors-locations
+        name: Validate errors locations
+        description: Validate errors are in appropriate locations.
+        entry: python scripts/validate_exception_location.py
+        language: python
+        files: ^pandas/
+        exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__\.py$|pandas/_version\.py)
+        types: [python]
     -   id: flake8-pyi
         name: flake8-pyi
         entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst
index 1144c767942d4..07624e87d82e0 100644
--- a/doc/source/reference/testing.rst
+++ b/doc/source/reference/testing.rst
@@ -38,9 +38,13 @@ Exceptions and warnings
    errors.IncompatibilityWarning
    errors.IndexingError
    errors.InvalidColumnName
+   errors.InvalidComparison
    errors.InvalidIndexError
+   errors.InvalidVersion
    errors.IntCastingNaNError
+   errors.LossySetitemError
    errors.MergeError
+   errors.NoBufferPresent
    errors.NullFrequencyError
    errors.NumbaUtilError
    errors.NumExprClobberingError
diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
index 413597f6c3748..02925afc63918 100644
--- a/doc/source/whatsnew/v1.6.0.rst
+++ b/doc/source/whatsnew/v1.6.0.rst
@@ -34,6 +34,7 @@ Other enhancements
 - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`)
 - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
 - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
+- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 4f92afd048c2e..707db65533540 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -65,6 +65,7 @@
 from pandas.compat.numpy import function as nv
 from pandas.errors import (
     AbstractMethodError,
+    InvalidComparison,
     NullFrequencyError,
     PerformanceWarning,
 )
@@ -153,15 +154,6 @@
 DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin")
 
 
-class InvalidComparison(Exception):
-    """
-    Raised by _validate_comparison_value to indicate to caller it should
-    return invalid_comparison.
-    """
-
-    pass
-
-
 class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
     """
     Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 5809acbd55380..75a0db3233130 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -40,7 +40,10 @@
     DtypeObj,
     Scalar,
 )
-from pandas.errors import IntCastingNaNError
+from pandas.errors import (
+    IntCastingNaNError,
+    LossySetitemError,
+)
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_bool_kwarg
 
@@ -2103,11 +2106,3 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
     if not len(rng):
         return True
     return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype)
-
-
-class LossySetitemError(Exception):
-    """
-    Raised when trying to do a __setitem__ on an np.ndarray that is not lossless.
-    """
-
-    pass
diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py
index dc24c928d1f39..f375048563c70 100644
--- a/pandas/core/interchange/column.py
+++ b/pandas/core/interchange/column.py
@@ -6,6 +6,7 @@
 
 from pandas._libs.lib import infer_dtype
 from pandas._libs.tslibs import iNaT
+from pandas.errors import NoBufferPresent
 from pandas.util._decorators import cache_readonly
 
 import pandas as pd
@@ -23,7 +24,6 @@
 from pandas.core.interchange.utils import (
     ArrowCTypes,
     Endianness,
-    NoBufferPresent,
     dtype_to_arrow_c_fmt,
 )
 
diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py
index 1d56af94b2629..aa717d05aecb5 100644
--- a/pandas/core/interchange/utils.py
+++ b/pandas/core/interchange/utils.py
@@ -89,7 +89,3 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
     raise NotImplementedError(
         f"Conversion of {dtype} to Arrow C format string is not implemented."
     )
-
-
-class NoBufferPresent(Exception):
-    """Exception to signal that there is no requested buffer."""
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index d0c9ef94f4453..3e4f116953cb3 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -12,6 +12,8 @@
     OutOfBoundsTimedelta,
 )
 
+from pandas.util.version import InvalidVersion
+
 
 class IntCastingNaNError(ValueError):
     """
@@ -535,6 +537,24 @@ class CategoricalConversionWarning(Warning):
     """
 
 
+class LossySetitemError(Exception):
+    """
+    Raised when trying to do a __setitem__ on an np.ndarray that is not lossless.
+    """
+
+
+class NoBufferPresent(Exception):
+    """
+    Exception is raised in _get_data_buffer to signal that there is no requested buffer.
+    """
+
+
+class InvalidComparison(Exception):
+    """
+    Exception is raised by _validate_comparison_value to indicate an invalid comparison.
+    """
+
+
 __all__ = [
     "AbstractMethodError",
     "AccessorRegistrationWarning",
@@ -550,9 +570,13 @@ class CategoricalConversionWarning(Warning):
     "IncompatibilityWarning",
     "IntCastingNaNError",
     "InvalidColumnName",
+    "InvalidComparison",
     "InvalidIndexError",
+    "InvalidVersion",
     "IndexingError",
+    "LossySetitemError",
     "MergeError",
+    "NoBufferPresent",
     "NullFrequencyError",
     "NumbaUtilError",
     "NumExprClobberingError",
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
index c6ca51b7763d9..5dffee587adcb 100644
--- a/pandas/tests/test_errors.py
+++ b/pandas/tests/test_errors.py
@@ -11,33 +11,37 @@
 @pytest.mark.parametrize(
     "exc",
     [
-        "UnsupportedFunctionCall",
-        "UnsortedIndexError",
-        "OutOfBoundsDatetime",
-        "ParserError",
-        "PerformanceWarning",
+        "AttributeConflictWarning",
+        "CSSWarning",
+        "CategoricalConversionWarning",
+        "ClosedFileError",
+        "DataError",
+        "DatabaseError",
         "DtypeWarning",
         "EmptyDataError",
-        "ParserWarning",
+        "IncompatibilityWarning",
+        "IndexingError",
+        "InvalidColumnName",
+        "InvalidComparison",
+        "InvalidVersion",
+        "LossySetitemError",
         "MergeError",
-        "OptionError",
-        "NumbaUtilError",
-        "DataError",
-        "SpecificationError",
-        "SettingWithCopyError",
-        "SettingWithCopyWarning",
+        "NoBufferPresent",
         "NumExprClobberingError",
-        "IndexingError",
-        "PyperclipException",
-        "CSSWarning",
-        "ClosedFileError",
+        "NumbaUtilError",
+        "OptionError",
+        "OutOfBoundsDatetime",
+        "ParserError",
+        "ParserWarning",
+        "PerformanceWarning",
         "PossibleDataLossError",
-        "IncompatibilityWarning",
-        "AttributeConflictWarning",
-        "DatabaseError",
         "PossiblePrecisionLoss",
-        "CategoricalConversionWarning",
-        "InvalidColumnName",
+        "PyperclipException",
+        "SettingWithCopyError",
+        "SettingWithCopyWarning",
+        "SpecificationError",
+        "UnsortedIndexError",
+        "UnsupportedFunctionCall",
         "ValueLabelTypeMismatch",
     ],
 )
diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py
index 18db5fa10a8f9..52c1e2008b8a0 100644
--- a/scripts/pandas_errors_documented.py
+++ b/scripts/pandas_errors_documented.py
@@ -1,5 +1,5 @@
 """
-Check that doc/source/reference/general_utility_functions.rst documents
+Check that doc/source/reference/testing.rst documents
 all exceptions and warnings in pandas/errors/__init__.py.
 
 This is meant to be run as a pre-commit hook - to run it manually, you can do:
diff --git a/scripts/tests/test_validate_exception_location.py b/scripts/tests/test_validate_exception_location.py
new file mode 100644
index 0000000000000..9d493ee04d1c2
--- /dev/null
+++ b/scripts/tests/test_validate_exception_location.py
@@ -0,0 +1,59 @@
+import pytest
+
+from scripts.validate_exception_location import (
+    ERROR_MESSAGE,
+    validate_exception_and_warning_placement,
+)
+
+PATH = "t.py"
+
+# ERRORS_IN_TESTING_RST is the set returned when parsing testing.rst for all the
+# exceptions and warnings.
+CUSTOM_EXCEPTION_NOT_IN_TESTING_RST = "MyException"
+CUSTOM_EXCEPTION__IN_TESTING_RST = "MyOldException"
+ERRORS_IN_TESTING_RST = {CUSTOM_EXCEPTION__IN_TESTING_RST}
+
+TEST_CODE = """
+import numpy as np
+import sys
+
+def my_func():
+    pass
+
+class {custom_name}({error_type}):
+    pass
+
+"""
+
+
+# Test with various python-defined exceptions to ensure they are all flagged.
+@pytest.fixture(params=["Exception", "ValueError", "Warning", "UserWarning"])
+def error_type(request):
+    return request.param
+
+
+def test_class_that_inherits_an_exception_and_is_not_in_the_testing_rst_is_flagged(
+    capsys, error_type
+):
+    content = TEST_CODE.format(
+        custom_name=CUSTOM_EXCEPTION_NOT_IN_TESTING_RST, error_type=error_type
+    )
+    expected_msg = ERROR_MESSAGE.format(errors=CUSTOM_EXCEPTION_NOT_IN_TESTING_RST)
+    with pytest.raises(SystemExit):
+        validate_exception_and_warning_placement(PATH, content, ERRORS_IN_TESTING_RST)
+    result_msg, _ = capsys.readouterr()
+    assert result_msg == expected_msg
+
+
+def test_class_that_inherits_an_exception_but_is_in_the_testing_rst_is_not_flagged(
+    capsys, error_type
+):
+    content = TEST_CODE.format(
+        custom_name=CUSTOM_EXCEPTION__IN_TESTING_RST, error_type=error_type
+    )
+    validate_exception_and_warning_placement(PATH, content, ERRORS_IN_TESTING_RST)
+
+
+def test_class_that_does_not_inherit_an_exception_is_not_flagged(capsys):
+    content = "class MyClass(NonExceptionClass): pass"
+    validate_exception_and_warning_placement(PATH, content, ERRORS_IN_TESTING_RST)
diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py
new file mode 100644
index 0000000000000..ebbe6c95a3ec9
--- /dev/null
+++ b/scripts/validate_exception_location.py
@@ -0,0 +1,168 @@
+"""
+Validate that the exceptions and warnings are in appropriate places.
+
+Checks for classes that inherit a python exception or warning and
+flags them, unless they are exempted from checking. Exempt meaning
+the exception/warning is listed in testing.rst. Testing.rst contains
+a list of pandas defined exceptions and warnings. This list is kept
+current by another pre-commit hook, pandas_errors_documented.py.
+That hook maintains that errors.__init__.py and testing.rst are in-sync.
+Therefore, the exception or warning should be defined or imported in
+errors.__init__.py. Ideally, the exception or warning is defined unless
+there's a special reason to import it.
+
+Prints the exceptions/warnings that do not follow this convention.
+
+Usage::
+
+As a pre-commit hook:
+    pre-commit run validate-errors-locations --all-files
+"""
+from __future__ import annotations
+
+import argparse
+import ast
+import pathlib
+import sys
+from typing import Sequence
+
+API_PATH = pathlib.Path("doc/source/reference/testing.rst").resolve()
+ERROR_MESSAGE = (
+    "The following exception(s) and/or warning(s): {errors} exist(s) outside of "
+    "pandas/errors/__init__.py. Please either define them in "
+    "pandas/errors/__init__.py. Or, if not possible then import them in "
+    "pandas/errors/__init__.py.\n"
+)
+
+
+def get_warnings_and_exceptions_from_api_path() -> set[str]:
+    """
+    Collect the exception and warning names listed in testing.rst.
+
+    Returns
+    -------
+    set of str
+        Class names taken from the ``errors.<Name>`` entries of API_PATH.
+    """
+    doc_errors: set[str] = set()
+    with open(API_PATH, encoding="utf-8") as f:
+        for line in f:
+            entry = line.strip()
+            # Only "errors.<Name>" entries name a class; skipping other lines
+            # avoids an IndexError on a line mentioning "errors" without a dot.
+            if entry.startswith("errors."):
+                doc_errors.add(entry.split(".")[1])
+    return doc_errors
+
+
+class Visitor(ast.NodeVisitor):
+    """
+    Collect the names of classes that subclass an exception or a warning.
+
+    Parameters
+    ----------
+    path : str
+        Path of the file being visited (stored, not otherwise used here).
+    exception_set : set of str
+        Documented exception/warning names (stored, not otherwise used here).
+
+    Attributes
+    ----------
+    found_exceptions : set of str
+        Names of the exception/warning subclasses found while visiting.
+    """
+
+    def __init__(self, path: str, exception_set: set[str]) -> None:
+        self.path = path
+        self.exception_set = exception_set
+        self.found_exceptions: set[str] = set()
+
+    def visit_ClassDef(self, node: ast.ClassDef) -> None:
+        """Record ``node`` if a base class looks like an exception or warning."""
+        for base in node.bases:
+            # Plain names keep the identifier in ``id``; dotted bases such as
+            # ``errors.ParserError`` are Attribute nodes and keep it in ``attr``.
+            base_id = getattr(base, "id", None) or getattr(base, "attr", None)
+            if base_id and (
+                base_id == "Exception" or base_id.endswith(("Warning", "Error"))
+            ):
+                self.found_exceptions.add(node.name)
+                break
+
+        # A custom visit_* method stops the default traversal, so descend
+        # explicitly to also check classes nested inside this one.
+        self.generic_visit(node)
+
+
+def _find_misplaced_exceptions(
+    file_path: str, file_content: str, errors: set[str]
+) -> list[str]:
+    """Return, sorted, the exceptions/warnings defined here but not in errors."""
+    visitor = Visitor(file_path, errors)
+    visitor.visit(ast.parse(file_content))
+    # Sort so that the reported order does not depend on set iteration order.
+    return sorted(visitor.found_exceptions.difference(errors))
+
+
+def validate_exception_and_warning_placement(
+    file_path: str, file_content: str, errors: set[str]
+) -> None:
+    """
+    Print the misplaced exceptions/warnings of one file and exit if any exist.
+
+    Parameters
+    ----------
+    file_path : str
+        Path of the file being checked.
+    file_content : str
+        Python source code of the file.
+    errors : set of str
+        Names that are documented in testing.rst and therefore exempt.
+
+    Raises
+    ------
+    SystemExit
+        With status 1 if an exception or warning not in ``errors`` is defined.
+    """
+    misplaced = _find_misplaced_exceptions(file_path, file_content, errors)
+
+    # A non-empty result means there are pandas-defined exceptions or warnings
+    # outside of pandas/errors/__init__.py, so we should flag them.
+    if misplaced:
+        sys.stdout.write(ERROR_MESSAGE.format(errors=", ".join(misplaced)))
+        sys.exit(1)
+
+
+def main(argv: Sequence[str] | None = None) -> None:
+    """
+    Check every given file and exit with status 1 if any has offenders.
+
+    Parameters
+    ----------
+    argv : sequence of str, optional
+        Command line arguments, by default taken from ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("paths", nargs="*")
+    args = parser.parse_args(argv)
+
+    error_set = get_warnings_and_exceptions_from_api_path()
+
+    # Check all files before exiting so that a single run reports every
+    # offender, and prefix each report with the file it belongs to.
+    found_misplaced = False
+    for path in args.paths:
+        with open(path, encoding="utf-8") as fd:
+            content = fd.read()
+        misplaced = _find_misplaced_exceptions(path, content, error_set)
+        if misplaced:
+            found_misplaced = True
+            msg = ERROR_MESSAGE.format(errors=", ".join(misplaced))
+            sys.stdout.write(f"{path}: {msg}")
+
+    if found_misplaced:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()