ENH: Move warnings to errors/__init__.py per GH27656 (#47901)

dataxerik · web-flow · commit 9e8387c86357 · 2022-08-08T11:49:39.000-07:00
* ENH: Move warnings to errors/__init__.py per GH27656

* ENH: update whatsnew line

* ENH: add and re-add final

* ENH: add to __all__

* ENH: apply feedback

* ENH: fix doc string

* ENH: add additional exception/warnings to rst

* ENH: fix rst and typo
diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst
@@ -27,6 +27,7 @@ Exceptions and warnings
    errors.AbstractMethodError
    errors.AccessorRegistrationWarning
    errors.AttributeConflictWarning
+   errors.CategoricalConversionWarning
    errors.ClosedFileError
    errors.CSSWarning
    errors.DatabaseError
@@ -36,6 +37,7 @@ Exceptions and warnings
    errors.EmptyDataError
    errors.IncompatibilityWarning
    errors.IndexingError
+   errors.InvalidColumnName
    errors.InvalidIndexError
    errors.IntCastingNaNError
    errors.MergeError
@@ -49,6 +51,7 @@ Exceptions and warnings
    errors.ParserWarning
    errors.PerformanceWarning
    errors.PossibleDataLossError
+   errors.PossiblePrecisionLoss
    errors.PyperclipException
    errors.PyperclipWindowsException
    errors.SettingWithCopyError
@@ -57,6 +60,7 @@ Exceptions and warnings
    errors.UndefinedVariableError
    errors.UnsortedIndexError
    errors.UnsupportedFunctionCall
+   errors.ValueLabelTypeMismatch
 
 Bug report function
 -------------------
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -275,7 +275,7 @@ Other enhancements
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
 - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`)
 - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
-- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
+- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`)
 - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
 - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
 - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
@@ -479,10 +479,67 @@ class DatabaseError(OSError):
     """
 
 
+class PossiblePrecisionLoss(Warning):
+    """
+    Warning raised by to_stata when a column conversion may lose precision.
+
+    The column is converted from int64 to float64, and values outside the lossless
+    conversion range may lose precision in the saved data.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)})
+    >>> df.to_stata('test') # doctest: +SKIP
+    ... # PossiblePrecisionLoss: Column converted from int64 to float64...
+    """
+
+
+class ValueLabelTypeMismatch(Warning):
+    """
+    Warning raised by to_stata on a category column that contains non-string values.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")})
+    >>> df.to_stata('test') # doctest: +SKIP
+    ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str...
+    """
+
+
+class InvalidColumnName(Warning):
+    """
+    Warning raised by to_stata when a column name is not a valid Stata variable name.
+
+    Because the column name is not a valid Stata variable name, the name needs to
+    be converted.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])})
+    >>> df.to_stata('test') # doctest: +SKIP
+    ... # InvalidColumnName: Not all pandas column names were valid Stata variable...
+    """
+
+
+class CategoricalConversionWarning(Warning):
+    """
+    Warning is raised when reading a partially labeled Stata file using an iterator.
+
+    Examples
+    --------
+    >>> from pandas.io.stata import StataReader
+    >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP
+    ...   for i, block in enumerate(reader):
+    ...      print(i, block)
+    ... # CategoricalConversionWarning: One or more series with value labels...
+    """
+
+
 __all__ = [
     "AbstractMethodError",
     "AccessorRegistrationWarning",
     "AttributeConflictWarning",
+    "CategoricalConversionWarning",
     "ClosedFileError",
     "CSSWarning",
     "DatabaseError",
@@ -492,6 +549,7 @@ class DatabaseError(OSError):
     "EmptyDataError",
     "IncompatibilityWarning",
     "IntCastingNaNError",
+    "InvalidColumnName",
     "InvalidIndexError",
     "IndexingError",
     "MergeError",
@@ -505,6 +563,7 @@ class DatabaseError(OSError):
     "ParserWarning",
     "PerformanceWarning",
     "PossibleDataLossError",
+    "PossiblePrecisionLoss",
     "PyperclipException",
     "PyperclipWindowsException",
     "SettingWithCopyError",
@@ -513,4 +572,5 @@ class DatabaseError(OSError):
     "UndefinedVariableError",
     "UnsortedIndexError",
     "UnsupportedFunctionCall",
+    "ValueLabelTypeMismatch",
 ]
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -41,6 +41,12 @@
     StorageOptions,
     WriteBuffer,
 )
+from pandas.errors import (
+    CategoricalConversionWarning,
+    InvalidColumnName,
+    PossiblePrecisionLoss,
+    ValueLabelTypeMismatch,
+)
 from pandas.util._decorators import (
     Appender,
     doc,
@@ -493,31 +499,19 @@ def g(x: datetime.datetime) -> int:
 """
 
 
-class PossiblePrecisionLoss(Warning):
-    pass
-
-
 precision_loss_doc: Final = """
 Column converted from {0} to {1}, and some data are outside of the lossless
 conversion range. This may result in a loss of precision in the saved data.
 """
 
 
-class ValueLabelTypeMismatch(Warning):
-    pass
-
-
 value_label_mismatch_doc: Final = """
 Stata value labels (pandas categories) must be strings. Column {0} contains
 non-string labels which will be converted to strings.  Please check that the
 Stata data file created has not lost information due to duplicate labels.
 """
 
 
-class InvalidColumnName(Warning):
-    pass
-
-
 invalid_name_doc: Final = """
 Not all pandas column names were valid Stata variable names.
 The following replacements have been made:
@@ -530,11 +524,7 @@ class InvalidColumnName(Warning):
 """
 
 
-class CategoricalConversionWarning(Warning):
-    pass
-
-
-categorical_conversion_warning = """
+categorical_conversion_warning: Final = """
 One or more series with value labels are not fully labeled. Reading this
 dataset with an iterator results in categorical variable with different
 categories. This occurs since it is not possible to know all possible values
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
@@ -35,6 +35,10 @@
         "IncompatibilityWarning",
         "AttributeConflictWarning",
         "DatabaseError",
+        "PossiblePrecisionLoss",
+        "CategoricalConversionWarning",
+        "InvalidColumnName",
+        "ValueLabelTypeMismatch",
     ],
 )
 def test_exception_importable(exc):