diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index 7da02f7958416..7f4d05414d254 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -24,13 +24,14 @@ The following subpackages are public. `pandas-stubs `_ package which has classes in addition to those that occur in pandas for type-hinting. -In addition, public functions in ``pandas.io`` and ``pandas.tseries`` submodules -are mentioned in the documentation. +In addition, public functions in ``pandas.io``, ``pandas.tseries``, and ``pandas.util`` submodules +are explicitly mentioned in the documentation. Further APIs in these modules are not guaranteed +to be stable. .. warning:: - The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. + The ``pandas.core`` and ``pandas.compat`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. .. If you update this toctree, also update the manual toctree in the .. 
main index.rst.template diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97f4eaa7c208a..cda25bd7b98b0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -60,6 +60,8 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, +) +from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87a394761ee7c..a7e3e6c48c4ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -101,6 +101,8 @@ InvalidIndexError, SettingWithCopyError, SettingWithCopyWarning, +) +from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_warning_method_msg, _check_cacher, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2e7a237406ca5..24f3ff4279a84 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -27,6 +27,8 @@ IndexingError, InvalidIndexError, LossySetitemError, +) +from pandas.errors.cow import ( _chained_assignment_msg, _chained_assignment_warning_msg, _check_cacher, diff --git a/pandas/core/series.py b/pandas/core/series.py index ad63bc8a8750f..19e54954bb41c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,6 +45,8 @@ from pandas.errors import ( ChainedAssignmentError, InvalidIndexError, +) +from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index d47e02cda1837..52b896dc01e8f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -475,81 +475,6 @@ class ChainedAssignmentError(Warning): """ -_chained_assignment_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment.\n" - "When using the Copy-on-Write mode, such chained assignment never works " - "to update the original DataFrame or Series, because the 
intermediate " - "object on which we are setting values always behaves as a copy.\n\n" - "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform " - "the assignment in a single step.\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" -) - - -_chained_assignment_method_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment using an inplace method.\n" - "When using the Copy-on-Write mode, such inplace method never works " - "to update the original DataFrame or Series, because the intermediate " - "object on which we are setting values always behaves as a copy.\n\n" - "For example, when doing 'df[col].method(value, inplace=True)', try " - "using 'df.method({col: value}, inplace=True)' instead, to perform " - "the operation inplace on the original object.\n\n" -) - - -_chained_assignment_warning_msg = ( - "ChainedAssignmentError: behaviour will change in pandas 3.0!\n" - "You are setting values through chained assignment. 
Currently this works " - "in certain cases, but when using Copy-on-Write (which will become the " - "default behaviour in pandas 3.0) this will never work to update the " - "original DataFrame or Series, because the intermediate object on which " - "we are setting values will behave as a copy.\n" - "A typical example is when you are setting values in a column of a " - "DataFrame, like:\n\n" - 'df["col"][row_indexer] = value\n\n' - 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the ' - "assignment in a single step and ensure this keeps updating the original `df`.\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy\n" -) - - -_chained_assignment_warning_method_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment using an inplace method.\n" - "The behavior will change in pandas 3.0. This inplace method will " - "never work because the intermediate object on which we are setting " - "values always behaves as a copy.\n\n" - "For example, when doing 'df[col].method(value, inplace=True)', try " - "using 'df.method({col: value}, inplace=True)' or " - "df[col] = df[col].method(value) instead, to perform " - "the operation inplace on the original object.\n\n" -) - - -def _check_cacher(obj) -> bool: - # This is a mess, selection paths that return a view set the _cacher attribute - # on the Series; most of them also set _item_cache which adds 1 to our relevant - # reference count, but iloc does not, so we have to check if we are actually - # in the item cache - if hasattr(obj, "_cacher"): - parent = obj._cacher[1]() - # parent could be dead - if parent is None: - return False - if hasattr(parent, "_item_cache"): - if obj._cacher[0] in parent._item_cache: - # Check if we are actually the item from item_cache, iloc creates a - # new object - return obj is parent._item_cache[obj._cacher[0]] - return False - - 
class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. @@ -831,6 +756,7 @@ class InvalidComparison(Exception): "AbstractMethodError", "AttributeConflictWarning", "CategoricalConversionWarning", + "ChainedAssignmentError", "ClosedFileError", "CSSWarning", "DatabaseError", diff --git a/pandas/errors/cow.py b/pandas/errors/cow.py new file mode 100644 index 0000000000000..2215ec2148757 --- /dev/null +++ b/pandas/errors/cow.py @@ -0,0 +1,74 @@ +from typing import Any + +_chained_assignment_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment.\n" + "When using the Copy-on-Write mode, such chained assignment never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform " + "the assignment in a single step.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" +) + + +_chained_assignment_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "When using the Copy-on-Write mode, such inplace method never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' instead, to perform " + "the operation inplace on the original object.\n\n" +) + + +_chained_assignment_warning_msg = ( + "ChainedAssignmentError: behaviour will change in pandas 3.0!\n" + "You are setting values through chained assignment. 
Currently this works " + "in certain cases, but when using Copy-on-Write (which will become the " + "default behaviour in pandas 3.0) this will never work to update the " + "original DataFrame or Series, because the intermediate object on which " + "we are setting values will behave as a copy.\n" + "A typical example is when you are setting values in a column of a " + "DataFrame, like:\n\n" + 'df["col"][row_indexer] = value\n\n' + 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the ' + "assignment in a single step and ensure this keeps updating the original `df`.\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy\n" +) + +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + +def _check_cacher(obj: Any) -> bool: + # This is a mess, selection paths that return a view set the _cacher attribute + # on the Series; most of them also set _item_cache which adds 1 to our relevant + # reference count, but iloc does not, so we have to check if we are actually + # in the item cache + if hasattr(obj, "_cacher"): + parent = obj._cacher[1]() + # parent could be dead + if parent is None: + return False + if hasattr(parent, "_item_cache"): + if obj._cacher[0] in parent._item_cache: + # Check if we are actually the item from item_cache, iloc creates a + # new object + return obj is parent._item_cache[obj._cacher[0]] + return False 
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 60bcb97aaa364..61d6aaf63adf1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -357,6 +357,29 @@ def test_api_extensions(self): self.check(api_extensions, self.allowed_api_extensions) +class TestErrors(Base): + def test_errors(self): + self.check(pd.errors, pd.errors.__all__, ignored=["ctypes", "cow"]) + + +class TestUtil(Base): + def test_util(self): + self.check( + pd.util, + ["hash_array", "hash_pandas_object"], + ignored=[ + "_decorators", + "_test_decorators", + "_exceptions", + "_validators", + "capitalize_first_letter", + "version", + "_print_versions", + "_tester", + ], + ) + + class TestTesting(Base): funcs = [ "assert_frame_equal", diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index 91282fde8b11d..59ab324ba38ca 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -25,5 +25,9 @@ def __getattr__(key: str): raise AttributeError(f"module 'pandas.util' has no attribute '{key}'") +def __dir__(): + return list(globals().keys()) + ["hash_array", "hash_pandas_object"] + + def capitalize_first_letter(s: str) -> str: return s[:1].upper() + s[1:]