From 73062624c78cd8818ecd63735c54870d353d7ba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Fri, 28 Jul 2023 19:14:39 +0200 Subject: [PATCH 1/2] Examples ExtensionArray._from_sequence_of_strings ... --- ci/code_checks.sh | 8 ------- pandas/core/arrays/base.py | 44 ++++++++++++++++++++++++++++++++++ pandas/errors/__init__.py | 19 +++++++++++++++ scripts/validate_docstrings.py | 1 + 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index dd9219f03abca..f6ed36b735cf4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -63,10 +63,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (EX01)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \ - pandas.errors.PerformanceWarning \ - pandas.errors.PyperclipException \ pandas.errors.PyperclipWindowsException \ - pandas.errors.UnsortedIndexError \ pandas.errors.UnsupportedFunctionCall \ pandas.NaT \ pandas.io.stata.StataReader.data_label \ @@ -78,11 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.api.extensions.ExtensionDtype \ pandas.api.extensions.ExtensionArray \ pandas.arrays.NumpyExtensionArray \ - pandas.api.extensions.ExtensionArray._from_sequence_of_strings \ - pandas.api.extensions.ExtensionArray._hash_pandas_object \ - pandas.api.extensions.ExtensionArray._reduce \ - pandas.api.extensions.ExtensionArray._values_for_factorize \ - pandas.api.extensions.ExtensionArray.interpolate \ RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1f48d124f7948..d810af94ccfe4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -301,6 +301,13 @@ def _from_sequence_of_strings( Returns ------- ExtensionArray + + Examples + -------- + >>> pd.arrays.IntegerArray._from_sequence_of_strings(["1", "2", "3"]) + <IntegerArray> + [1, 2, 3] + Length: 3, dtype: Int64 """
raise AbstractMethodError(cls) @@ -878,6 +885,21 @@ def interpolate( ) -> Self: """ See DataFrame.interpolate.__doc__. + + Examples + -------- + >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3])) + >>> arr.interpolate(method="linear", + ... limit=3, + ... limit_direction="forward", + ... index=pd.Index([1, 2, 3, 4]), + ... fill_value=1, + ... copy=False, axis=0, + ... limit_area="inside" + ... ) + <NumpyExtensionArray> + [0.0, 1.0, 2.0, 3.0] + Length: 4, dtype: float64 """ # NB: we return type(self) even if copy=False raise NotImplementedError( @@ -1212,6 +1234,11 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]: The values returned by this method are also used in :func:`pandas.util.hash_pandas_object`. If needed, this can be overridden in the ``self._hash_pandas_object()`` method. + + Examples + -------- + >>> pd.array([1, 2, 3])._values_for_factorize() + (array([1, 2, 3], dtype=object), nan) """ return self.astype(object), np.nan @@ -1714,6 +1741,11 @@ def _reduce( Raises ------ TypeError : subclass does not define reductions + + Examples + -------- + >>> pd.array([1, 2, 3])._reduce("min") + 1 """ meth = getattr(self, name, None) if meth is None: @@ -1760,12 +1792,24 @@ def _hash_pandas_object( Parameters ---------- encoding : str + Encoding for data & key when strings. hash_key : str + Hash_key for string key to encode. categorize : bool + Whether to first categorize object arrays before hashing. This is more + efficient when the array contains duplicate values. Returns ------- np.ndarray[uint64] + + Examples + -------- + >>> pd.array([1, 2])._hash_pandas_object(encoding='utf-8', + ... hash_key="1000000000000000", + ... categorize=False + ... 
) + array([11381023671546835630, 4641644667904626417], dtype=uint64) """ from pandas.core.util.hashing import hash_array diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 71e12109292ae..9cdb07c9d72b6 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -62,6 +62,25 @@ class UnsortedIndexError(KeyError): Error raised when slicing a MultiIndex which has not been lexsorted. Subclass of `KeyError`. + + Examples + -------- + >>> df = pd.DataFrame({"cat": [0, 0, 1, 1], + ... "color": ["white", "white", "brown", "black"], + ... "lives": [4, 4, 3, 7]}, + ... ) + >>> df = df.set_index(["cat", "color"]) + >>> df + lives + cat color + 0 white 4 + white 4 + 1 brown 3 + black 7 + >>> df.loc[(0, "black"):(1, "white")] + Traceback (most recent call last): + UnsortedIndexError: 'Key length (2) was greater + than MultiIndex lexsort depth (1)' """ diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 206932a18c60a..43cca70d92077 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -53,6 +53,7 @@ "errors.LossySetitemError", "errors.NoBufferPresent", "errors.IncompatibilityWarning", + "errors.PyperclipException", } PRIVATE_CLASSES = ["NDFrame", "IndexOpsMixin"] ERROR_MSGS = { From f980d0fe6d9f30a166fce166f336450681e29115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Fri, 28 Jul 2023 20:49:59 +0200 Subject: [PATCH 2/2] Moved axis to new line --- pandas/core/arrays/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d810af94ccfe4..f69ddac4db6bf 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -894,7 +894,8 @@ def interpolate( ... limit_direction="forward", ... index=pd.Index([1, 2, 3, 4]), ... fill_value=1, - ... copy=False, axis=0, + ... copy=False, + ... axis=0, ... limit_area="inside" ... )