From f7a48f33b3aca249d03c471633fc6bb661f76e85 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 16:01:34 -0700 Subject: [PATCH] REF: define reductions non-dynamically --- pandas/core/frame.py | 209 ++++++++-- pandas/core/generic.py | 883 +++++++++++++++-------------------------- pandas/core/series.py | 135 ++++++- 3 files changed, 620 insertions(+), 607 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74873abac0758..44320de2c3209 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -140,7 +140,10 @@ sanitize_array, sanitize_masked_array, ) -from pandas.core.generic import NDFrame +from pandas.core.generic import ( + NDFrame, + make_doc, +) from pandas.core.indexers import check_key_length from pandas.core.indexes.api import ( DatetimeIndex, @@ -9592,43 +9595,6 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): agg = aggregate - # error: Signature of "any" incompatible with supertype "NDFrame" [override] - @overload # type: ignore[override] - def any( - self, - *, - axis: Axis = ..., - bool_only: bool | None = ..., - skipna: bool = ..., - level: None = ..., - **kwargs, - ) -> Series: - ... - - @overload - def any( - self, - *, - axis: Axis = ..., - bool_only: bool | None = ..., - skipna: bool = ..., - level: Level, - **kwargs, - ) -> DataFrame | Series: - ... - - # error: Missing return statement - @doc(NDFrame.any, **_shared_doc_kwargs) - def any( # type: ignore[empty-body] - self, - axis: Axis = 0, - bool_only: bool | None = None, - skipna: bool = True, - level: Level = None, - **kwargs, - ) -> DataFrame | Series: - ... - @doc( _shared_docs["transform"], klass=_shared_doc_kwargs["klass"], @@ -10920,6 +10886,170 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: res_ser = self._constructor_sliced(result, index=self.index, copy=False) return res_ser + @doc(make_doc("any", ndim=2)) + # error: Signature of "any" incompatible with supertype "NDFrame" + def any( # type: ignore[override] + self, + *, + axis: Axis = 0, + bool_only=None, + skipna: bool = True, + **kwargs, + ) -> Series: + # error: Incompatible return value type (got "Union[Series, bool]", + # expected "Series") + return self._logical_func( # type: ignore[return-value] + "any", nanops.nanany, axis, bool_only, skipna, **kwargs + ) + + @doc(make_doc("all", ndim=2)) + def all( + self, + axis: Axis = 0, + bool_only=None, + skipna: bool = True, + **kwargs, + ) -> Series: + # error: Incompatible return value type (got "Union[Series, bool]", + # expected "Series") + return self._logical_func( # type: ignore[return-value] + "all", nanops.nanall, axis, bool_only, skipna, **kwargs + ) + + @doc(make_doc("min", ndim=2)) + def min( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return super().min(axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("max", ndim=2)) + def max( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return super().max(axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("sum", ndim=2)) + def sum( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + min_count: int = 0, + **kwargs, + ): + return super().sum(axis, skipna, numeric_only, min_count, **kwargs) + + @doc(make_doc("prod", ndim=2)) + def prod( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + min_count: int = 0, + **kwargs, + ): + return super().prod(axis, skipna, numeric_only, min_count, **kwargs) + + @doc(make_doc("mean", ndim=2)) + def mean( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return super().mean(axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("median", ndim=2)) + def median( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return super().median(axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("sem", ndim=2)) + def sem( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + return super().sem(axis, skipna, ddof, numeric_only, **kwargs) + + @doc(make_doc("var", ndim=2)) + def var( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + return super().var(axis, skipna, ddof, numeric_only, **kwargs) + + @doc(make_doc("std", ndim=2)) + def std( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + return super().std(axis, skipna, ddof, numeric_only, **kwargs) + + @doc(make_doc("skew", ndim=2)) + def skew( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return super().skew(axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("kurt", ndim=2)) + def kurt( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return super().kurt(axis, skipna, numeric_only, **kwargs) + + kurtosis = kurt + product = prod + + @doc(make_doc("cummin", ndim=2)) + def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + + @doc(make_doc("cummax", ndim=2)) + def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + + @doc(make_doc("cumsum", ndim=2)) + def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) + + @doc(make_doc("cumprod", 2)) + def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) + def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: """ Count number of distinct elements in specified axis. @@ -11724,9 +11854,6 @@ def values(self) -> np.ndarray: return self._mgr.as_array() -DataFrame._add_numeric_operations() - - def _from_nested_dict(data) -> collections.defaultdict: new_data: collections.defaultdict = collections.defaultdict(dict) for index, s in data.items(): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d9bca9208f5a..38bac75bd48b9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11210,7 +11210,7 @@ def any( bool_only: bool_t = False, skipna: bool_t = True, **kwargs, - ) -> DataFrame | Series | bool_t: + ) -> Series | bool_t: return self._logical_func( "any", nanops.nanany, axis, bool_only, skipna, **kwargs ) @@ -11498,556 +11498,6 @@ def prod( product = prod - @classmethod - def _add_numeric_operations(cls) -> None: - """ - Add the operations to the cls; evaluate the doc strings again - """ - axis_descr, name1, name2 = _doc_params(cls) - - @doc( - _bool_doc, - desc=_any_desc, - name1=name1, - name2=name2, - axis_descr=axis_descr, - see_also=_any_see_also, - examples=_any_examples, - empty_value=False, - ) - def any( - self, - *, - axis: Axis = 0, - bool_only=None, - skipna: bool_t = True, - **kwargs, - ): - return self._logical_func( - "any", nanops.nanany, axis, bool_only, skipna, **kwargs - ) - - if cls._typ == "dataframe": - setattr(cls, "any", any) - - @doc( - _bool_doc, - desc=_all_desc, - name1=name1, - name2=name2, - axis_descr=axis_descr, - see_also=_all_see_also, - examples=_all_examples, - empty_value=True, - ) - def all( - self, - axis: Axis = 0, - bool_only=None, - skipna: bool_t = True, - **kwargs, - ): - return self._logical_func( - "all", nanops.nanall, axis, bool_only, skipna, **kwargs - ) - - if cls._typ == "dataframe": - setattr(cls, "all", all) - - @doc( - _num_ddof_doc, - desc="Return unbiased standard error of the mean over requested " - "axis.\n\nNormalized by N-1 by default. This can be changed " - "using the ddof argument", - name1=name1, - name2=name2, - axis_descr=axis_descr, - notes="", - examples=""" - - Examples - -------- - >>> s = pd.Series([1, 2, 3]) - >>> s.sem().round(6) - 0.57735 - - With a DataFrame - - >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) - >>> df - a b - tiger 1 2 - zebra 2 3 - >>> df.sem() - a 0.5 - b 0.5 - dtype: float64 - - Using axis=1 - - >>> df.sem(axis=1) - tiger 0.5 - zebra 0.5 - dtype: float64 - - In this case, `numeric_only` should be set to `True` - to avoid getting an error. - - >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, - ... index=['tiger', 'zebra']) - >>> df.sem(numeric_only=True) - a 0.5 - dtype: float64""", - ) - def sem( - self, - axis: Axis | None = None, - skipna: bool_t = True, - ddof: int = 1, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs) - - setattr(cls, "sem", sem) - - @doc( - _num_ddof_doc, - desc="Return unbiased variance over requested axis.\n\nNormalized by " - "N-1 by default. This can be changed using the ddof argument.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - notes="", - examples=_var_examples, - ) - def var( - self, - axis: Axis | None = None, - skipna: bool_t = True, - ddof: int = 1, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs) - - setattr(cls, "var", var) - - @doc( - _num_ddof_doc, - desc="Return sample standard deviation over requested axis." - "\n\nNormalized by N-1 by default. This can be changed using the " - "ddof argument.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - notes=_std_notes, - examples=_std_examples, - ) - def std( - self, - axis: Axis | None = None, - skipna: bool_t = True, - ddof: int = 1, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs) - - setattr(cls, "std", std) - - @doc( - _cnum_doc, - desc="minimum", - name1=name1, - name2=name2, - axis_descr=axis_descr, - accum_func_name="min", - examples=_cummin_examples, - ) - def cummin( - self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs - ): - return NDFrame.cummin(self, axis, skipna, *args, **kwargs) - - setattr(cls, "cummin", cummin) - - @doc( - _cnum_doc, - desc="maximum", - name1=name1, - name2=name2, - axis_descr=axis_descr, - accum_func_name="max", - examples=_cummax_examples, - ) - def cummax( - self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs - ): - return NDFrame.cummax(self, axis, skipna, *args, **kwargs) - - setattr(cls, "cummax", cummax) - - @doc( - _cnum_doc, - desc="sum", - name1=name1, - name2=name2, - axis_descr=axis_descr, - accum_func_name="sum", - examples=_cumsum_examples, - ) - def cumsum( - self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs - ): - return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) - - setattr(cls, "cumsum", cumsum) - - @doc( - _cnum_doc, - desc="product", - name1=name1, - name2=name2, - axis_descr=axis_descr, - accum_func_name="prod", - examples=_cumprod_examples, - ) - def cumprod( - self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs - ): - return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) - - setattr(cls, "cumprod", cumprod) - - # error: Untyped decorator makes function "sum" untyped - @doc( # type: ignore[misc] - _num_doc, - desc="Return the sum of the values over the requested axis.\n\n" - "This is equivalent to the method ``numpy.sum``.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count=_min_count_stub, - see_also=_stat_func_see_also, - examples=_sum_examples, - ) - def sum( - self, - axis: Axis | None = None, - skipna: bool_t = True, - numeric_only: bool_t = False, - min_count: int = 0, - **kwargs, - ): - return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs) - - setattr(cls, "sum", sum) - - @doc( - _num_doc, - desc="Return the product of the values over the requested axis.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count=_min_count_stub, - see_also=_stat_func_see_also, - examples=_prod_examples, - ) - def prod( - self, - axis: Axis | None = None, - skipna: bool_t = True, - numeric_only: bool_t = False, - min_count: int = 0, - **kwargs, - ): - return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs) - - setattr(cls, "prod", prod) - cls.product = prod - - @doc( - _num_doc, - desc="Return the mean of the values over the requested axis.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also="", - examples=""" - - Examples - -------- - >>> s = pd.Series([1, 2, 3]) - >>> s.mean() - 2.0 - - With a DataFrame - - >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) - >>> df - a b - tiger 1 2 - zebra 2 3 - >>> df.mean() - a 1.5 - b 2.5 - dtype: float64 - - Using axis=1 - - >>> df.mean(axis=1) - tiger 1.5 - zebra 2.5 - dtype: float64 - - In this case, `numeric_only` should be set to `True` to avoid - getting an error. - - >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, - ... index=['tiger', 'zebra']) - >>> df.mean(numeric_only=True) - a 1.5 - dtype: float64""", - ) - def mean( - self, - axis: AxisInt | None = 0, - skipna: bool_t = True, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) - - setattr(cls, "mean", mean) - - @doc( - _num_doc, - desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also="", - examples=""" - - Examples - -------- - >>> s = pd.Series([1, 2, 3]) - >>> s.skew() - 0.0 - - With a DataFrame - - >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [1, 3, 5]}, - ... index=['tiger', 'zebra', 'cow']) - >>> df - a b c - tiger 1 2 1 - zebra 2 3 3 - cow 3 4 5 - >>> df.skew() - a 0.0 - b 0.0 - c 0.0 - dtype: float64 - - Using axis=1 - - >>> df.skew(axis=1) - tiger 1.732051 - zebra -1.732051 - cow 0.000000 - dtype: float64 - - In this case, `numeric_only` should be set to `True` to avoid - getting an error. - - >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': ['T', 'Z', 'X']}, - ... index=['tiger', 'zebra', 'cow']) - >>> df.skew(numeric_only=True) - a 0.0 - dtype: float64""", - ) - def skew( - self, - axis: AxisInt | None = 0, - skipna: bool_t = True, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs) - - setattr(cls, "skew", skew) - - @doc( - _num_doc, - desc="Return unbiased kurtosis over requested axis.\n\n" - "Kurtosis obtained using Fisher's definition of\n" - "kurtosis (kurtosis of normal == 0.0). Normalized " - "by N-1.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also="", - examples=""" - - Examples - -------- - >>> s = pd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse']) - >>> s - cat 1 - dog 2 - dog 2 - mouse 3 - dtype: int64 - >>> s.kurt() - 1.5 - - With a DataFrame - - >>> df = pd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]}, - ... index=['cat', 'dog', 'dog', 'mouse']) - >>> df - a b - cat 1 3 - dog 2 4 - dog 2 4 - mouse 3 4 - >>> df.kurt() - a 1.5 - b 4.0 - dtype: float64 - - With axis=None - - >>> df.kurt(axis=None).round(6) - -0.988693 - - Using axis=1 - - >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [3, 4], 'd': [1, 2]}, - ... index=['cat', 'dog']) - >>> df.kurt(axis=1) - cat -6.0 - dog -6.0 - dtype: float64""", - ) - def kurt( - self, - axis: Axis | None = 0, - skipna: bool_t = True, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs) - - setattr(cls, "kurt", kurt) - cls.kurtosis = kurt - - @doc( - _num_doc, - desc="Return the median of the values over the requested axis.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also="", - examples=""" - - Examples - -------- - >>> s = pd.Series([1, 2, 3]) - >>> s.median() - 2.0 - - With a DataFrame - - >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) - >>> df - a b - tiger 1 2 - zebra 2 3 - >>> df.median() - a 1.5 - b 2.5 - dtype: float64 - - Using axis=1 - - >>> df.median(axis=1) - tiger 1.5 - zebra 2.5 - dtype: float64 - - In this case, `numeric_only` should be set to `True` - to avoid getting an error. - - >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, - ... index=['tiger', 'zebra']) - >>> df.median(numeric_only=True) - a 1.5 - dtype: float64""", - ) - def median( - self, - axis: AxisInt | None = 0, - skipna: bool_t = True, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.median(self, axis, skipna, numeric_only, **kwargs) - - setattr(cls, "median", median) - - @doc( - _num_doc, - desc="Return the maximum of the values over the requested axis.\n\n" - "If you want the *index* of the maximum, use ``idxmax``. This is " - "the equivalent of the ``numpy.ndarray`` method ``argmax``.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also=_stat_func_see_also, - examples=_max_examples, - ) - def max( - self, - axis: AxisInt | None = 0, - skipna: bool_t = True, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.max(self, axis, skipna, numeric_only, **kwargs) - - setattr(cls, "max", max) - - @doc( - _num_doc, - desc="Return the minimum of the values over the requested axis.\n\n" - "If you want the *index* of the minimum, use ``idxmin``. This is " - "the equivalent of the ``numpy.ndarray`` method ``argmin``.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - min_count="", - see_also=_stat_func_see_also, - examples=_min_examples, - ) - def min( - self, - axis: AxisInt | None = 0, - skipna: bool_t = True, - numeric_only: bool_t = False, - **kwargs, - ): - return NDFrame.min(self, axis, skipna, numeric_only, **kwargs) - - setattr(cls, "min", min) - @final @doc(Rolling) def rolling( @@ -12316,16 +11766,6 @@ def last_valid_index(self) -> Hashable | None: return self._find_valid_index(how="last") -def _doc_params(cls): - """Return a tuple of the doc params.""" - axis_descr = ( - f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}" - ) - name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" - name2 = cls.__name__ - return axis_descr, name, name2 - - _num_doc = """ {desc} @@ -13038,25 +12478,338 @@ def make_doc(name: str, ndim: int) -> str: axis_descr = "{index (0), columns (1)}" if name == "any": + base_doc = _bool_doc desc = _any_desc see_also = _any_see_also examples = _any_examples - empty_value = False + kwargs = {"empty_value": "False"} elif name == "all": + base_doc = _bool_doc desc = _all_desc see_also = _all_see_also examples = _all_examples - empty_value = True + kwargs = {"empty_value": "True"} + elif name == "min": + base_doc = _num_doc + desc = ( + "Return the minimum of the values over the requested axis.\n\n" + "If you want the *index* of the minimum, use ``idxmin``. This is " + "the equivalent of the ``numpy.ndarray`` method ``argmin``." + ) + see_also = _stat_func_see_also + examples = _min_examples + kwargs = {"min_count": ""} + elif name == "max": + base_doc = _num_doc + desc = ( + "Return the maximum of the values over the requested axis.\n\n" + "If you want the *index* of the maximum, use ``idxmax``. This is " + "the equivalent of the ``numpy.ndarray`` method ``argmax``." + ) + see_also = _stat_func_see_also + examples = _max_examples + kwargs = {"min_count": ""} + + elif name == "sum": + base_doc = _num_doc + desc = ( + "Return the sum of the values over the requested axis.\n\n" + "This is equivalent to the method ``numpy.sum``." + ) + see_also = _stat_func_see_also + examples = _sum_examples + kwargs = {"min_count": _min_count_stub} + + elif name == "prod": + base_doc = _num_doc + desc = "Return the product of the values over the requested axis." + see_also = _stat_func_see_also + examples = _prod_examples + kwargs = {"min_count": _min_count_stub} + + elif name == "median": + base_doc = _num_doc + desc = "Return the median of the values over the requested axis." + see_also = "" + examples = """ + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.median() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.median() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.median(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` + to avoid getting an error. + + >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, + ... index=['tiger', 'zebra']) + >>> df.median(numeric_only=True) + a 1.5 + dtype: float64""" + kwargs = {"min_count": ""} + + elif name == "mean": + base_doc = _num_doc + desc = "Return the mean of the values over the requested axis." + see_also = "" + examples = """ + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.mean() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.mean() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.mean(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` to avoid + getting an error. + + >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, + ... index=['tiger', 'zebra']) + >>> df.mean(numeric_only=True) + a 1.5 + dtype: float64""" + kwargs = {"min_count": ""} + + elif name == "var": + base_doc = _num_ddof_doc + desc = ( + "Return unbiased variance over requested axis.\n\nNormalized by " + "N-1 by default. This can be changed using the ddof argument." + ) + examples = _var_examples + see_also = "" + kwargs = {"notes": ""} + + elif name == "std": + base_doc = _num_ddof_doc + desc = ( + "Return sample standard deviation over requested axis." + "\n\nNormalized by N-1 by default. This can be changed using the " + "ddof argument." + ) + examples = _std_examples + see_also = "" + kwargs = {"notes": _std_notes} + + elif name == "sem": + base_doc = _num_ddof_doc + desc = ( + "Return unbiased standard error of the mean over requested " + "axis.\n\nNormalized by N-1 by default. This can be changed " + "using the ddof argument" + ) + examples = """ + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.sem().round(6) + 0.57735 + + With a DataFrame + + >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.sem() + a 0.5 + b 0.5 + dtype: float64 + + Using axis=1 + + >>> df.sem(axis=1) + tiger 0.5 + zebra 0.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` + to avoid getting an error. + + >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, + ... index=['tiger', 'zebra']) + >>> df.sem(numeric_only=True) + a 0.5 + dtype: float64""" + see_also = "" + kwargs = {"notes": ""} + + elif name == "skew": + base_doc = _num_doc + desc = "Return unbiased skew over requested axis.\n\nNormalized by N-1." + see_also = "" + examples = """ + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.skew() + 0.0 + + With a DataFrame + + >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [1, 3, 5]}, + ... index=['tiger', 'zebra', 'cow']) + >>> df + a b c + tiger 1 2 1 + zebra 2 3 3 + cow 3 4 5 + >>> df.skew() + a 0.0 + b 0.0 + c 0.0 + dtype: float64 + + Using axis=1 + + >>> df.skew(axis=1) + tiger 1.732051 + zebra -1.732051 + cow 0.000000 + dtype: float64 + + In this case, `numeric_only` should be set to `True` to avoid + getting an error. + + >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': ['T', 'Z', 'X']}, + ... index=['tiger', 'zebra', 'cow']) + >>> df.skew(numeric_only=True) + a 0.0 + dtype: float64""" + kwargs = {"min_count": ""} + elif name == "kurt": + base_doc = _num_doc + desc = ( + "Return unbiased kurtosis over requested axis.\n\n" + "Kurtosis obtained using Fisher's definition of\n" + "kurtosis (kurtosis of normal == 0.0). Normalized " + "by N-1." + ) + see_also = "" + examples = """ + + Examples + -------- + >>> s = pd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse']) + >>> s + cat 1 + dog 2 + dog 2 + mouse 3 + dtype: int64 + >>> s.kurt() + 1.5 + + With a DataFrame + + >>> df = pd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]}, + ... index=['cat', 'dog', 'dog', 'mouse']) + >>> df + a b + cat 1 3 + dog 2 4 + dog 2 4 + mouse 3 4 + >>> df.kurt() + a 1.5 + b 4.0 + dtype: float64 + + With axis=None + + >>> df.kurt(axis=None).round(6) + -0.988693 + + Using axis=1 + + >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [3, 4], 'd': [1, 2]}, + ... index=['cat', 'dog']) + >>> df.kurt(axis=1) + cat -6.0 + dog -6.0 + dtype: float64""" + kwargs = {"min_count": ""} + + elif name == "cumsum": + base_doc = _cnum_doc + desc = "sum" + see_also = "" + examples = _cumsum_examples + kwargs = {"accum_func_name": "sum"} + + elif name == "cumprod": + base_doc = _cnum_doc + desc = "product" + see_also = "" + examples = _cumprod_examples + kwargs = {"accum_func_name": "prod"} + + elif name == "cummin": + base_doc = _cnum_doc + desc = "minimum" + see_also = "" + examples = _cummin_examples + kwargs = {"accum_func_name": "min"} + + elif name == "cummax": + base_doc = _cnum_doc + desc = "maximum" + see_also = "" + examples = _cummax_examples + kwargs = {"accum_func_name": "max"} + else: raise NotImplementedError - docstr = _bool_doc.format( + docstr = base_doc.format( desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, see_also=see_also, examples=examples, - empty_value=empty_value, + **kwargs, ) return docstr diff --git a/pandas/core/series.py b/pandas/core/series.py index 2bed6400eabbf..9c91badc57ce3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6091,5 +6091,138 @@ def all( filter_type="bool", ) + @doc(make_doc("min", ndim=1)) + # error: Signature of "min" incompatible with supertype "IndexOpsMixin" + def min( # type: ignore[override] + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.min(self, axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("max", ndim=1)) + # error: Signature of "max" incompatible with supertype "IndexOpsMixin" + def max( # type: ignore[override] + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.max(self, axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("sum", ndim=1)) + def sum( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + min_count: int = 0, + **kwargs, + ): + return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs) + + @doc(make_doc("prod", ndim=1)) + def prod( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + min_count: int = 0, + **kwargs, + ): + return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs) + + @doc(make_doc("mean", ndim=1)) + def mean( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("median", ndim=1)) + def median( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.median(self, axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("sem", ndim=1)) + def sem( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs) + + @doc(make_doc("var", ndim=1)) + def var( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs) + + @doc(make_doc("std", ndim=1)) + def std( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs) + + @doc(make_doc("skew", ndim=1)) + def skew( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs) + + @doc(make_doc("kurt", ndim=1)) + def kurt( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs) + + kurtosis = kurt + product = prod + + @doc(make_doc("cummin", ndim=1)) + def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + + @doc(make_doc("cummax", ndim=1)) + def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + + @doc(make_doc("cumsum", ndim=1)) + def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) -Series._add_numeric_operations() + @doc(make_doc("cumprod", 1)) + def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)