From 802a45d908e690eb1d1a8d6c4f5de0f7fca4326e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 20 Jun 2023 17:13:55 +0100 Subject: [PATCH 1/3] add correction to std --- .../dataframe_api/column_object.py | 20 +++++++++++++++++-- .../dataframe_api/dataframe_object.py | 20 +++++++++++++++++-- .../dataframe_api/groupby_object.py | 4 ++-- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 17722f5f..9fa2debe 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -410,20 +410,36 @@ def mean(self, *, skip_nulls: bool = True) -> DType: dtypes. """ - def std(self, *, skip_nulls: bool = True) -> DType: + def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DType: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and datetime (with the appropriate timedelta format string) for datetime dtypes. + + Parameters + ---------- + correction + Correction to apply to the result. 0 for sample standard deviation + and 1 for population standard deviation. + skip_nulls + Whether to skip null values. """ - def var(self, *, skip_nulls: bool = True) -> DType: + def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DType: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and datetime (with the appropriate timedelta format string) for datetime dtypes. + + Parameters + ---------- + correction + Correction to apply to the result. 0 for sample standard deviation + and 1 for population standard deviation. + skip_nulls + Whether to skip null values. 
""" def is_null(self) -> Column: diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 46773428..2247c8a6 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -684,15 +684,31 @@ def mean(self, *, skip_nulls: bool = True) -> DataFrame: """ ... - def std(self, *, skip_nulls: bool = True) -> DataFrame: + def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. + + Parameters + ---------- + correction + Correction to apply to the result. 0 for sample standard deviation + and 1 for population standard deviation. + skip_nulls + Whether to skip null values. """ ... - def var(self, *, skip_nulls: bool = True) -> DataFrame: + def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. + + Parameters + ---------- + correction + Correction to apply to the result. 0 for sample standard deviation + and 1 for population standard deviation. + skip_nulls + Whether to skip null values. """ ... diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index cfc7bc62..11a7b102 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -41,10 +41,10 @@ def median(self, *, skip_nulls: bool = True) -> "DataFrame": def mean(self, *, skip_nulls: bool = True) -> "DataFrame": ... - def std(self, *, skip_nulls: bool = True) -> "DataFrame": + def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame": ... - def var(self, *, skip_nulls: bool = True) -> "DataFrame": + def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame": ... 
def size(self) -> "DataFrame": From a258400f5d46a7b6890f0156e27b66839232f1d1 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 26 Jun 2023 12:01:23 +0100 Subject: [PATCH 2/3] Update spec/API_specification/dataframe_api/column_object.py --- spec/API_specification/dataframe_api/column_object.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index f5cff999..90cb9d8c 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -479,8 +479,8 @@ def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar Parameters ---------- correction - Correction to apply to the result. 0 for sample standard deviation - and 1 for population standard deviation. + Correction to apply to the result. For example, 0 for sample standard + deviation and 1 for population standard deviation. skip_nulls Whether to skip null values. """ From b77d7027f1ae5fd57bb758e6d6d06a3547e25444 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 5 Jul 2023 13:00:02 +0200 Subject: [PATCH 3/3] Clarify the `correction` parameter and its allowed values in more detail --- .../dataframe_api/column_object.py | 19 +++++++++++++++---- .../dataframe_api/dataframe_object.py | 10 ++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 90cb9d8c..84b2e692 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -479,8 +479,18 @@ def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar Parameters ---------- correction - Correction to apply to the result. For example, 0 for sample standard - deviation and 1 for population standard deviation. 
+ Degrees of freedom adjustment. Setting this parameter to a value other + than ``0`` has the effect of adjusting the divisor during the + calculation of the standard deviation according to ``N-correction``, + where ``N`` corresponds to the total number of elements over which + the standard deviation is computed. When computing the standard + deviation of a population, setting this parameter to ``0`` is the + standard choice (i.e., the provided column contains data + constituting an entire population). When computing the corrected + sample standard deviation, setting this parameter to ``1`` is the + standard choice (i.e., the provided column contains data sampled + from a larger population; this is commonly referred to as Bessel's + correction). Fractional (float) values are allowed. Default: ``1``. skip_nulls Whether to skip null values. """ @@ -495,8 +505,9 @@ def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar Parameters ---------- correction - Correction to apply to the result. 0 for sample standard deviation - and 1 for population standard deviation. + Correction to apply to the result. For example, ``0`` for population + variance and ``1`` for sample variance. + See `Column.std` for a more detailed description. skip_nulls Whether to skip null values. """ diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index aca79068..bbec16f8 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -691,8 +691,9 @@ def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFr Parameters ---------- correction - Correction to apply to the result. 0 for sample standard deviation - and 1 for population standard deviation. + Correction to apply to the result. For example, ``0`` for population + standard deviation and ``1`` for sample standard deviation. 
+ See `Column.std` for a more detailed description. skip_nulls Whether to skip null values. """ @@ -705,8 +706,9 @@ def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFr Parameters ---------- correction - Correction to apply to the result. 0 for sample standard deviation - and 1 for population standard deviation. + Correction to apply to the result. For example, ``0`` for population + variance and ``1`` for sample variance. + See `Column.std` for a more detailed description. skip_nulls Whether to skip null values. """