From 5e1dd17db7ff65ec1e2fc9e3fa4e757486e7ddd7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 31 Oct 2023 16:56:26 +0000 Subject: [PATCH 1/5] wip --- .../dataframe_api/column_object.py | 25 ++++++++++++++++++- .../dataframe_api/dataframe_object.py | 14 +++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 0bccf2d0..3adae007 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -5,6 +5,7 @@ if TYPE_CHECKING: from .typing import NullType, Scalar, DType, Namespace from typing_extensions import Self + from dataframe_api.dataframe_object import DataFrame __all__ = ['Column'] @@ -17,8 +18,30 @@ class Column(Protocol): Note that this column object is not meant to be instantiated directly by users of the library implementing the dataframe API standard. Rather, use constructor functions or an already-created dataframe object retrieved via - + :meth:`DataFrame.col`. """ + @property + def dataframe(self) -> DataFrame | None: + """ + Return parent DataFrame, if present. + + For example, if we have the following + + .. code-block:: python + + df: DataFrame + column = df.col('a') + + then `column.dataframe` should return `df`. + + On the other hand, if we had: + + .. code-block:: python + + column = column_from_1d_array(...) + + then `column.dataframe` should return `None`. 
+ """ def __column_namespace__(self) -> Namespace: """ diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index f5c17d80..14b61147 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -142,6 +142,11 @@ def get_rows(self, indices: Column) -> Self: Returns ------- DataFrame + + Notes + ----- + `indices`'s parent DataFrame must be `self` - else, + the operation is unsupported and may vary across implementations. """ ... @@ -177,8 +182,8 @@ def filter(self, mask: Column) -> Self: Notes ----- - Some participants preferred a weaker type Arraylike[bool] for mask, - where 'Arraylike' denotes an object adhering to the Array API standard. + `mask`'s parent DataFrame must be `self` - else, + the operation is unsupported and may vary across implementations. """ ... @@ -207,6 +212,11 @@ def assign(self, *columns: Column) -> Self: Returns ------- DataFrame + + Notes + ----- + Every column in `columns` must have `self` as its parent DataFrame - else, + the operation is unsupported and may vary across implementations. """ ... 
From c83cfca805655b11a63cab366e0d8c9652cdc849 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 31 Oct 2023 17:01:02 +0000 Subject: [PATCH 2/5] add notes about parent dataframes --- .../dataframe_api/column_object.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 3adae007..b1b7a006 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -238,6 +238,11 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override] Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -257,6 +262,11 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override] Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -274,6 +284,11 @@ def __ge__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -291,6 +306,11 @@ def __gt__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -308,6 +328,11 @@ def __le__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... 
@@ -325,6 +350,11 @@ def __lt__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -343,6 +373,11 @@ def __and__(self, other: Self | bool) -> Self: ------- Column + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. + Raises ------ ValueError @@ -365,6 +400,11 @@ def __or__(self, other: Self | bool) -> Self: ------- Column + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. + Raises ------ ValueError @@ -382,6 +422,11 @@ def __add__(self, other: Self | Scalar) -> Self: If Column, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. Returns ------- @@ -403,6 +448,11 @@ def __sub__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -420,6 +470,11 @@ def __mul__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -437,6 +492,11 @@ def __truediv__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... 
@@ -454,6 +514,11 @@ def __floordiv__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -475,6 +540,11 @@ def __pow__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -492,6 +562,11 @@ def __mod__(self, other: Self | Scalar) -> Self: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... @@ -509,6 +584,11 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]: Returns ------- Column + + Notes + ----- + `other`'s parent DataFrame must be the same as `self`'s - else, + the operation is unsupported and may vary across implementations. """ ... From e362b88b44eb21b15225c55de2a598a4888252ee Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 1 Nov 2023 08:27:36 +0000 Subject: [PATCH 3/5] add note about free-standing columns --- .../dataframe_api/column_object.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index b1b7a006..e1ad2d32 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -19,6 +19,19 @@ class Column(Protocol): users of the library implementing the dataframe API standard. Rather, use constructor functions or an already-created dataframe object retrieved via :meth:`DataFrame.col`. 
+ + The parent dataframe (which can be retrieved via the :meth:`dataframe` property) + plays a key role here: + + - If two columns were retrieved from the same dataframe, + then they can be combined and compared at will. + - If two columns were retrieved from different dataframes, + then there is no guarantee about how or whether they can be combined and + compared, this may vary across implementations. + - If two columns are both "free-standing" (i.e. not retrieved from a dataframe + but constructed directly from a 1D array or sequence), then they can be + combined and compared with each other. Note, however, that they still can't + be compared or combined with columns retrieved from a dataframe. """ @property def dataframe(self) -> DataFrame | None: From 3cb4e397e7e812654047eecb4a5841554d7f80be Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 8 Nov 2023 18:15:28 +0000 Subject: [PATCH 4/5] post merge fixup --- .../dataframe_api/column_object.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 0bcc6bf8..9b639373 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -4,6 +4,7 @@ if TYPE_CHECKING: from typing_extensions import Self + from dataframe_api.dataframe_object import DataFrame from .typing import DType, Namespace, NullType, Scalar @@ -24,7 +25,7 @@ class Column(Protocol): plays a key role here: - If two columns were retrieved from the same dataframe, - then they can be combined and compared at will. + then they can be combined and compared at will. - If two columns were retrieved from different dataframes, then there is no guarantee about how or whether they can be combined and compared, this may vary across implementations. 
@@ -33,10 +34,10 @@ class Column(Protocol): combined and compared with each other. Note, however, that they still can't be compared or combined with columns retrieved from a dataframe. """ + @property def dataframe(self) -> DataFrame | None: - """ - Return parent DataFrame, if present. + """Return parent DataFrame, if present. For example, if we have the following @@ -44,7 +45,7 @@ def dataframe(self) -> DataFrame | None: df: DataFrame column = df.col('a') - + then `column.dataframe` should return `df`. On the other hand, if we had: @@ -52,7 +53,7 @@ def dataframe(self) -> DataFrame | None: .. code-block:: python column = column_from_1d_array(...) - + then `column.dataframe` should return `None`. """ @@ -413,7 +414,7 @@ def __add__(self, other: Self | Scalar) -> Self: If Column, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. - + Notes ----- `other`'s parent DataFrame must be the same as `self`'s - else, From 34ed16ed676fa46711b5890afdad932f64f117d0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 14 Nov 2023 17:11:20 +0000 Subject: [PATCH 5/5] :truck: Column.dataframe -> Column.parent_dataframe, note unsupportedness rather than impossibility --- .../dataframe_api/column_object.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 9b639373..c0c054e3 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -21,8 +21,8 @@ class Column(Protocol): constructor functions or an already-created dataframe object retrieved via :meth:`DataFrame.col`. 
- The parent dataframe (which can be retrieved via the :meth:`dataframe` property) - plays a key role here: + The parent dataframe (which can be retrieved via the :attr:`parent_dataframe` + property) plays a key role here: - If two columns were retrieved from the same dataframe, then they can be combined and compared at will. @@ -31,12 +31,13 @@ class Column(Protocol): compared, this may vary across implementations. - If two columns are both "free-standing" (i.e. not retrieved from a dataframe but constructed directly from a 1D array or sequence), then they can be - combined and compared with each other. Note, however, that they still can't - be compared or combined with columns retrieved from a dataframe. + combined and compared with each other. Note, however, that there's no guarantee + about whether they can be compared or combined with columns retrieved from a + different dataframe; this may vary across implementations. """ @property - def dataframe(self) -> DataFrame | None: + def parent_dataframe(self) -> DataFrame | None: """Return parent DataFrame, if present. For example, if we have the following @@ -46,7 +47,7 @@ def dataframe(self) -> DataFrame | None: df: DataFrame column = df.col('a') - then `column.dataframe` should return `df`. + then `column.parent_dataframe` should return `df`. On the other hand, if we had: @@ -54,7 +55,7 @@ def dataframe(self) -> DataFrame | None: column = column_from_1d_array(...) - then `column.dataframe` should return `None`. + then `column.parent_dataframe` should return `None`. """ def __column_namespace__(self) -> Namespace: