Skip to content

Add note about parent dataframes and cross-dataframe column comparisons #310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 117 additions & 1 deletion spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
if TYPE_CHECKING:
from .typing import NullType, Scalar, DType, Namespace
from typing_extensions import Self
from dataframe_api.dataframe_object import DataFrame


__all__ = ['Column']
Expand All @@ -17,8 +18,43 @@ class Column(Protocol):
Note that this column object is not meant to be instantiated directly by
users of the library implementing the dataframe API standard. Rather, use
constructor functions or an already-created dataframe object retrieved via

:meth:`DataFrame.col`.

The parent dataframe (which can be retrieved via the :attr:`dataframe` property)
plays a key role here:

- If two columns were retrieved from the same dataframe,
then they can be combined and compared at will.
- If two columns were retrieved from different dataframes,
then there is no guarantee about how or whether they can be combined and
compared; this may vary across implementations.
- If two columns are both "free-standing" (i.e. not retrieved from a dataframe
but constructed directly from a 1D array or sequence), then they can be
combined and compared with each other. Note, however, that they still can't
be compared or combined with columns retrieved from a dataframe.
"""
@property
def dataframe(self) -> DataFrame | None:
"""
Return parent DataFrame, if present.

For example, if we have the following

.. code-block:: python

df: DataFrame
column = df.col('a')

then `column.dataframe` should return `df`.

On the other hand, if we had:

.. code-block:: python

column = column_from_1d_array(...)

then `column.dataframe` should return `None`.
"""

def __column_namespace__(self) -> Namespace:
"""
Expand Down Expand Up @@ -215,6 +251,11 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -234,6 +275,11 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -251,6 +297,11 @@ def __ge__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -268,6 +319,11 @@ def __gt__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -285,6 +341,11 @@ def __le__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -302,6 +363,11 @@ def __lt__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -320,6 +386,11 @@ def __and__(self, other: Self | bool) -> Self:
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.

Raises
------
ValueError
Expand All @@ -342,6 +413,11 @@ def __or__(self, other: Self | bool) -> Self:
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.

Raises
------
ValueError
Expand All @@ -359,6 +435,11 @@ def __add__(self, other: Self | Scalar) -> Self:
If Column, must have same length.
"Scalar" here is defined implicitly by what scalar types are allowed
for the operation by the underlying dtypes.

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.

Returns
-------
Expand All @@ -380,6 +461,11 @@ def __sub__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -397,6 +483,11 @@ def __mul__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -414,6 +505,11 @@ def __truediv__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -431,6 +527,11 @@ def __floordiv__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -452,6 +553,11 @@ def __pow__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -469,6 +575,11 @@ def __mod__(self, other: Self | Scalar) -> Self:
Returns
-------
Column

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -486,6 +597,11 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
Returns
-------
tuple[Column, Column]

Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down
14 changes: 12 additions & 2 deletions spec/API_specification/dataframe_api/dataframe_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ def get_rows(self, indices: Column) -> Self:
Returns
-------
DataFrame

Notes
-----
`indices`'s parent DataFrame must be `self` - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down Expand Up @@ -177,8 +182,8 @@ def filter(self, mask: Column) -> Self:

Notes
-----
Some participants preferred a weaker type Arraylike[bool] for mask,
where 'Arraylike' denotes an object adhering to the Array API standard.
`mask`'s parent DataFrame must be `self` - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down Expand Up @@ -207,6 +212,11 @@ def assign(self, *columns: Column) -> Self:
Returns
-------
DataFrame

Notes
-----
The parent DataFrame of every column in `columns` must be `self` - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down