Skip to content

Commit 25e5a52

Browse files
authored
Add note about parent dataframes and cross-dataframe column comparisons (#310)
* wip * add notes about parent dataframes * add note about free-standing columns * post merge fixup * 🚚 Column.dataframe -> Column.parent_dataframe, note unsupportedness rather than impossibility
1 parent 098e11e commit 25e5a52

File tree

2 files changed

+131
-3
lines changed

2 files changed

+131
-3
lines changed

spec/API_specification/dataframe_api/column_object.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
if TYPE_CHECKING:
66
from typing_extensions import Self
77

8+
from dataframe_api.dataframe_object import DataFrame
9+
810
from .typing import DType, Namespace, NullType, Scalar
911

1012

@@ -17,9 +19,45 @@ class Column(Protocol):
1719
Note that this column object is not meant to be instantiated directly by
1820
users of the library implementing the dataframe API standard. Rather, use
1921
constructor functions or an already-created dataframe object retrieved via
20-
22+
:meth:`DataFrame.col`.
23+
24+
The parent dataframe (which can be retrieved via the :attr:`parent_dataframe`
25+
property) plays a key role here:
26+
27+
- If two columns were retrieved from the same dataframe,
28+
then they can be combined and compared at will.
29+
- If two columns were retrieved from different dataframes,
30+
then there is no guarantee about how or whether they can be combined and
31+
compared; this may vary across implementations.
32+
- If two columns are both "free-standing" (i.e. not retrieved from a dataframe
33+
but constructed directly from a 1D array or sequence), then they can be
34+
combined and compared with each other. Note, however, that there's no guarantee
35+
about whether they can be compared or combined with columns retrieved from a
36+
different dataframe; this may vary across implementations.
2137
"""
2238

39+
@property
40+
def parent_dataframe(self) -> DataFrame | None:
41+
"""Return parent DataFrame, if present.
42+
43+
For example, if we have the following
44+
45+
.. code-block:: python
46+
47+
df: DataFrame
48+
column = df.col('a')
49+
50+
then `column.parent_dataframe` should return `df`.
51+
52+
On the other hand, if we had:
53+
54+
.. code-block:: python
55+
56+
column = column_from_1d_array(...)
57+
58+
then `column.parent_dataframe` should return `None`.
59+
"""
60+
2361
def __column_namespace__(self) -> Namespace:
2462
"""Return an object that has all the Dataframe Standard API functions on it.
2563
@@ -201,6 +239,11 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
201239
Returns
202240
-------
203241
Column
242+
243+
Notes
244+
-----
245+
`other`'s parent DataFrame must be the same as `self`'s - else,
246+
the operation is unsupported and may vary across implementations.
204247
"""
205248
...
206249

@@ -219,6 +262,11 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
219262
Returns
220263
-------
221264
Column
265+
266+
Notes
267+
-----
268+
`other`'s parent DataFrame must be the same as `self`'s - else,
269+
the operation is unsupported and may vary across implementations.
222270
"""
223271
...
224272

@@ -235,6 +283,11 @@ def __ge__(self, other: Self | Scalar) -> Self:
235283
Returns
236284
-------
237285
Column
286+
287+
Notes
288+
-----
289+
`other`'s parent DataFrame must be the same as `self`'s - else,
290+
the operation is unsupported and may vary across implementations.
238291
"""
239292
...
240293

@@ -251,6 +304,11 @@ def __gt__(self, other: Self | Scalar) -> Self:
251304
Returns
252305
-------
253306
Column
307+
308+
Notes
309+
-----
310+
`other`'s parent DataFrame must be the same as `self`'s - else,
311+
the operation is unsupported and may vary across implementations.
254312
"""
255313
...
256314

@@ -267,6 +325,11 @@ def __le__(self, other: Self | Scalar) -> Self:
267325
Returns
268326
-------
269327
Column
328+
329+
Notes
330+
-----
331+
`other`'s parent DataFrame must be the same as `self`'s - else,
332+
the operation is unsupported and may vary across implementations.
270333
"""
271334
...
272335

@@ -283,6 +346,11 @@ def __lt__(self, other: Self | Scalar) -> Self:
283346
Returns
284347
-------
285348
Column
349+
350+
Notes
351+
-----
352+
`other`'s parent DataFrame must be the same as `self`'s - else,
353+
the operation is unsupported and may vary across implementations.
286354
"""
287355
...
288356

@@ -300,6 +368,11 @@ def __and__(self, other: Self | bool) -> Self:
300368
-------
301369
Column
302370
371+
Notes
372+
-----
373+
`other`'s parent DataFrame must be the same as `self`'s - else,
374+
the operation is unsupported and may vary across implementations.
375+
303376
Raises
304377
------
305378
ValueError
@@ -321,6 +394,11 @@ def __or__(self, other: Self | bool) -> Self:
321394
-------
322395
Column
323396
397+
Notes
398+
-----
399+
`other`'s parent DataFrame must be the same as `self`'s - else,
400+
the operation is unsupported and may vary across implementations.
401+
324402
Raises
325403
------
326404
ValueError
@@ -338,6 +416,11 @@ def __add__(self, other: Self | Scalar) -> Self:
338416
"Scalar" here is defined implicitly by what scalar types are allowed
339417
for the operation by the underlying dtypes.
340418
419+
Notes
420+
-----
421+
`other`'s parent DataFrame must be the same as `self`'s - else,
422+
the operation is unsupported and may vary across implementations.
423+
341424
Returns
342425
-------
343426
Column
@@ -357,6 +440,11 @@ def __sub__(self, other: Self | Scalar) -> Self:
357440
Returns
358441
-------
359442
Column
443+
444+
Notes
445+
-----
446+
`other`'s parent DataFrame must be the same as `self`'s - else,
447+
the operation is unsupported and may vary across implementations.
360448
"""
361449
...
362450

@@ -373,6 +461,11 @@ def __mul__(self, other: Self | Scalar) -> Self:
373461
Returns
374462
-------
375463
Column
464+
465+
Notes
466+
-----
467+
`other`'s parent DataFrame must be the same as `self`'s - else,
468+
the operation is unsupported and may vary across implementations.
376469
"""
377470
...
378471

@@ -389,6 +482,11 @@ def __truediv__(self, other: Self | Scalar) -> Self:
389482
Returns
390483
-------
391484
Column
485+
486+
Notes
487+
-----
488+
`other`'s parent DataFrame must be the same as `self`'s - else,
489+
the operation is unsupported and may vary across implementations.
392490
"""
393491
...
394492

@@ -405,6 +503,11 @@ def __floordiv__(self, other: Self | Scalar) -> Self:
405503
Returns
406504
-------
407505
Column
506+
507+
Notes
508+
-----
509+
`other`'s parent DataFrame must be the same as `self`'s - else,
510+
the operation is unsupported and may vary across implementations.
408511
"""
409512
...
410513

@@ -425,6 +528,11 @@ def __pow__(self, other: Self | Scalar) -> Self:
425528
Returns
426529
-------
427530
Column
531+
532+
Notes
533+
-----
534+
`other`'s parent DataFrame must be the same as `self`'s - else,
535+
the operation is unsupported and may vary across implementations.
428536
"""
429537
...
430538

@@ -441,6 +549,11 @@ def __mod__(self, other: Self | Scalar) -> Self:
441549
Returns
442550
-------
443551
Column
552+
553+
Notes
554+
-----
555+
`other`'s parent DataFrame must be the same as `self`'s - else,
556+
the operation is unsupported and may vary across implementations.
444557
"""
445558
...
446559

@@ -457,6 +570,11 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
457570
Returns
458571
-------
459572
tuple[Column, Column]
573+
574+
Notes
575+
-----
576+
`other`'s parent DataFrame must be the same as `self`'s - else,
577+
the operation is unsupported and may vary across implementations.
460578
"""
461579
...
462580

spec/API_specification/dataframe_api/dataframe_object.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ def get_rows(self, indices: Column) -> Self:
136136
Returns
137137
-------
138138
DataFrame
139+
140+
Notes
141+
-----
142+
`indices`'s parent DataFrame must be `self` - else,
143+
the operation is unsupported and may vary across implementations.
139144
"""
140145
...
141146

@@ -172,8 +177,8 @@ def filter(self, mask: Column) -> Self:
172177
173178
Notes
174179
-----
175-
Some participants preferred a weaker type Arraylike[bool] for mask,
176-
where 'Arraylike' denotes an object adhering to the Array API standard.
180+
`mask`'s parent DataFrame must be `self` - else,
181+
the operation is unsupported and may vary across implementations.
177182
"""
178183
...
179184

@@ -201,6 +206,11 @@ def assign(self, *columns: Column) -> Self:
201206
Returns
202207
-------
203208
DataFrame
209+
210+
Notes
211+
-----
212+
The parent DataFrame of every column in `columns` must be `self` - else,
213+
the operation is unsupported and may vary across implementations.
204214
"""
205215
...
206216

0 commit comments

Comments
 (0)