1
1
from __future__ import annotations
2
- from typing import Sequence , Union , TYPE_CHECKING , NoReturn
2
+
3
+ from typing import Literal , Mapping , Sequence , Union , TYPE_CHECKING , NoReturn
4
+
3
5
4
6
if TYPE_CHECKING :
5
7
from .column_object import Column
11
13
12
14
13
15
class DataFrame :
16
+ """
17
+ DataFrame object
18
+
19
+ Note that this dataframe object is not meant to be instantiated directly by
20
+ users of the library implementing the dataframe API standard. Rather, use
21
+ constructor functions or an already-created dataframe object.
22
+
23
+ **Python operator support**
24
+
25
+ All arithmetic operators defined by the Python language, except for
26
+ ``__matmul__``, ``__neg__`` and ``__pos__``, must be supported for
27
+ numerical data types.
28
+
29
+ All comparison operators defined by the Python language must be supported
30
+ by the dataframe object for all data types for which those comparisons are
31
+ supported by the builtin scalar types corresponding to a data type.
32
+
33
+ In-place operators must not be supported. All operations on the dataframe
34
+ object are out-of-place.
35
+
36
+ **Methods and Attributes**
37
+
38
+ """
39
+
40
+ @classmethod
41
+ def from_dict (cls , data : Mapping [str , Column ]) -> DataFrame :
42
+ """
43
+ Construct a DataFrame from a mapping of column names to Columns.
44
+
45
+ Parameters
46
+ ----------
47
+ data : Mapping[str, Column]
48
+ Each Column must be of the type corresponding to the DataFrame.
49
+ For example, a ``LibraryXDataFrame`` can only be built from
50
+ ``LibraryXColumn`` instances.
51
+
52
+ Returns
53
+ -------
54
+ DataFrame
55
+ """
56
+
14
57
@property
15
58
def dataframe (self ) -> object :
16
59
"""
@@ -88,24 +131,18 @@ def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame:
88
131
"""
89
132
...
90
133
91
- def get_rows (self , indices : Sequence [int ]) -> DataFrame :
134
+ def get_rows (self , indices : "Column [int]" ) -> DataFrame :
92
135
"""
93
136
Select a subset of rows, similar to `ndarray.take`.
94
137
95
138
Parameters
96
139
----------
97
- indices : Sequence [int]
140
+ indices : Column [int]
98
141
Positions of rows to select.
99
142
100
143
Returns
101
144
-------
102
145
DataFrame
103
-
104
- Notes
105
- -----
106
- Some discussion participants prefer a stricter type Column[int] for
107
- indices in order to make it easier to implement in a performant manner
108
- on GPUs.
109
146
"""
110
147
...
111
148
@@ -204,6 +241,47 @@ def get_column_names(self) -> Sequence[str]:
204
241
"""
205
242
...
206
243
244
+ def sorted_indices (
245
+ self ,
246
+ keys : Sequence [str ],
247
+ * ,
248
+ ascending : Sequence [bool ] | bool = True ,
249
+ nulls_position : Literal ['first' , 'last' ] = 'last' ,
250
+ ) -> Column [int ]:
251
+ """
252
+ Return row numbers which would sort the DataFrame according to the given columns.
253
+
254
+ If you need to sort the DataFrame, you can simply do::
255
+
256
+ df.get_rows(df.sorted_indices(keys))
257
+
258
+ Parameters
259
+ ----------
260
+ keys : Sequence[str]
261
+ Names of columns to sort by.
262
+ ascending : Sequence[bool] or bool
263
+ If `True`, sort by all keys in ascending order.
264
+ If `False`, sort by all keys in descending order.
265
+ If a sequence, it must be the same length as `keys`,
266
+ and each element gives the sort direction for the
267
+ corresponding key.
268
+ nulls_position : {'first', 'last'}
269
+ Whether null values should be placed at the beginning
270
+ or at the end of the result.
271
+ Note that the position of NaNs is unspecified and may
272
+ vary based on the implementation.
273
+
274
+ Returns
275
+ -------
276
+ Column[int]
277
+
278
+ Raises
279
+ ------
280
+ ValueError
281
+ If `keys` and `ascending` are sequences of different lengths.
282
+ """
283
+ ...
284
+
207
285
def __eq__ (self , other : DataFrame | Scalar ) -> DataFrame :
208
286
"""
209
287
Compare for equality.
@@ -465,7 +543,7 @@ def __iter__(self) -> NoReturn:
465
543
"""
466
544
raise NotImplementedError ("'__iter__' is intentionally not implemented." )
467
545
468
- def any (self , skipna : bool = True ) -> DataFrame :
546
+ def any (self , skip_nulls : bool = True ) -> DataFrame :
469
547
"""
470
548
Reduction returns a 1-row DataFrame.
471
549
@@ -476,7 +554,7 @@ def any(self, skipna: bool = True) -> DataFrame:
476
554
"""
477
555
...
478
556
479
- def all (self , skipna : bool = True ) -> DataFrame :
557
+ def all (self , skip_nulls : bool = True ) -> DataFrame :
480
558
"""
481
559
Reduction returns a 1-row DataFrame.
482
560
@@ -515,49 +593,49 @@ def all_rowwise(self, skipna: bool = True) -> Column:
515
593
"""
516
594
...
517
595
518
- def min (self , skipna : bool = True ) -> DataFrame :
596
+ def min (self , skip_nulls : bool = True ) -> DataFrame :
519
597
"""
520
598
Reduction returns a 1-row DataFrame.
521
599
"""
522
600
...
523
601
524
- def max (self , skipna : bool = True ) -> DataFrame :
602
+ def max (self , skip_nulls : bool = True ) -> DataFrame :
525
603
"""
526
604
Reduction returns a 1-row DataFrame.
527
605
"""
528
606
...
529
607
530
- def sum (self , skipna : bool = True ) -> DataFrame :
608
+ def sum (self , skip_nulls : bool = True ) -> DataFrame :
531
609
"""
532
610
Reduction returns a 1-row DataFrame.
533
611
"""
534
612
...
535
613
536
- def prod (self , skipna : bool = True ) -> DataFrame :
614
+ def prod (self , skip_nulls : bool = True ) -> DataFrame :
537
615
"""
538
616
Reduction returns a 1-row DataFrame.
539
617
"""
540
618
...
541
619
542
- def median (self , skipna : bool = True ) -> DataFrame :
620
+ def median (self , skip_nulls : bool = True ) -> DataFrame :
543
621
"""
544
622
Reduction returns a 1-row DataFrame.
545
623
"""
546
624
...
547
625
548
- def mean (self , skipna : bool = True ) -> DataFrame :
626
+ def mean (self , skip_nulls : bool = True ) -> DataFrame :
549
627
"""
550
628
Reduction returns a 1-row DataFrame.
551
629
"""
552
630
...
553
631
554
- def std (self , skipna : bool = True ) -> DataFrame :
632
+ def std (self , skip_nulls : bool = True ) -> DataFrame :
555
633
"""
556
634
Reduction returns a 1-row DataFrame.
557
635
"""
558
636
...
559
637
560
- def var (self , skipna : bool = True ) -> DataFrame :
638
+ def var (self , skip_nulls : bool = True ) -> DataFrame :
561
639
"""
562
640
Reduction returns a 1-row DataFrame.
563
641
"""
@@ -578,12 +656,14 @@ def isnull(self) -> DataFrame:
578
656
Notes
579
657
-----
580
658
Does *not* include NaN-like entries.
659
+ May optionally include 'NaT' values (if present in an implementation),
660
+ but note that the Standard makes no guarantees about them.
581
661
"""
582
662
...
583
663
584
664
def isnan (self ) -> DataFrame :
585
665
"""
586
- Check for nan-like entries.
666
+ Check for nan entries.
587
667
588
668
Returns
589
669
-------
@@ -595,7 +675,8 @@ def isnan(self) -> DataFrame:
595
675
596
676
Notes
597
677
-----
598
- Includes anything with NaN-like semantics, e.g. np.datetime64("NaT") .
678
+ This only checks for 'NaN'.
599
679
Does *not* include 'missing' or 'null' entries.
680
+ In particular, does not check for `np.timedelta64('NaT')`.
600
681
"""
601
682
...
0 commit comments