8
8
from .eagerframe_object import EagerFrame
9
9
from .eagercolumn_object import EagerColumn
10
10
from .groupby_object import GroupBy
11
- from ._types import NullType , Scalar
11
+ from ._types import NullType , Scalar , IntoExpression
12
12
13
13
14
14
__all__ = ["DataFrame" ]
15
15
16
- IntoExpression = EagerColumn [Any ] | Expression
17
16
18
17
19
18
class DataFrame :
@@ -68,13 +67,13 @@ def dataframe(self) -> object:
68
67
"""
69
68
...
70
69
71
- def groupby (self , keys : str | list [ str ], / ) -> GroupBy :
70
+ def groupby (self , * keys : str ) -> GroupBy :
72
71
"""
73
72
Group the DataFrame by the given columns.
74
73
75
74
Parameters
76
75
----------
77
- keys : str | list[str]
76
+ keys : str
78
77
79
78
Returns
80
79
-------
@@ -93,7 +92,7 @@ def groupby(self, keys: str | list[str], /) -> GroupBy:
93
92
"""
94
93
...
95
94
96
- def select (self , names : str | Expression | Sequence [ str | Expression ], / ) -> DataFrame :
95
+ def select (self , * names : str | Expression ) -> DataFrame :
97
96
"""
98
97
Select multiple columns, either by name or by expressions.
99
98
@@ -107,16 +106,9 @@ def select(self, names: str | Expression | Sequence[str | Expression], /) -> Dat
107
106
108
107
Examples
109
108
--------
110
- Select columns 'a' and 'b':
111
-
112
- >>> df: DataFrame
113
- >>> df.select(['a', 'b'])
114
-
115
- You can also pass expressions:
116
-
117
109
>>> df: DataFrame
118
- >>> namespace = df.__dataframe_namespace__()
119
- >>> df.select([ 'a', namespace. col('b')+1] )
110
+ >>> col = df.__dataframe_namespace__().col
111
+ >>> df = df .select('a', col('b'), (col('c')+col('d')+1).rename('e') )
120
112
121
113
Raises
122
114
------
@@ -145,13 +137,13 @@ def slice_rows(
145
137
"""
146
138
...
147
139
148
- def filter (self , mask : Expression ) -> DataFrame :
140
+ def filter (self , mask : IntoExpression ) -> DataFrame :
149
141
"""
150
142
Select a subset of rows corresponding to a mask.
151
143
152
144
Parameters
153
145
----------
154
- mask : Expression
146
+ mask : Expression or EagerColumn
155
147
156
148
Returns
157
149
-------
@@ -170,7 +162,7 @@ def filter(self, mask: Expression) -> DataFrame:
170
162
"""
171
163
...
172
164
173
- def insert_columns (self , columns : IntoExpression | Sequence [ IntoExpression ]) -> DataFrame :
165
+ def insert_columns (self , * columns : Expression | EagerColumn [ Any ]) -> DataFrame :
174
166
"""
175
167
Insert column(s) into DataFrame at rightmost location.
176
168
@@ -184,7 +176,7 @@ def insert_columns(self, columns: IntoExpression | Sequence[IntoExpression]) ->
184
176
namespace = df.__dataframe_namespace__()
185
177
col = namespace.col
186
178
new_column = namespace.col('a') + 1
187
- df = df.insert_column (new_column.rename('a_plus_1'))
179
+ df = df.insert_columns (new_column.rename('a_plus_1'))
188
180
189
181
If you need to insert the column at a different location, combine with
190
182
:meth:`select`, e.g.:
@@ -196,7 +188,7 @@ def insert_columns(self, columns: IntoExpression | Sequence[IntoExpression]) ->
196
188
col = namespace.col
197
189
new_column = namespace.col('a') + 1
198
190
new_column_names = ['a_plus_1'] + df.column_names
199
- df = df.insert_column (new_column.rename('a_plus_1'))
191
+ df = df.insert_columns (new_column.rename('a_plus_1'))
200
192
df = df.select(*new_column_names)
201
193
202
194
Parameters
@@ -206,7 +198,7 @@ def insert_columns(self, columns: IntoExpression | Sequence[IntoExpression]) ->
206
198
"""
207
199
...
208
200
209
- def update_columns (self , columns : Expression | EagerColumn | Sequence [ Expression | EagerColumn ], / ) -> DataFrame :
201
+ def update_columns (self , * columns : Expression | EagerColumn [ Any ] ) -> DataFrame :
210
202
"""
211
203
Update values in existing column(s) of the DataFrame.
212
204
@@ -224,7 +216,7 @@ def update_columns(self, columns: Expression | EagerColumn | Sequence[Expression
224
216
225
217
Parameters
226
218
----------
227
- columns : Expression | Sequence[Expression]
219
+ columns : Expression or EagerColumn
228
220
Column(s) to update. If updating multiple columns, they must all have
229
221
different names.
230
222
@@ -268,7 +260,8 @@ def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame:
268
260
"""
269
261
...
270
262
271
- def get_column_names (self ) -> list [str ]:
263
+ @property
264
+ def column_names (self ) -> list [str ]:
272
265
"""
273
266
Get column names.
274
267
@@ -280,8 +273,7 @@ def get_column_names(self) -> list[str]:
280
273
281
274
def sort (
282
275
self ,
283
- keys : str | Expression | list [str | Expression ] | None = None ,
284
- * ,
276
+ * keys : str | Expression ,
285
277
ascending : Sequence [bool ] | bool = True ,
286
278
nulls_position : Literal ['first' , 'last' ] = 'last' ,
287
279
) -> DataFrame :
@@ -293,9 +285,9 @@ def sort(
293
285
294
286
Parameters
295
287
----------
296
- keys : str | list[str], optional
288
+ keys : str | Expression
297
289
Names of columns, or expressions, to sort by.
298
- If `None`, sort by all columns.
290
+ If not passed, will sort by all columns.
299
291
ascending : Sequence[bool] or bool
300
292
If `True`, sort by all keys in ascending order.
301
293
If `False`, sort by all keys in descending order.
@@ -759,38 +751,6 @@ def fill_nan(self, value: float | NullType, /) -> DataFrame:
759
751
"""
760
752
...
761
753
762
- def fill_null (
763
- self , value : Scalar , / , * , column_names : list [str ] | None = None
764
- ) -> DataFrame :
765
- """
766
- Fill null values with the given fill value.
767
-
768
- This method can only be used if all columns that are to be filled are
769
- of the same dtype (e.g., all of ``Float64`` or all of string dtype).
770
- If that is not the case, it is not possible to use a single Python
771
- scalar type that matches the dtype of all columns to which
772
- ``fill_null`` is being applied, and hence an exception will be raised.
773
-
774
- Parameters
775
- ----------
776
- value : Scalar
777
- Value used to replace any ``null`` values in the dataframe with.
778
- Must be of the Python scalar type matching the dtype(s) of the dataframe.
779
- column_names : list[str] | None
780
- A list of column names for which to replace nulls with the given
781
- scalar value. If ``None``, nulls will be replaced in all columns.
782
-
783
- Raises
784
- ------
785
- TypeError
786
- If the columns of the dataframe are not all of the same kind.
787
- KeyError
788
- If ``column_names`` contains a column name that is not present in
789
- the dataframe.
790
-
791
- """
792
- ...
793
-
794
754
def join (
795
755
self ,
796
756
other : DataFrame ,
0 commit comments