From 70e9a829140f00ffb5c1741c84525ce02fc2b664 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 17 Oct 2023 12:35:01 +0300 Subject: [PATCH 1/2] make passing column names more ergonomic --- .../dataframe_api/dataframe_object.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index e8a9a21e..765888a9 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -64,13 +64,13 @@ def shape(self) -> tuple[int, int]: Return number of rows and number of columns. """ - def group_by(self, keys: str | list[str], /) -> GroupBy: + def group_by(self, *keys: str) -> GroupBy: """ Group the DataFrame by the given columns. Parameters ---------- - keys : str | list[str] + *keys : str Returns ------- @@ -179,7 +179,7 @@ def filter(self, mask: Column) -> DataFrame: """ ... - def assign(self, columns: Column | Sequence[Column], /) -> DataFrame: + def assign(self, *columns: Column) -> DataFrame: """ Insert new column(s), or update values in existing ones. @@ -197,7 +197,7 @@ def assign(self, columns: Column | Sequence[Column], /) -> DataFrame: Parameters ---------- - columns : Column | Sequence[Column] + *columns : Column Column(s) to update/insert. If updating/inserting multiple columns, they must all have different names. @@ -207,13 +207,13 @@ def assign(self, columns: Column | Sequence[Column], /) -> DataFrame: """ ... - def drop_columns(self, label: str | list[str]) -> DataFrame: + def drop_columns(self, *labels: str) -> DataFrame: """ Drop the specified column(s). Parameters ---------- - label : str | list[str] + *labels : str Column name(s) to drop. 
Returns @@ -266,8 +266,7 @@ def schema(self) -> dict[str, Any]: def sort( self, - keys: str | list[str] | None = None, - *, + *keys: str, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', ) -> DataFrame: @@ -279,9 +278,9 @@ def sort( Parameters ---------- - keys : str | list[str], optional + *keys : str Names of columns to sort by. - If `None`, sort by all columns. + If not specified, sort by all columns. ascending : Sequence[bool] or bool If `True`, sort by all keys in ascending order. If `False`, sort by all keys in descending order. @@ -307,8 +306,7 @@ def sort( def sorted_indices( self, - keys: str | list[str] | None = None, - *, + *keys: str, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', ) -> Column: @@ -319,9 +317,9 @@ def sorted_indices( Parameters ---------- - keys : str | list[str], optional + *keys : str Names of columns to sort by. - If `None`, sort by all columns. + If not specified, sort by all columns. ascending : Sequence[bool] or bool If `True`, sort by all keys in ascending order. If `False`, sort by all keys in descending order. @@ -796,15 +794,15 @@ def is_nan(self) -> DataFrame: """ ... - def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Column: + def unique_indices(self, *keys: str, *, skip_nulls: bool = True) -> Column: """ Return indices corresponding to unique values across selected columns. Parameters ---------- - keys : str | list[str], optional + *keys : str Column names to consider when finding unique values. - If `None`, all columns are considered. + If not specified, all columns are considered. 
Returns ------- From 6b227f175a4239fcee5a35d7a452da9fc05f5849 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 17 Oct 2023 19:58:10 +0300 Subject: [PATCH 2/2] syntax --- spec/API_specification/dataframe_api/dataframe_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 765888a9..21f0cf4a 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -794,7 +794,7 @@ def is_nan(self) -> DataFrame: """ ... - def unique_indices(self, *keys: str, *, skip_nulls: bool = True) -> Column: + def unique_indices(self, *keys: str, skip_nulls: bool = True) -> Column: """ Return indices corresponding to unique values across selected columns.