diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 10ba9a35..d45a543f 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -622,7 +622,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[int]: indices corresponding to the same unique value, there is no guarantee about which one will appear in the result. If the original Column contains multiple `'NaN'` values, then - only a single index corresponding to those values should be returned. + only a single index corresponding to those values will be returned. Likewise for null values (if ``skip_nulls=False``). To get the unique values, you can do ``col.get_rows(col.unique_indices())``. """ diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index f56dae67..fb0948e8 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -758,6 +758,27 @@ def is_nan(self) -> DataFrame: """ ... + def unique_indices(self, keys: Sequence[str], *, skip_nulls: bool = True) -> Column[int]: + """ + Return indices corresponding to unique values across the columns selected by ``keys``. + + Returns + ------- + Column[int] + Indices corresponding to unique values. + + Notes + ----- + There are no ordering guarantees. In particular, if there are multiple + indices corresponding to the same unique value(s), there is no guarantee + about which one will appear in the result. + If the original column(s) contain multiple `'NaN'` values, then + only a single index corresponding to those values will be returned. + Likewise for null values (if ``skip_nulls=False``). + To get the unique values, you can do ``df.get_rows(df.unique_indices(keys))``. + """ + ... 
+ def fill_nan(self, value: float | 'null', /) -> DataFrame: """ Fill ``nan`` values with the given fill value.