Skip to content

Commit 27fac78

Browse files
authored
feat: support unique() for Index (#1750)
* feat: support for Index * fix lint * fix test by limiting Pandas version
1 parent 36c359d commit 27fac78

File tree

3 files changed

+64
-0
lines changed

3 files changed

+64
-0
lines changed

bigframes/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,12 @@ def drop_duplicates(self, *, keep: str = "first") -> Index:
451451
block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep)
452452
return Index(block)
453453

454+
def unique(self, level: Hashable | int | None = None) -> Index:
    """Return the index with duplicate entries removed.

    Args:
        level: Level to take the values from before de-duplicating. When
            ``None`` (the default) the index itself is de-duplicated. Any
            other value, including ``0``, is forwarded to
            ``get_level_values``.

    Returns:
        The result of ``drop_duplicates()`` on the selected values.
    """
    # Identity check on purpose: level 0 is a valid selector and must not be
    # treated as "no level given".
    source = self if level is None else self.get_level_values(level)
    return source.drop_duplicates()
459+
454460
def isin(self, values) -> Index:
455461
if not utils.is_list_like(values):
456462
raise TypeError(
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from packaging import version
16+
import pandas as pd
17+
import pandas.testing
18+
import pytest
19+
20+
21+
@pytest.mark.parametrize("level", [None, 0, 1, "level0", "level1"])
def test_unique(session, level):
    """Compare ``unique(level)`` against pandas on a two-level MultiIndex.

    The parametrization covers no level, each level by position and each
    level by name.
    """
    installed_pandas = version.Version(pd.__version__)
    if installed_pandas < version.Version("2.0.0"):
        pytest.skip("StringDtype for multi-index not supported until Pandas 2.0")

    # Both levels, and the (label, number) pairs, contain repeats, so every
    # parametrized case has duplicates to remove.
    labels = pd.Series(
        ["A", "A", "B", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")
    )
    numbers = pd.Series([1, 2, 1, 2, 1], dtype=pd.Int64Dtype())
    pd_idx = pd.MultiIndex.from_arrays(
        [labels, numbers], names=["level0", "level1"]
    )

    expected = pd_idx.unique(level)
    actual = session.read_pandas(pd_idx).unique(level).to_pandas()

    pandas.testing.assert_index_equal(actual, expected)

third_party/bigframes_vendored/pandas/core/indexes/base.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/indexes/base.py
22
from __future__ import annotations
33

4+
from collections.abc import Hashable
45
import typing
56

67
from bigframes import constants
@@ -1061,6 +1062,28 @@ def drop_duplicates(self, *, keep: str = "first"):
10611062
"""
10621063
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
10631064

1065+
def unique(self, level: Hashable | int | None = None):
    """
    Return the distinct values of the index.

    For a MultiIndex, ``level`` narrows the result to the values of
    that single level.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> idx = bpd.Index([1, 1, 2, 3, 3])
        >>> idx.unique()
        Index([1, 2, 3], dtype='Int64')

    Args:
        level (int or hashable, optional):
            Restrict the result to one level (for MultiIndex).
            An int selects the level by integer position; any other
            value selects it by level name.

    Returns:
        bigframes.pandas.Index
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1086+
10641087
def to_numpy(self, dtype, *, allow_large_results=None):
10651088
"""
10661089
A NumPy ndarray representing the values in this Series or Index.

0 commit comments

Comments
 (0)