Skip to content

Commit 346b2f2

Browse files
authored
REF: simplify ensure_index (#41793)
1 parent 8f4d591 commit 346b2f2

File tree

6 files changed

+28
-59
lines changed

6 files changed

+28
-59
lines changed

pandas/_libs/lib.pyi

+1-5
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,7 @@ def maybe_indices_to_slice(
190190
max_len: int,
191191
) -> slice | np.ndarray: ... # np.ndarray[np.uint8]
192192

193-
def clean_index_list(obj: list) -> tuple[
194-
list | np.ndarray, # np.ndarray[object | np.int64 | np.uint64]
195-
bool,
196-
]: ...
197-
193+
def is_all_arraylike(obj: list) -> bool: ...
198194

199195
# -----------------------------------------------------------------
200196
# Functions which in reality take memoryviews

pandas/_libs/lib.pyx

+3-31
Original file line numberDiff line numberDiff line change
@@ -740,19 +740,15 @@ cpdef ndarray[object] ensure_string_array(
740740
return result
741741

742742

743-
@cython.wraparound(False)
744-
@cython.boundscheck(False)
745-
def clean_index_list(obj: list):
743+
def is_all_arraylike(obj: list) -> bool:
746744
"""
747-
Utility used in ``pandas.core.indexes.api.ensure_index``.
745+
Should we treat these as levels of a MultiIndex, as opposed to Index items?
748746
"""
749747
cdef:
750748
Py_ssize_t i, n = len(obj)
751749
object val
752750
bint all_arrays = True
753751

754-
# First check if we have a list of arraylikes, in which case we will
755-
# pass them to MultiIndex.from_arrays
756752
for i in range(n):
757753
val = obj[i]
758754
if not (isinstance(val, list) or
@@ -762,31 +758,7 @@ def clean_index_list(obj: list):
762758
all_arrays = False
763759
break
764760

765-
if all_arrays:
766-
return obj, all_arrays
767-
768-
# don't force numpy coerce with nan's
769-
inferred = infer_dtype(obj, skipna=False)
770-
if inferred in ['string', 'bytes', 'mixed', 'mixed-integer']:
771-
return np.asarray(obj, dtype=object), 0
772-
elif inferred in ['integer']:
773-
# we infer an integer but it *could* be a uint64
774-
775-
arr = np.asarray(obj)
776-
if arr.dtype.kind not in ["i", "u"]:
777-
# eg [0, uint64max] gets cast to float64,
778-
# but then we know we have either uint64 or object
779-
if (arr < 0).any():
780-
# TODO: similar to maybe_cast_to_integer_array
781-
return np.asarray(obj, dtype="object"), 0
782-
783-
# GH#35481
784-
guess = np.asarray(obj, dtype="uint64")
785-
return guess, 0
786-
787-
return arr, 0
788-
789-
return np.asarray(obj), 0
761+
return all_arrays
790762

791763

792764
# ------------------------------------------------------------------------------

pandas/core/indexes/base.py

+4-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from copy import copy as copy_func
43
from datetime import datetime
54
import functools
65
from itertools import zip_longest
@@ -6312,21 +6311,15 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
63126311
# check in clean_index_list
63136312
index_like = list(index_like)
63146313

6315-
converted, all_arrays = lib.clean_index_list(index_like)
6316-
6317-
if len(converted) > 0 and all_arrays:
6314+
if len(index_like) and lib.is_all_arraylike(index_like):
63186315
from pandas.core.indexes.multi import MultiIndex
63196316

6320-
return MultiIndex.from_arrays(converted)
6317+
return MultiIndex.from_arrays(index_like)
63216318
else:
6322-
index_like = converted
6319+
return Index(index_like, copy=copy, tupleize_cols=False)
63236320
else:
6324-
# clean_index_list does the equivalent of copying
6325-
# so only need to do this if not list instance
6326-
if copy:
6327-
index_like = copy_func(index_like)
63286321

6329-
return Index(index_like)
6322+
return Index(index_like, copy=copy)
63306323

63316324

63326325
def ensure_has_len(seq):

pandas/tests/indexes/numeric/test_numeric.py

+8
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,14 @@ def test_constructor(self, dtype):
531531
res = Index([1, 2 ** 63 + 1], dtype=dtype)
532532
tm.assert_index_equal(res, idx)
533533

534+
@pytest.mark.xfail(reason="https://github.com/numpy/numpy/issues/19146")
535+
def test_constructor_does_not_cast_to_float(self):
536+
# https://github.com/numpy/numpy/issues/19146
537+
values = [0, np.iinfo(np.uint64).max]
538+
539+
result = UInt64Index(values)
540+
assert list(result) == values
541+
534542

535543
@pytest.mark.parametrize(
536544
"box",

pandas/tests/indexes/test_base.py

+12
Original file line numberDiff line numberDiff line change
@@ -1622,6 +1622,18 @@ def test_ensure_index_mixed_closed_intervals(self):
16221622
expected = Index(intervals, dtype=object)
16231623
tm.assert_index_equal(result, expected)
16241624

1625+
def test_ensure_index_uint64(self):
1626+
# with both 0 and a large-uint64, np.array will infer to float64
1627+
# https://github.com/numpy/numpy/issues/19146
1628+
# but a more accurate choice would be uint64
1629+
values = [0, np.iinfo(np.uint64).max]
1630+
1631+
result = ensure_index(values)
1632+
assert list(result) == values
1633+
1634+
expected = Index(values, dtype="uint64")
1635+
tm.assert_index_equal(result, expected)
1636+
16251637
def test_get_combined_index(self):
16261638
result = _get_combined_index([])
16271639
expected = Index([])

pandas/tests/libs/test_lib.py

-12
Original file line numberDiff line numberDiff line change
@@ -206,15 +206,3 @@ def test_no_default_pickle():
206206
# GH#40397
207207
obj = tm.round_trip_pickle(lib.no_default)
208208
assert obj is lib.no_default
209-
210-
211-
def test_clean_index_list():
212-
# with both 0 and a large-uint64, np.array will infer to float64
213-
# https://github.com/numpy/numpy/issues/19146
214-
# but a more accurate choice would be uint64
215-
values = [0, np.iinfo(np.uint64).max]
216-
217-
result, _ = lib.clean_index_list(values)
218-
219-
expected = np.array(values, dtype="uint64")
220-
tm.assert_numpy_array_equal(result, expected, check_dtype=True)

0 commit comments

Comments
 (0)