Skip to content

Commit 3eb26c7

Browse files
authored
BUG: clean_index_list handle uint64 case (#41784)
1 parent 9c68314 commit 3eb26c7

File tree

5 files changed

+42
-32
lines changed

5 files changed

+42
-32
lines changed

pandas/_libs/lib.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def maybe_indices_to_slice(
185185
) -> slice | np.ndarray: ... # np.ndarray[np.uint8]
186186

187187
def clean_index_list(obj: list) -> tuple[
188-
list | np.ndarray, # np.ndarray[object] | np.ndarray[np.int64]
188+
list | np.ndarray, # np.ndarray[object | np.int64 | np.uint64]
189189
bool,
190190
]: ...
191191

pandas/_libs/lib.pyx

+20-8
Original file line numberDiff line numberDiff line change
@@ -747,10 +747,14 @@ def clean_index_list(obj: list):
747747
object val
748748
bint all_arrays = True
749749

750+
# First check if we have a list of arraylikes, in which case we will
751+
# pass them to MultiIndex.from_arrays
750752
for i in range(n):
751753
val = obj[i]
752754
if not (isinstance(val, list) or
753755
util.is_array(val) or hasattr(val, '_data')):
756+
# TODO: EA?
757+
# exclude tuples, frozensets as they may be contained in an Index
754758
all_arrays = False
755759
break
756760

@@ -762,11 +766,21 @@ def clean_index_list(obj: list):
762766
if inferred in ['string', 'bytes', 'mixed', 'mixed-integer']:
763767
return np.asarray(obj, dtype=object), 0
764768
elif inferred in ['integer']:
765-
# TODO: we infer an integer but it *could* be a uint64
766-
try:
767-
return np.asarray(obj, dtype='int64'), 0
768-
except OverflowError:
769-
return np.asarray(obj, dtype='object'), 0
769+
# we infer an integer but it *could* be a uint64
770+
771+
arr = np.asarray(obj)
772+
if arr.dtype.kind not in ["i", "u"]:
773+
# e.g. [0, uint64max] gets cast to float64,
774+
# but then we know we have either uint64 or object
775+
if (arr < 0).any():
776+
# TODO: similar to maybe_cast_to_integer_array
777+
return np.asarray(obj, dtype="object"), 0
778+
779+
# GH#35481
780+
guess = np.asarray(obj, dtype="uint64")
781+
return guess, 0
782+
783+
return arr, 0
770784

771785
return np.asarray(obj), 0
772786

@@ -1552,9 +1566,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
15521566

15531567
for i in range(n):
15541568
val = values[i]
1555-
if (util.is_integer_object(val) and
1556-
not util.is_timedelta64_object(val) and
1557-
not util.is_datetime64_object(val)):
1569+
if util.is_integer_object(val):
15581570
return "mixed-integer"
15591571

15601572
return "mixed"

pandas/core/indexes/base.py

+6-22
Original file line numberDiff line numberDiff line change
@@ -6299,27 +6299,18 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
62996299
if copy:
63006300
index_like = index_like.copy()
63016301
return index_like
6302-
if hasattr(index_like, "name"):
6303-
# https://github.com/python/mypy/issues/1424
6304-
# error: Item "ExtensionArray" of "Union[ExtensionArray,
6305-
# Sequence[Any]]" has no attribute "name"
6306-
# error: Item "Sequence[Any]" of "Union[ExtensionArray, Sequence[Any]]"
6307-
# has no attribute "name"
6308-
# error: "Sequence[Any]" has no attribute "name"
6309-
# error: Item "Sequence[Any]" of "Union[Series, Sequence[Any]]" has no
6310-
# attribute "name"
6311-
# error: Item "Sequence[Any]" of "Union[Any, Sequence[Any]]" has no
6312-
# attribute "name"
6313-
name = index_like.name # type: ignore[union-attr, attr-defined]
6302+
6303+
if isinstance(index_like, ABCSeries):
6304+
name = index_like.name
63146305
return Index(index_like, name=name, copy=copy)
63156306

63166307
if is_iterator(index_like):
63176308
index_like = list(index_like)
63186309

6319-
# must check for exactly list here because of strict type
6320-
# check in clean_index_list
63216310
if isinstance(index_like, list):
6322-
if type(index_like) != list:
6311+
if type(index_like) is not list:
6312+
# must check for exactly list here because of strict type
6313+
# check in clean_index_list
63236314
index_like = list(index_like)
63246315

63256316
converted, all_arrays = lib.clean_index_list(index_like)
@@ -6329,13 +6320,6 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
63296320

63306321
return MultiIndex.from_arrays(converted)
63316322
else:
6332-
if isinstance(converted, np.ndarray) and converted.dtype == np.int64:
6333-
# Check for overflows if we should actually be uint64
6334-
# xref GH#35481
6335-
alt = np.asarray(index_like)
6336-
if alt.dtype == np.uint64:
6337-
converted = alt
6338-
63396323
index_like = converted
63406324
else:
63416325
# clean_index_list does the equivalent of copying

pandas/core/indexing.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1934,7 +1934,9 @@ def _setitem_with_indexer_missing(self, indexer, value):
19341934
# e.g. 0.0 -> 0
19351935
# GH#12246
19361936
if index.is_unique:
1937-
new_indexer = index.get_indexer([new_index[-1]])
1937+
# pass new_index[-1:] instead of [new_index[-1]]
1938+
# so that we retain dtype
1939+
new_indexer = index.get_indexer(new_index[-1:])
19381940
if (new_indexer != -1).any():
19391941
# We get only here with loc, so can hard code
19401942
return self._setitem_with_indexer(new_indexer, value, "loc")

pandas/tests/libs/test_lib.py

+12
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,15 @@ def test_no_default_pickle():
206206
# GH#40397
207207
obj = tm.round_trip_pickle(lib.no_default)
208208
assert obj is lib.no_default
209+
210+
211+
def test_clean_index_list():
212+
# with both 0 and a large uint64 value, np.array will infer float64
213+
# https://github.com/numpy/numpy/issues/19146
214+
# but a more accurate choice would be uint64
215+
values = [0, np.iinfo(np.uint64).max]
216+
217+
result, _ = lib.clean_index_list(values)
218+
219+
expected = np.array(values, dtype="uint64")
220+
tm.assert_numpy_array_equal(result, expected, check_dtype=True)

0 commit comments

Comments
 (0)