Skip to content

Commit 6349c68

Browse files
jbrockmendel authored and jreback committed
REF: separate casting out of Index.__new__ (#30586)
1 parent bd78b32 commit 6349c68

File tree

1 file changed

+100
-35
lines changed

1 file changed

+100
-35
lines changed

pandas/core/indexes/base.py

+100-35
Original file line numberDiff line numberDiff line change
@@ -349,41 +349,8 @@ def __new__(
349349
# they are actually ints, e.g. '0' and 0.0
350350
# should not be coerced
351351
# GH 11836
352-
if is_integer_dtype(dtype):
353-
inferred = lib.infer_dtype(data, skipna=False)
354-
if inferred == "integer":
355-
data = maybe_cast_to_integer_array(data, dtype, copy=copy)
356-
elif inferred in ["floating", "mixed-integer-float"]:
357-
if isna(data).any():
358-
raise ValueError("cannot convert float NaN to integer")
359-
360-
if inferred == "mixed-integer-float":
361-
data = maybe_cast_to_integer_array(data, dtype)
362-
363-
# If we are actually all equal to integers,
364-
# then coerce to integer.
365-
try:
366-
return cls._try_convert_to_int_index(
367-
data, copy, name, dtype
368-
)
369-
except ValueError:
370-
pass
371-
372-
# Return an actual float index.
373-
return Float64Index(data, copy=copy, name=name)
374-
375-
elif inferred == "string":
376-
pass
377-
else:
378-
data = data.astype(dtype)
379-
elif is_float_dtype(dtype):
380-
inferred = lib.infer_dtype(data, skipna=False)
381-
if inferred == "string":
382-
pass
383-
else:
384-
data = data.astype(dtype)
385-
else:
386-
data = np.array(data, dtype=dtype, copy=copy)
352+
data = _maybe_cast_with_dtype(data, dtype, copy)
353+
dtype = data.dtype # TODO: maybe not for object?
387354

388355
# maybe coerce to a sub-class
389356
if is_signed_integer_dtype(data.dtype):
@@ -5486,3 +5453,101 @@ def maybe_extract_name(name, obj, cls) -> Optional[Hashable]:
54865453
raise TypeError(f"{cls.__name__}.name must be a hashable type")
54875454

54885455
return name
5456+
5457+
5458+
def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
    """
    If a dtype is passed, cast to the closest matching dtype that is supported
    by Index.

    Parameters
    ----------
    data : np.ndarray
        Values to cast.
    dtype : np.dtype
        Requested dtype.
    copy : bool
        Whether to copy ``data`` on the paths that accept a copy flag; a copy
        is still made whenever the conversion itself requires one.

    Returns
    -------
    np.ndarray
        The cast values.  Data inferred as "string" is returned unchanged when
        an integer or float dtype is requested, and float-like data requested
        as integer that is not integer-valued comes back as float64.

    Raises
    ------
    ValueError
        If an integer dtype is requested and float-like data contains
        missing values.
    """
    # we need to avoid having numpy coerce
    # things that look like ints/floats to ints unless
    # they are actually ints, e.g. '0' and 0.0
    # should not be coerced
    # GH 11836
    if is_integer_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "integer":
            data = maybe_cast_to_integer_array(data, dtype, copy=copy)
        elif inferred in ["floating", "mixed-integer-float"]:
            if isna(data).any():
                raise ValueError("cannot convert float NaN to integer")

            if inferred == "mixed-integer-float":
                data = maybe_cast_to_integer_array(data, dtype)

            # If we are actually all equal to integers,
            # then coerce to integer.
            try:
                data = _try_convert_to_int_array(data, copy, dtype)
            except ValueError:
                # Not integer-valued: fall back to float64.
                # np.array(..., copy=False) raises on NumPy >= 2 when a copy
                # is unavoidable, so use asarray for "copy only if needed".
                if copy:
                    data = np.array(data, dtype=np.float64)
                else:
                    data = np.asarray(data, dtype=np.float64)

        elif inferred == "string":
            pass
        else:
            data = data.astype(dtype)
    elif is_float_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "string":
            pass
        else:
            data = data.astype(dtype)
    elif copy:
        data = np.array(data, dtype=dtype)
    else:
        # "copy only if needed" on every NumPy version (see note above).
        data = np.asarray(data, dtype=dtype)

    return data
5510+
5511+
5512+
def _try_convert_to_int_array(
5513+
data: np.ndarray, copy: bool, dtype: np.dtype
5514+
) -> np.ndarray:
5515+
"""
5516+
Attempt to convert an array of data into an integer array.
5517+
5518+
Parameters
5519+
----------
5520+
data : The data to convert.
5521+
copy : bool
5522+
Whether to copy the data or not.
5523+
dtype : np.dtype
5524+
5525+
Returns
5526+
-------
5527+
int_array : data converted to either an ndarray[int64] or ndarray[uint64]
5528+
5529+
Raises
5530+
------
5531+
ValueError if the conversion was not successful.
5532+
"""
5533+
5534+
if not is_unsigned_integer_dtype(dtype):
5535+
# skip int64 conversion attempt if uint-like dtype is passed, as
5536+
# this could return Int64Index when UInt64Index is what's desired
5537+
try:
5538+
res = data.astype("i8", copy=False)
5539+
if (res == data).all():
5540+
return res # TODO: might still need to copy
5541+
except (OverflowError, TypeError, ValueError):
5542+
pass
5543+
5544+
# Conversion to int64 failed (possibly due to overflow) or was skipped,
5545+
# so let's try now with uint64.
5546+
try:
5547+
res = data.astype("u8", copy=False)
5548+
if (res == data).all():
5549+
return res # TODO: might still need to copy
5550+
except (OverflowError, TypeError, ValueError):
5551+
pass
5552+
5553+
raise ValueError

0 commit comments

Comments
 (0)