Skip to content

Commit fdeefff

Browse files
jbrockmendel authored and hweecat committed
REF: move inference/casting out of Index.__new__ (pandas-dev#30596)
1 parent 7384108 commit fdeefff

File tree

1 file changed

+77
-81
lines changed

1 file changed

+77
-81
lines changed

pandas/core/indexes/base.py

+77 -81
Original file line numberDiff line numberDiff line change
@@ -370,43 +370,12 @@ def __new__(
370370
subarr = subarr.copy()
371371

372372
if dtype is None:
373-
inferred = lib.infer_dtype(subarr, skipna=False)
374-
if inferred == "integer":
375-
try:
376-
return cls._try_convert_to_int_index(subarr, copy, name, dtype)
377-
except ValueError:
378-
pass
379-
380-
return Index(subarr, copy=copy, dtype=object, name=name)
381-
elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
382-
# TODO: Returns IntegerArray for integer-na case in the future
383-
return Float64Index(subarr, copy=copy, name=name)
384-
elif inferred == "interval":
385-
try:
386-
return IntervalIndex(subarr, name=name, copy=copy)
387-
except ValueError:
388-
# GH27172: mixed closed Intervals --> object dtype
389-
pass
390-
elif inferred == "boolean":
391-
# don't support boolean explicitly ATM
392-
pass
393-
elif inferred != "string":
394-
if inferred.startswith("datetime"):
395-
try:
396-
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)
397-
except (ValueError, OutOfBoundsDatetime):
398-
# GH 27011
399-
# If we have mixed timezones, just send it
400-
# down the base constructor
401-
pass
402-
403-
elif inferred.startswith("timedelta"):
404-
return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs)
405-
elif inferred == "period":
406-
try:
407-
return PeriodIndex(subarr, name=name, **kwargs)
408-
except IncompatibleFrequency:
409-
pass
373+
new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
374+
if new_dtype is not None:
375+
return cls(
376+
new_data, dtype=new_dtype, copy=False, name=name, **kwargs
377+
)
378+
410379
if kwargs:
411380
raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
412381
return cls._simple_new(subarr, name, **kwargs)
@@ -3806,50 +3775,6 @@ def where(self, cond, other=None):
38063775
return self._shallow_copy_with_infer(values, dtype=dtype)
38073776

38083777
# construction helpers
3809-
@classmethod
3810-
def _try_convert_to_int_index(cls, data, copy, name, dtype):
3811-
"""
3812-
Attempt to convert an array of data into an integer index.
3813-
3814-
Parameters
3815-
----------
3816-
data : The data to convert.
3817-
copy : Whether to copy the data or not.
3818-
name : The name of the index returned.
3819-
3820-
Returns
3821-
-------
3822-
int_index : data converted to either an Int64Index or a
3823-
UInt64Index
3824-
3825-
Raises
3826-
------
3827-
ValueError if the conversion was not successful.
3828-
"""
3829-
3830-
from .numeric import Int64Index, UInt64Index
3831-
3832-
if not is_unsigned_integer_dtype(dtype):
3833-
# skip int64 conversion attempt if uint-like dtype is passed, as
3834-
# this could return Int64Index when UInt64Index is what's desired
3835-
try:
3836-
res = data.astype("i8", copy=False)
3837-
if (res == data).all():
3838-
return Int64Index(res, copy=copy, name=name)
3839-
except (OverflowError, TypeError, ValueError):
3840-
pass
3841-
3842-
# Conversion to int64 failed (possibly due to overflow) or was skipped,
3843-
# so let's try now with uint64.
3844-
try:
3845-
res = data.astype("u8", copy=False)
3846-
if (res == data).all():
3847-
return UInt64Index(res, copy=copy, name=name)
3848-
except (OverflowError, TypeError, ValueError):
3849-
pass
3850-
3851-
raise ValueError
3852-
38533778
@classmethod
38543779
def _scalar_data_error(cls, data):
38553780
# We return the TypeError so that we can raise it from the constructor
@@ -5509,6 +5434,77 @@ def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.
55095434
return data
55105435

55115436

5437+
def _maybe_cast_data_without_dtype(subarr):
5438+
"""
5439+
If we have an arraylike input but no passed dtype, try to infer
5440+
a supported dtype.
5441+
5442+
Parameters
5443+
----------
5444+
subarr : np.ndarray, Index, or Series
5445+
5446+
Returns
5447+
-------
5448+
converted : np.ndarray or ExtensionArray
5449+
dtype : np.dtype or ExtensionDtype
5450+
"""
5451+
# Runtime import needed bc IntervalArray imports Index
5452+
from pandas.core.arrays import (
5453+
IntervalArray,
5454+
PeriodArray,
5455+
DatetimeArray,
5456+
TimedeltaArray,
5457+
)
5458+
5459+
inferred = lib.infer_dtype(subarr, skipna=False)
5460+
5461+
if inferred == "integer":
5462+
try:
5463+
data = _try_convert_to_int_array(subarr, False, None)
5464+
return data, data.dtype
5465+
except ValueError:
5466+
pass
5467+
5468+
return subarr, object
5469+
5470+
elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
5471+
# TODO: Returns IntegerArray for integer-na case in the future
5472+
return subarr, np.float64
5473+
5474+
elif inferred == "interval":
5475+
try:
5476+
data = IntervalArray._from_sequence(subarr, copy=False)
5477+
return data, data.dtype
5478+
except ValueError:
5479+
# GH27172: mixed closed Intervals --> object dtype
5480+
pass
5481+
elif inferred == "boolean":
5482+
# don't support boolean explicitly ATM
5483+
pass
5484+
elif inferred != "string":
5485+
if inferred.startswith("datetime"):
5486+
try:
5487+
data = DatetimeArray._from_sequence(subarr, copy=False)
5488+
return data, data.dtype
5489+
except (ValueError, OutOfBoundsDatetime):
5490+
# GH 27011
5491+
# If we have mixed timezones, just send it
5492+
# down the base constructor
5493+
pass
5494+
5495+
elif inferred.startswith("timedelta"):
5496+
data = TimedeltaArray._from_sequence(subarr, copy=False)
5497+
return data, data.dtype
5498+
elif inferred == "period":
5499+
try:
5500+
data = PeriodArray._from_sequence(subarr)
5501+
return data, data.dtype
5502+
except IncompatibleFrequency:
5503+
pass
5504+
5505+
return subarr, subarr.dtype
5506+
5507+
55125508
def _try_convert_to_int_array(
55135509
data: np.ndarray, copy: bool, dtype: np.dtype
55145510
) -> np.ndarray:

0 commit comments

Comments
 (0)