Skip to content

Commit beb4f1b

Browse files
authored
REF: Index.__new__ (#38665)
1 parent db1be4d commit beb4f1b

File tree

2 files changed

+99
-41
lines changed

2 files changed

+99
-41
lines changed

pandas/core/indexes/base.py

+87 -38
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,12 @@
6666
validate_all_hashable,
6767
)
6868
from pandas.core.dtypes.concat import concat_compat
69+
from pandas.core.dtypes.dtypes import (
70+
CategoricalDtype,
71+
DatetimeTZDtype,
72+
IntervalDtype,
73+
PeriodDtype,
74+
)
6975
from pandas.core.dtypes.generic import (
7076
ABCDatetimeIndex,
7177
ABCMultiIndex,
@@ -331,12 +337,6 @@ def __new__(
331337

332338
# index-like
333339
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
334-
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
335-
from pandas.core.indexes.numeric import (
336-
Float64Index,
337-
Int64Index,
338-
UInt64Index,
339-
)
340340

341341
if dtype is not None:
342342
# we need to avoid having numpy coerce
@@ -347,42 +347,31 @@ def __new__(
347347
data = _maybe_cast_with_dtype(data, dtype, copy)
348348
dtype = data.dtype # TODO: maybe not for object?
349349

350-
# maybe coerce to a sub-class
351-
if is_signed_integer_dtype(data.dtype):
352-
return Int64Index(data, copy=copy, dtype=dtype, name=name)
353-
elif is_unsigned_integer_dtype(data.dtype):
354-
return UInt64Index(data, copy=copy, dtype=dtype, name=name)
355-
elif is_float_dtype(data.dtype):
356-
return Float64Index(data, copy=copy, dtype=dtype, name=name)
357-
elif issubclass(data.dtype.type, bool) or is_bool_dtype(data):
358-
subarr = data.astype("object")
350+
if data.dtype.kind in ["i", "u", "f"]:
351+
# maybe coerce to a sub-class
352+
arr = data
359353
else:
360-
subarr = com.asarray_tuplesafe(data, dtype=object)
361-
362-
# asarray_tuplesafe does not always copy underlying data,
363-
# so need to make sure that this happens
364-
if copy:
365-
subarr = subarr.copy()
354+
arr = com.asarray_tuplesafe(data, dtype=object)
366355

367-
if dtype is None:
368-
new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
369-
if new_dtype is not None:
356+
if dtype is None:
357+
new_data = _maybe_cast_data_without_dtype(arr)
358+
new_dtype = new_data.dtype
370359
return cls(
371-
new_data, dtype=new_dtype, copy=False, name=name, **kwargs
360+
new_data, dtype=new_dtype, copy=copy, name=name, **kwargs
372361
)
373362

363+
klass = cls._dtype_to_subclass(arr.dtype)
364+
arr = klass._ensure_array(arr, dtype, copy)
374365
if kwargs:
375366
raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
376-
if subarr.ndim > 1:
377-
# GH#13601, GH#20285, GH#27125
378-
raise ValueError("Index data must be 1-dimensional")
379-
return cls._simple_new(subarr, name)
367+
return klass._simple_new(arr, name)
380368

381-
elif data is None or is_scalar(data):
369+
elif is_scalar(data):
382370
raise cls._scalar_data_error(data)
383371
elif hasattr(data, "__array__"):
384372
return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
385373
else:
374+
386375
if tupleize_cols and is_list_like(data):
387376
# GH21470: convert iterable to list before determining if empty
388377
if is_iterator(data):
@@ -400,6 +389,64 @@ def __new__(
400389
subarr = com.asarray_tuplesafe(data, dtype=object)
401390
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
402391

392+
@classmethod
393+
def _ensure_array(cls, data, dtype, copy: bool):
394+
"""
395+
Ensure we have a valid array to pass to _simple_new.
396+
"""
397+
if data.ndim > 1:
398+
# GH#13601, GH#20285, GH#27125
399+
raise ValueError("Index data must be 1-dimensional")
400+
if copy:
401+
# asarray_tuplesafe does not always copy underlying data,
402+
# so need to make sure that this happens
403+
data = data.copy()
404+
return data
405+
406+
@classmethod
407+
def _dtype_to_subclass(cls, dtype: DtypeObj):
408+
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
409+
410+
if isinstance(dtype, DatetimeTZDtype) or dtype == np.dtype("M8[ns]"):
411+
from pandas import DatetimeIndex
412+
413+
return DatetimeIndex
414+
elif dtype == "m8[ns]":
415+
from pandas import TimedeltaIndex
416+
417+
return TimedeltaIndex
418+
elif isinstance(dtype, CategoricalDtype):
419+
from pandas import CategoricalIndex
420+
421+
return CategoricalIndex
422+
elif isinstance(dtype, IntervalDtype):
423+
from pandas import IntervalIndex
424+
425+
return IntervalIndex
426+
elif isinstance(dtype, PeriodDtype):
427+
from pandas import PeriodIndex
428+
429+
return PeriodIndex
430+
431+
elif is_float_dtype(dtype):
432+
from pandas import Float64Index
433+
434+
return Float64Index
435+
elif is_unsigned_integer_dtype(dtype):
436+
from pandas import UInt64Index
437+
438+
return UInt64Index
439+
elif is_signed_integer_dtype(dtype):
440+
from pandas import Int64Index
441+
442+
return Int64Index
443+
444+
elif dtype == object:
445+
# NB: assuming away MultiIndex
446+
return Index
447+
448+
raise NotImplementedError(dtype)
449+
403450
"""
404451
NOTE for new Index creation:
405452
@@ -6112,25 +6159,27 @@ def _maybe_cast_data_without_dtype(subarr):
61126159
TimedeltaArray,
61136160
)
61146161

6162+
assert subarr.dtype == object, subarr.dtype
61156163
inferred = lib.infer_dtype(subarr, skipna=False)
61166164

61176165
if inferred == "integer":
61186166
try:
61196167
data = _try_convert_to_int_array(subarr, False, None)
6120-
return data, data.dtype
6168+
return data
61216169
except ValueError:
61226170
pass
61236171

6124-
return subarr, object
6172+
return subarr
61256173

61266174
elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
61276175
# TODO: Returns IntegerArray for integer-na case in the future
6128-
return subarr, np.float64
6176+
data = np.asarray(subarr).astype(np.float64, copy=False)
6177+
return data
61296178

61306179
elif inferred == "interval":
61316180
try:
61326181
data = IntervalArray._from_sequence(subarr, copy=False)
6133-
return data, data.dtype
6182+
return data
61346183
except ValueError:
61356184
# GH27172: mixed closed Intervals --> object dtype
61366185
pass
@@ -6141,7 +6190,7 @@ def _maybe_cast_data_without_dtype(subarr):
61416190
if inferred.startswith("datetime"):
61426191
try:
61436192
data = DatetimeArray._from_sequence(subarr, copy=False)
6144-
return data, data.dtype
6193+
return data
61456194
except (ValueError, OutOfBoundsDatetime):
61466195
# GH 27011
61476196
# If we have mixed timezones, just send it
@@ -6150,15 +6199,15 @@ def _maybe_cast_data_without_dtype(subarr):
61506199

61516200
elif inferred.startswith("timedelta"):
61526201
data = TimedeltaArray._from_sequence(subarr, copy=False)
6153-
return data, data.dtype
6202+
return data
61546203
elif inferred == "period":
61556204
try:
61566205
data = PeriodArray._from_sequence(subarr)
6157-
return data, data.dtype
6206+
return data
61586207
except IncompatibleFrequency:
61596208
pass
61606209

6161-
return subarr, subarr.dtype
6210+
return subarr
61626211

61636212

61646213
def _try_convert_to_int_array(

pandas/core/indexes/numeric.py

+12 -3
Original file line number | Diff line number | Diff line change
@@ -46,11 +46,20 @@ class NumericIndex(Index):
4646
_can_hold_strings = False
4747

4848
def __new__(cls, data=None, dtype=None, copy=False, name=None):
49-
cls._validate_dtype(dtype)
5049
name = maybe_extract_name(name, data, cls)
5150

52-
# Coerce to ndarray if not already ndarray or Index
51+
subarr = cls._ensure_array(data, dtype, copy)
52+
return cls._simple_new(subarr, name=name)
53+
54+
@classmethod
55+
def _ensure_array(cls, data, dtype, copy: bool):
56+
"""
57+
Ensure we have a valid array to pass to _simple_new.
58+
"""
59+
cls._validate_dtype(dtype)
60+
5361
if not isinstance(data, (np.ndarray, Index)):
62+
# Coerce to ndarray if not already ndarray or Index
5463
if is_scalar(data):
5564
raise cls._scalar_data_error(data)
5665

@@ -74,7 +83,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None):
7483
raise ValueError("Index data must be 1-dimensional")
7584

7685
subarr = np.asarray(subarr)
77-
return cls._simple_new(subarr, name=name)
86+
return subarr
7887

7988
@classmethod
8089
def _validate_dtype(cls, dtype: Dtype) -> None:

0 commit comments

Comments
 (0)