     validate_all_hashable,
 )
 from pandas.core.dtypes.concat import concat_compat
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    DatetimeTZDtype,
+    IntervalDtype,
+    PeriodDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDatetimeIndex,
     ABCMultiIndex,
@@ -331,12 +337,6 @@ def __new__(

         # index-like
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
-            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
-            from pandas.core.indexes.numeric import (
-                Float64Index,
-                Int64Index,
-                UInt64Index,
-            )

             if dtype is not None:
                 # we need to avoid having numpy coerce
@@ -347,42 +347,31 @@ def __new__(
                 data = _maybe_cast_with_dtype(data, dtype, copy)
                 dtype = data.dtype  # TODO: maybe not for object?

-            # maybe coerce to a sub-class
-            if is_signed_integer_dtype(data.dtype):
-                return Int64Index(data, copy=copy, dtype=dtype, name=name)
-            elif is_unsigned_integer_dtype(data.dtype):
-                return UInt64Index(data, copy=copy, dtype=dtype, name=name)
-            elif is_float_dtype(data.dtype):
-                return Float64Index(data, copy=copy, dtype=dtype, name=name)
-            elif issubclass(data.dtype.type, bool) or is_bool_dtype(data):
-                subarr = data.astype("object")
+            if data.dtype.kind in ["i", "u", "f"]:
+                # maybe coerce to a sub-class
+                arr = data
             else:
-                subarr = com.asarray_tuplesafe(data, dtype=object)
-
-                # asarray_tuplesafe does not always copy underlying data,
-                # so need to make sure that this happens
-                if copy:
-                    subarr = subarr.copy()
+                arr = com.asarray_tuplesafe(data, dtype=object)

-            if dtype is None:
-                new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
-                if new_dtype is not None:
+            if dtype is None:
+                new_data = _maybe_cast_data_without_dtype(arr)
+                new_dtype = new_data.dtype
                 return cls(
-                    new_data, dtype=new_dtype, copy=False, name=name, **kwargs
+                    new_data, dtype=new_dtype, copy=copy, name=name, **kwargs
                 )

+            klass = cls._dtype_to_subclass(arr.dtype)
+            arr = klass._ensure_array(arr, dtype, copy)
             if kwargs:
                 raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
-            if subarr.ndim > 1:
-                # GH#13601, GH#20285, GH#27125
-                raise ValueError("Index data must be 1-dimensional")
-            return cls._simple_new(subarr, name)
+            return klass._simple_new(arr, name)

-        elif data is None or is_scalar(data):
+        elif is_scalar(data):
             raise cls._scalar_data_error(data)
         elif hasattr(data, "__array__"):
             return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
         else:
+
             if tupleize_cols and is_list_like(data):
                 # GH21470: convert iterable to list before determining if empty
                 if is_iterator(data):
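
As a quick illustration of what the refactored `__new__` path above is meant to preserve, here is a hedged sketch (assuming a pandas 1.x build where `Int64Index`/`Float64Index` still exist as concrete subclasses): the public constructor keeps returning the dtype-specific index class, with the choice now routed through `_dtype_to_subclass` instead of the removed inline `is_*_dtype` checks.

import numpy as np
import pandas as pd

# Dtype-driven dispatch observable from the public constructor.
idx_int = pd.Index(np.array([1, 2, 3]))            # -> Int64Index on pandas 1.x
idx_float = pd.Index(np.array([1.0, 2.0, 3.0]))    # -> Float64Index on pandas 1.x
idx_dt = pd.Index(pd.to_datetime(["2021-01-01", "2021-01-02"]))  # -> DatetimeIndex

print(type(idx_int).__name__, type(idx_float).__name__, type(idx_dt).__name__)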
@@ -400,6 +389,64 @@ def __new__(
                 subarr = com.asarray_tuplesafe(data, dtype=object)
             return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)

+    @classmethod
+    def _ensure_array(cls, data, dtype, copy: bool):
+        """
+        Ensure we have a valid array to pass to _simple_new.
+        """
+        if data.ndim > 1:
+            # GH#13601, GH#20285, GH#27125
+            raise ValueError("Index data must be 1-dimensional")
+        if copy:
+            # asarray_tuplesafe does not always copy underlying data,
+            # so need to make sure that this happens
+            data = data.copy()
+        return data
+
+    @classmethod
+    def _dtype_to_subclass(cls, dtype: DtypeObj):
+        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
+
+        if isinstance(dtype, DatetimeTZDtype) or dtype == np.dtype("M8[ns]"):
+            from pandas import DatetimeIndex
+
+            return DatetimeIndex
+        elif dtype == "m8[ns]":
+            from pandas import TimedeltaIndex
+
+            return TimedeltaIndex
+        elif isinstance(dtype, CategoricalDtype):
+            from pandas import CategoricalIndex
+
+            return CategoricalIndex
+        elif isinstance(dtype, IntervalDtype):
+            from pandas import IntervalIndex
+
+            return IntervalIndex
+        elif isinstance(dtype, PeriodDtype):
+            from pandas import PeriodIndex
+
+            return PeriodIndex
+
+        elif is_float_dtype(dtype):
+            from pandas import Float64Index
+
+            return Float64Index
+        elif is_unsigned_integer_dtype(dtype):
+            from pandas import UInt64Index
+
+            return UInt64Index
+        elif is_signed_integer_dtype(dtype):
+            from pandas import Int64Index
+
+            return Int64Index
+
+        elif dtype == object:
+            # NB: assuming away MultiIndex
+            return Index
+
+        raise NotImplementedError(dtype)
+

     """
     NOTE for new Index creation:

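
The two helpers added above split the old inline logic into a dtype-to-subclass lookup and an array-validation step. Below is a hedged sketch of the contracts they are expected to satisfy; both are private classmethods, so the names and call signatures are taken directly from the diff and may differ in other pandas versions.

import numpy as np
import pandas as pd

# _dtype_to_subclass maps a dtype to the Index subclass that should hold it.
assert pd.Index._dtype_to_subclass(np.dtype("M8[ns]")).__name__ == "DatetimeIndex"
assert pd.Index._dtype_to_subclass(np.dtype(object)) is pd.Index

# _ensure_array enforces the 1-D invariant before _simple_new, which the
# public constructor surfaces as a ValueError:
try:
    pd.Index(np.ones((2, 2)))
except ValueError as err:
    print(err)  # Index data must be 1-dimensional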
@@ -6112,25 +6159,27 @@ def _maybe_cast_data_without_dtype(subarr):
         TimedeltaArray,
     )

+    assert subarr.dtype == object, subarr.dtype
     inferred = lib.infer_dtype(subarr, skipna=False)

     if inferred == "integer":
         try:
             data = _try_convert_to_int_array(subarr, False, None)
-            return data, data.dtype
+            return data
         except ValueError:
             pass

-        return subarr, object
+        return subarr

     elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
         # TODO: Returns IntegerArray for integer-na case in the future
-        return subarr, np.float64
+        data = np.asarray(subarr).astype(np.float64, copy=False)
+        return data

     elif inferred == "interval":
         try:
             data = IntervalArray._from_sequence(subarr, copy=False)
-            return data, data.dtype
+            return data
         except ValueError:
             # GH27172: mixed closed Intervals --> object dtype
             pass
@@ -6141,7 +6190,7 @@ def _maybe_cast_data_without_dtype(subarr):
     if inferred.startswith("datetime"):
         try:
             data = DatetimeArray._from_sequence(subarr, copy=False)
-            return data, data.dtype
+            return data
         except (ValueError, OutOfBoundsDatetime):
             # GH 27011
             # If we have mixed timezones, just send it
@@ -6150,15 +6199,15 @@ def _maybe_cast_data_without_dtype(subarr):

     elif inferred.startswith("timedelta"):
         data = TimedeltaArray._from_sequence(subarr, copy=False)
-        return data, data.dtype
+        return data
     elif inferred == "period":
         try:
             data = PeriodArray._from_sequence(subarr)
-            return data, data.dtype
+            return data
         except IncompatibleFrequency:
             pass

-    return subarr, subarr.dtype
+    return subarr


 def _try_convert_to_int_array(
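
For readers skimming the `_maybe_cast_data_without_dtype` changes above: the helper now takes an object-dtype ndarray and returns a single array whose dtype already carries the inference result, instead of a `(data, dtype)` tuple. A minimal standalone sketch of that contract (not the pandas implementation; only the numeric branches are mirrored, and the function name is made up for illustration):

import numpy as np
from pandas.api.types import infer_dtype

def maybe_cast_without_dtype_sketch(subarr: np.ndarray) -> np.ndarray:
    # Mirrors the revised contract: object-dtype array in, re-inferred array out.
    assert subarr.dtype == object, subarr.dtype
    inferred = infer_dtype(subarr, skipna=False)
    if inferred == "integer":
        try:
            return subarr.astype(np.int64)
        except (ValueError, OverflowError):
            return subarr
    elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
        return np.asarray(subarr).astype(np.float64, copy=False)
    return subarr

out = maybe_cast_without_dtype_sketch(np.array([1, 2, 3], dtype=object))
print(out.dtype)  # callers now read the dtype off the returned array (int64)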