Skip to content

Commit 5ff5584

Browse files
Terji Petersen
Terji Petersen
authored and
Terji Petersen
committed
DEPR: don't make Index instantiate Int64/UInt64/Float64Index
1 parent 0b93117 commit 5ff5584

33 files changed

+255
-289
lines changed

pandas/conftest.py

-3
Original file line numberDiff line numberDiff line change
@@ -593,10 +593,7 @@ def _create_mi_with_dt64tz_level():
593593
"datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
594594
"period": tm.makePeriodIndex(100),
595595
"timedelta": tm.makeTimedeltaIndex(100),
596-
"int": tm.makeIntIndex(100),
597-
"uint": tm.makeUIntIndex(100),
598596
"range": tm.makeRangeIndex(100),
599-
"float": tm.makeFloatIndex(100),
600597
"complex64": tm.makeFloatIndex(100).astype("complex64"),
601598
"complex128": tm.makeFloatIndex(100).astype("complex128"),
602599
"num_int64": tm.makeNumericIndex(100, dtype="int64"),

pandas/core/indexes/base.py

+41-27
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
ensure_platform_int,
9090
is_bool_dtype,
9191
is_categorical_dtype,
92+
is_complex_dtype,
9293
is_dtype_equal,
9394
is_ea_or_datetimelike_dtype,
9495
is_extension_array_dtype,
@@ -104,6 +105,7 @@
104105
is_scalar,
105106
is_signed_integer_dtype,
106107
is_string_dtype,
108+
is_unsigned_integer_dtype,
107109
needs_i8_conversion,
108110
pandas_dtype,
109111
validate_all_hashable,
@@ -588,18 +590,14 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
588590

589591
return TimedeltaIndex
590592

591-
elif dtype.kind == "f":
592-
from pandas.core.api import Float64Index
593-
594-
return Float64Index
595-
elif dtype.kind == "u":
596-
from pandas.core.api import UInt64Index
597-
598-
return UInt64Index
599-
elif dtype.kind == "i":
600-
from pandas.core.api import Int64Index
593+
elif (
594+
is_numeric_dtype(dtype)
595+
and not is_bool_dtype(dtype)
596+
and not is_complex_dtype(dtype)
597+
):
598+
from pandas.core.api import NumericIndex
601599

602-
return Int64Index
600+
return NumericIndex
603601

604602
elif dtype.kind == "O":
605603
# NB: assuming away MultiIndex
@@ -1040,14 +1038,29 @@ def astype(self, dtype, copy: bool = True):
10401038
new_values = astype_nansafe(values, dtype=dtype, copy=copy)
10411039

10421040
# pass copy=False because any copying will be done in the astype above
1043-
if self._is_backward_compat_public_numeric_index:
1044-
# this block is needed so e.g. NumericIndex[int8].astype("int32") returns
1045-
# NumericIndex[int32] and not Int64Index with dtype int64.
1041+
if not self._is_backward_compat_public_numeric_index and not isinstance(
1042+
self, ABCRangeIndex
1043+
):
1044+
# this block is needed so e.g. Int64Index.astype("int32") returns
1045+
# Int64Index and not a NumericIndex with dtype int32.
10461046
# When Int64Index etc. are removed from the code base, remove this also.
10471047
if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
1048-
return self._constructor(
1049-
new_values, name=self.name, dtype=dtype, copy=False
1048+
from pandas.core.api import (
1049+
Float64Index,
1050+
Int64Index,
1051+
UInt64Index,
10501052
)
1053+
1054+
if is_signed_integer_dtype(dtype):
1055+
klass = Int64Index
1056+
elif is_unsigned_integer_dtype(dtype):
1057+
klass = UInt64Index
1058+
elif is_float_dtype(dtype):
1059+
klass = Float64Index
1060+
else:
1061+
klass = Index
1062+
return klass(new_values, name=self.name, dtype=dtype, copy=False)
1063+
10511064
return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
10521065

10531066
_index_shared_docs[
@@ -5247,6 +5260,7 @@ def putmask(self, mask, value) -> Index:
52475260
if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
52485261
# e.g. None -> np.nan, see also Block._standardize_fill_value
52495262
value = self._na_value
5263+
52505264
try:
52515265
converted = self._validate_fill_value(value)
52525266
except (LossySetitemError, ValueError, TypeError) as err:
@@ -6115,13 +6129,6 @@ def map(self, mapper, na_action=None):
61156129
new_values, self.dtype, same_dtype=same_dtype
61166130
)
61176131

6118-
if self._is_backward_compat_public_numeric_index and is_numeric_dtype(
6119-
new_values.dtype
6120-
):
6121-
return self._constructor(
6122-
new_values, dtype=dtype, copy=False, name=self.name
6123-
)
6124-
61256132
return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
61266133

61276134
# TODO: De-duplicate with map, xref GH#32349
@@ -6598,10 +6605,17 @@ def insert(self, loc: int, item) -> Index:
65986605
loc = loc if loc >= 0 else loc - 1
65996606
new_values[loc] = item
66006607

6601-
if self._typ == "numericindex":
6602-
# Use self._constructor instead of Index to retain NumericIndex GH#43921
6603-
# TODO(2.0) can use Index instead of self._constructor
6604-
return self._constructor._with_infer(new_values, name=self.name)
6608+
if not self._is_backward_compat_public_numeric_index:
6609+
from pandas.core.indexes.numeric import NumericIndex
6610+
6611+
if not isinstance(self, ABCRangeIndex) or not isinstance(
6612+
self, NumericIndex
6613+
):
6614+
return Index._with_infer(new_values, name=self.name)
6615+
else:
6616+
# Use self._constructor instead of Index to retain old-style num. index
6617+
# TODO(2.0) can use Index instead of self._constructor
6618+
return self._constructor._with_infer(new_values, name=self.name)
66056619
else:
66066620
return Index._with_infer(new_values, name=self.name)
66076621

pandas/core/indexes/numeric.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
)
1414
from pandas._typing import (
1515
Dtype,
16+
DtypeObj,
1617
npt,
1718
)
1819
from pandas.util._decorators import (
@@ -174,6 +175,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
174175
raise ValueError("Index data must be 1-dimensional")
175176

176177
subarr = np.asarray(subarr)
178+
if subarr.dtype == "float16":
179+
# float16 not supported (no indexing engine)
180+
subarr = subarr.astype("float32")
181+
177182
return subarr
178183

179184
@classmethod
@@ -198,6 +203,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
198203
return cls._default_dtype
199204

200205
dtype = pandas_dtype(dtype)
206+
if dtype == np.float16:
207+
# float16 not supported (no indexing engine)
208+
dtype = np.dtype(np.float32)
201209
assert isinstance(dtype, np.dtype)
202210

203211
if cls._is_backward_compat_public_numeric_index:
@@ -347,7 +355,26 @@ def _format_native_types(
347355
"""
348356

349357

350-
class IntegerIndex(NumericIndex):
358+
class TempBaseIndex(NumericIndex):
    """
    Shared base for the legacy Int64Index, UInt64Index and Float64Index classes.

    Overrides ``_dtype_to_subclass`` so that integer, unsigned-integer and
    float dtypes resolve to the matching legacy index class; every other
    dtype is delegated to the parent implementation.
    """

    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        """
        Return the index class that should be used for ``dtype``.

        Parameters
        ----------
        dtype : DtypeObj
            The dtype to look up.

        Returns
        -------
        type
            ``UInt64Index`` for unsigned integer dtypes, ``Int64Index`` for
            the remaining integer dtypes, ``Float64Index`` for float dtypes,
            otherwise whatever the parent class resolves ``dtype`` to.
        """
        # The unsigned check must come first: is_integer_dtype is also true
        # for unsigned dtypes, so testing it first would make the UInt64Index
        # branch unreachable and map unsigned data to Int64Index.
        if is_unsigned_integer_dtype(dtype):
            from pandas.core.api import UInt64Index

            return UInt64Index
        elif is_integer_dtype(dtype):
            from pandas.core.api import Int64Index

            return Int64Index
        elif is_float_dtype(dtype):
            from pandas.core.api import Float64Index

            return Float64Index
        else:
            return super()._dtype_to_subclass(dtype)
375+
376+
377+
class IntegerIndex(TempBaseIndex):
351378
"""
352379
This is an abstract class for Int64Index, UInt64Index.
353380
"""
@@ -391,7 +418,7 @@ def _engine_type(self) -> type[libindex.UInt64Engine]:
391418
return libindex.UInt64Engine
392419

393420

394-
class Float64Index(NumericIndex):
421+
class Float64Index(TempBaseIndex):
395422
_index_descr_args = {
396423
"klass": "Float64Index",
397424
"dtype": "float64",

pandas/core/indexes/range.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,9 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
185185
# error: Return type "Type[Int64Index]" of "_constructor" incompatible with return
186186
# type "Type[RangeIndex]" in supertype "Index"
187187
@cache_readonly
188-
def _constructor(self) -> type[Int64Index]: # type: ignore[override]
188+
def _constructor(self) -> type[NumericIndex]: # type: ignore[override]
189189
"""return the class to use for construction"""
190-
return Int64Index
190+
return NumericIndex
191191

192192
# error: Signature of "_data" incompatible with supertype "Index"
193193
@cache_readonly

pandas/core/internals/managers.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464
)
6565
from pandas.core.indexers import maybe_convert_indices
6666
from pandas.core.indexes.api import (
67-
Float64Index,
6867
Index,
6968
ensure_index,
7069
)
@@ -1582,7 +1581,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool:
15821581
def quantile(
15831582
self: T,
15841583
*,
1585-
qs: Float64Index,
1584+
qs: Index, # of dtype float64
15861585
axis: AxisInt = 0,
15871586
interpolation: QuantileInterpolation = "linear",
15881587
) -> T:
@@ -1610,7 +1609,7 @@ def quantile(
16101609
assert axis == 1 # only ever called this way
16111610

16121611
new_axes = list(self.axes)
1613-
new_axes[1] = Float64Index(qs)
1612+
new_axes[1] = Index(qs, dtype=np.float64)
16141613

16151614
blocks = [
16161615
blk.quantile(axis=axis, qs=qs, interpolation=interpolation)

pandas/tests/apply/test_series_apply.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def test_apply_datetimetz():
186186
# change dtype
187187
# GH 14506 : Returned dtype changed from int32 to int64
188188
result = s.apply(lambda x: x.hour)
189-
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
189+
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
190190
tm.assert_series_equal(result, exp)
191191

192192
# not vectorized
@@ -766,7 +766,7 @@ def test_map_datetimetz():
766766
# change dtype
767767
# GH 14506 : Returned dtype changed from int32 to int64
768768
result = s.map(lambda x: x.hour)
769-
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
769+
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
770770
tm.assert_series_equal(result, exp)
771771

772772
# not vectorized

pandas/tests/arrays/interval/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def test_arrow_array():
287287
with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
288288
pa.array(intervals, type="float64")
289289

290-
with pytest.raises(TypeError, match="different 'subtype'"):
290+
with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
291291
pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))
292292

293293

pandas/tests/arrays/sparse/test_accessor.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,12 @@ def test_from_coo(self):
4141
sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int")
4242
result = pd.Series.sparse.from_coo(sp_array)
4343

44-
index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]])
44+
index = pd.MultiIndex.from_arrays(
45+
[
46+
np.array([0, 0, 1, 3], dtype=np.int32),
47+
np.array([0, 2, 1, 3], dtype=np.int32),
48+
],
49+
)
4550
expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]")
4651
tm.assert_series_equal(result, expected)
4752

@@ -212,7 +217,15 @@ def test_series_from_coo(self, dtype, dense_index):
212217

213218
A = scipy.sparse.eye(3, format="coo", dtype=dtype)
214219
result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
215-
index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
220+
221+
index_dtype = np.int64 if dense_index else np.int32
222+
index = pd.MultiIndex.from_tuples(
223+
[
224+
np.array([0, 0], dtype=index_dtype),
225+
np.array([1, 1], dtype=index_dtype),
226+
np.array([2, 2], dtype=index_dtype),
227+
],
228+
)
216229
expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
217230
if dense_index:
218231
expected = expected.reindex(pd.MultiIndex.from_product(index.levels))

pandas/tests/frame/indexing/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager):
736736

737737
# positional slicing only via iloc!
738738
msg = (
739-
"cannot do positional indexing on Float64Index with "
739+
"cannot do positional indexing on NumericIndex with "
740740
r"these indexers \[1.0\] of type float"
741741
)
742742
with pytest.raises(TypeError, match=msg):

pandas/tests/frame/methods/test_set_index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_set_index_cast(self):
159159
df = DataFrame(
160160
{"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012]
161161
)
162-
df2 = df.set_index(df.index.astype(np.int32))
162+
df2 = df.set_index(df.index.astype(np.int64))
163163
tm.assert_frame_equal(df, df2)
164164

165165
# A has duplicate values, C does not

pandas/tests/indexes/common.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -835,11 +835,7 @@ def test_insert_non_na(self, simple_index):
835835

836836
result = index.insert(0, index[0])
837837

838-
cls = type(index)
839-
if cls is RangeIndex:
840-
cls = Int64Index
841-
842-
expected = cls([index[0]] + list(index), dtype=index.dtype)
838+
expected = Index([index[0]] + list(index), dtype=index.dtype)
843839
tm.assert_index_equal(result, expected, exact=True)
844840

845841
def test_insert_na(self, nulls_fixture, simple_index):
@@ -850,7 +846,7 @@ def test_insert_na(self, nulls_fixture, simple_index):
850846
if na_val is pd.NaT:
851847
expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
852848
else:
853-
expected = Float64Index([index[0], np.nan] + list(index[1:]))
849+
expected = Index([index[0], np.nan] + list(index[1:]))
854850

855851
if index._is_backward_compat_public_numeric_index:
856852
# GH#43921 we preserve NumericIndex

pandas/tests/indexes/datetimes/test_date_range.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,7 @@ def test_date_range_span_dst_transition(self, tzstr):
812812

813813
dr = date_range("2012-11-02", periods=10, tz=tzstr)
814814
result = dr.hour
815-
expected = pd.Index([0] * 10)
815+
expected = pd.Index([0] * 10, dtype="int32")
816816
tm.assert_index_equal(result, expected)
817817

818818
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])

pandas/tests/indexes/datetimes/test_misc.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,9 @@ def test_datetime_name_accessors(self, time_locale):
281281

282282
def test_nanosecond_field(self):
283283
dti = DatetimeIndex(np.arange(10))
284+
expected = Index(np.arange(10, dtype=np.int32))
284285

285-
tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64)))
286+
tm.assert_index_equal(dti.nanosecond, expected)
286287

287288

288289
def test_iter_readonly():

0 commit comments

Comments
 (0)