Skip to content

Commit a4a2aa5

Browse files
committed
ENH: Add NumIndex for indexes of any numeric type
1 parent cc75b68 commit a4a2aa5

19 files changed

+280
-26
lines changed

doc/source/whatsnew/v1.3.0.rst

+24
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@ including other versions of pandas.
2323
Enhancements
2424
~~~~~~~~~~~~
2525

26+
.. _whatsnew_130.numindex:
27+
28+
NumIndex: New Index type that can hold all numpy numeric dtypes
29+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30+
31+
We've added :class:`NumIndex`, an index type whose dtype can be any of int64/32/16/8,
32+
uint64/32/16/8 or float64/32 numpy dtypes. For example:
33+
34+
.. ipython:: python
35+
36+
idx = pd.NumIndex(range(1_000), dtype="uint16")
37+
idx
38+
ser = pd.Series(range(1_000), index=idx)
39+
ser
40+
41+
To use ``NumIndex``, you need to instantiate it directly, as e.g.
42+
``pd.Index(range(1_000), dtype="uint16")`` will return a ``UInt64Index`` for reasons
43+
of backwards compatibility.
44+
45+
``NumIndex`` will become the default index for numeric indexes in Pandas 2.0 and
46+
``Int64Index``, ``UInt64Index`` and ``Float64Index`` will be deprecated in a
47+
future version of Pandas and removed in version 2.0.
48+
49+
2650
.. _whatsnew_130.read_csv_json_http_headers:
2751

2852
Custom HTTP(s) headers when reading csv or json files

pandas/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
UInt64Index,
7979
RangeIndex,
8080
Float64Index,
81+
NumIndex,
8182
MultiIndex,
8283
IntervalIndex,
8384
TimedeltaIndex,

pandas/_libs/join.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,9 @@ ctypedef fused join_t:
263263
int16_t
264264
int32_t
265265
int64_t
266+
uint8_t
267+
uint16_t
268+
uint32_t
266269
uint64_t
267270

268271

pandas/_testing/__init__.py

+16
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
is_period_dtype,
3434
is_sequence,
3535
is_timedelta64_dtype,
36+
pandas_dtype,
3637
)
3738

3839
import pandas as pd
@@ -44,6 +45,7 @@
4445
Index,
4546
IntervalIndex,
4647
MultiIndex,
48+
NumIndex,
4749
RangeIndex,
4850
Series,
4951
bdate_range,
@@ -309,6 +311,20 @@ def makeFloatIndex(k=10, name=None):
309311
return Index(values * (10 ** np.random.randint(0, 9)), name=name)
310312

311313

314+
def makeNumIndex(k=10, name=None, *, dtype):
    """
    Build a ``NumIndex`` of length ``k`` holding values of the given dtype.

    Parameters
    ----------
    k : int, default 10
        Number of elements to generate.
    name : object, optional
        Passed through as the ``name`` of the resulting index.
    dtype : str or dtype
        Required keyword-only argument; normalised with ``pandas_dtype``.
        Only the kinds ``"i"``, ``"u"`` and ``"f"`` are handled.

    Returns
    -------
    NumIndex

    Raises
    ------
    NotImplementedError
        If ``dtype.kind`` is not one of ``"i"``, ``"u"`` or ``"f"``.
    """
    dtype = pandas_dtype(dtype)
    kind = dtype.kind

    if kind == "i":
        # signed integers: 0, 1, ..., k - 1
        data = list(range(k))
    elif kind == "u":
        # unsigned integers: count upwards from 2 ** (bit width - 1)
        offset = 2 ** (dtype.itemsize * 8 - 1)
        data = [offset + step for step in range(k)]
    elif kind == "f":
        # floats: k sorted random samples, all shifted down by one further
        # random sample (the subtraction broadcasts over the sorted list)
        data = sorted(np.random.random_sample(k)) - np.random.random_sample(1)
    else:
        raise NotImplementedError()

    return NumIndex(data, name=name, dtype=dtype)
326+
327+
312328
def makeDateIndex(k: int = 10, freq="B", name=None, **kwargs) -> DatetimeIndex:
313329
dt = datetime(2000, 1, 1)
314330
dr = bdate_range(dt, periods=k, freq=freq, name=name)

pandas/_testing/asserters.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,9 @@ def assert_index_equal(
265265
right : Index
266266
exact : bool or {'equiv'}, default 'equiv'
267267
Whether to check the Index class, dtype and inferred_type
268-
are identical. If 'equiv', then RangeIndex can be substituted for
269-
Int64Index as well.
268+
are identical. If 'equiv', RangeIndex can be substituted for
269+
Int64Index and signed integer dtypes will be equivalent to each other, unsigned
270+
integer to each other and float dtypes equivalent to each other.
270271
check_names : bool, default True
271272
Whether to check the names attribute.
272273
check_less_precise : bool or int, default False
@@ -313,7 +314,10 @@ def _check_types(left, right, obj="Index"):
313314
assert_class_equal(left, right, exact=exact, obj=obj)
314315

315316
# Skip exact dtype checking when `check_categorical` is False
316-
if check_categorical:
317+
if check_categorical and "categorical" in (
318+
left.inferred_type,
319+
right.inferred_type,
320+
):
317321
assert_attr_equal("dtype", left, right, obj=obj)
318322

319323
# allow string-like to have different inferred_types

pandas/conftest.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,16 @@ def _create_mi_with_dt64tz_level():
471471
"uint": tm.makeUIntIndex(100),
472472
"range": tm.makeRangeIndex(100),
473473
"float": tm.makeFloatIndex(100),
474+
"num_int64": tm.makeNumIndex(100, dtype="int64"),
475+
"num_int32": tm.makeNumIndex(100, dtype="int32"),
476+
"num_int16": tm.makeNumIndex(100, dtype="int16"),
477+
"num_int8": tm.makeNumIndex(100, dtype="int8"),
478+
"num_uint64": tm.makeNumIndex(100, dtype="uint64"),
479+
"num_uint32": tm.makeNumIndex(100, dtype="uint32"),
480+
"num_uint16": tm.makeNumIndex(100, dtype="uint16"),
481+
"num_uint8": tm.makeNumIndex(100, dtype="uint8"),
482+
"num_float64": tm.makeNumIndex(100, dtype="float64"),
483+
"num_float32": tm.makeNumIndex(100, dtype="float32"),
474484
"bool": tm.makeBoolIndex(10),
475485
"categorical": tm.makeCategoricalIndex(100),
476486
"interval": tm.makeIntervalIndex(100),
@@ -522,7 +532,10 @@ def index_flat(request):
522532
params=[
523533
key
524534
for key in indices_dict
525-
if key not in ["int", "uint", "range", "empty", "repeats"]
535+
if not (
536+
key in ["int", "uint", "range", "empty", "repeats"]
537+
or key.startswith("num_")
538+
)
526539
and not isinstance(indices_dict[key], MultiIndex)
527540
]
528541
)

pandas/core/algorithms.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,11 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
143143
# until our algos support uint8 directly (see TODO)
144144
return np.asarray(values).astype("uint64"), np.dtype("bool")
145145
elif is_signed_integer_dtype(values):
146-
return ensure_int64(values), np.dtype("int64")
146+
return ensure_int64(values), values.dtype
147147
elif is_unsigned_integer_dtype(values):
148-
return ensure_uint64(values), np.dtype("uint64")
148+
return ensure_uint64(values), values.dtype
149149
elif is_float_dtype(values):
150-
return ensure_float64(values), np.dtype("float64")
150+
return ensure_float64(values), values.dtype
151151
elif is_complex_dtype(values):
152152

153153
# ignore the fact that we are casting to float

pandas/core/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
Int64Index,
5858
IntervalIndex,
5959
MultiIndex,
60+
NumIndex,
6061
PeriodIndex,
6162
RangeIndex,
6263
TimedeltaIndex,

pandas/core/dtypes/generic.py

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def _check(cls, inst) -> bool:
100100
"rangeindex",
101101
"float64index",
102102
"uint64index",
103+
"numindex",
103104
"multiindex",
104105
"datetimeindex",
105106
"timedeltaindex",

pandas/core/indexes/api.py

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
Float64Index,
2626
Int64Index,
2727
NumericIndex,
28+
NumIndex,
2829
UInt64Index,
2930
)
3031
from pandas.core.indexes.period import PeriodIndex
@@ -46,6 +47,7 @@
4647
__all__ = [
4748
"Index",
4849
"MultiIndex",
50+
"NumIndex",
4951
"NumericIndex",
5052
"Float64Index",
5153
"Int64Index",

pandas/core/indexes/base.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ def __new__(
390390
)
391391

392392
from pandas.core.arrays import PandasArray
393+
from pandas.core.indexes.numeric import NumIndex
393394
from pandas.core.indexes.range import RangeIndex
394395

395396
name = maybe_extract_name(name, data, cls)
@@ -441,6 +442,8 @@ def __new__(
441442
return Index._simple_new(data, name=name)
442443

443444
# index-like
445+
elif isinstance(data, NumIndex) and dtype is None:
446+
return NumIndex(data, name=name, copy=copy)
444447
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
445448

446449
if isinstance(data, ABCMultiIndex):
@@ -2939,7 +2942,11 @@ def union(self, other, sort=None):
29392942
# float | [u]int -> float (the special case)
29402943
# <T> | <T> -> T
29412944
# <T> | <U> -> object
2942-
if not (is_integer_dtype(self.dtype) and is_integer_dtype(other.dtype)):
2945+
if is_float_dtype(self.dtype) and is_float_dtype(other.dtype):
2946+
dtype = np.dtype("object")
2947+
elif not (
2948+
is_integer_dtype(self.dtype) and is_integer_dtype(other.dtype)
2949+
):
29432950
dtype = np.dtype("float64")
29442951
else:
29452952
# one is int64 other is uint64
@@ -5465,8 +5472,9 @@ def map(self, mapper, na_action=None):
54655472
a MultiIndex will be returned.
54665473
"""
54675474
from pandas.core.indexes.multi import MultiIndex
5475+
from pandas.core.indexes.numeric import NumIndex
54685476

5469-
new_values = super()._map_values(mapper, na_action=na_action)
5477+
new_values = self._map_values(mapper, na_action=na_action)
54705478

54715479
attributes = self._get_attributes_dict()
54725480

@@ -5485,6 +5493,9 @@ def map(self, mapper, na_action=None):
54855493
# empty
54865494
attributes["dtype"] = self.dtype
54875495

5496+
if isinstance(self, NumIndex):
5497+
return NumIndex(new_values, **attributes)
5498+
54885499
return Index(new_values, **attributes)
54895500

54905501
# TODO: De-duplicate with map, xref GH#32349

pandas/core/indexes/category.py

+21
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
ensure_platform_int,
2626
is_categorical_dtype,
2727
is_scalar,
28+
pandas_dtype,
2829
)
2930
from pandas.core.dtypes.missing import (
3031
is_valid_na_for_dtype,
@@ -277,6 +278,26 @@ def _is_dtype_compat(self, other) -> Categorical:
277278

278279
return other
279280

281+
@doc(Index.astype)
def astype(self, dtype, copy: bool = True) -> Index:
    # Imported locally, as in the original implementation.
    from pandas import NumIndex

    dtype = pandas_dtype(dtype)
    categories = self.categories

    # Decide whether the result can be returned as a NumIndex: the categories
    # must themselves be a NumIndex and ``_validate_dtype`` must accept the
    # requested dtype (it signals rejection by raising ValueError).
    as_num_index = False
    if isinstance(categories, NumIndex):
        try:
            categories._validate_dtype(dtype)
        except ValueError:
            as_num_index = False
        else:
            as_num_index = True

    if not as_num_index:
        # Everything else goes through the parent class implementation.
        return super().astype(dtype, copy=copy)

    converted = self._data.astype(dtype, copy=copy)
    # copy=False: any copying requested by the caller has already been done
    # by the ``_data.astype`` call above.
    return NumIndex(converted, name=self.name, copy=False)
300+
280301
def equals(self, other: object) -> bool:
281302
"""
282303
Determine if two CategoricalIndex objects contain the same elements.

0 commit comments

Comments
 (0)