From 9739f851600fea6f1b2de96b1b61c08a820ba95c Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 25 Apr 2021 20:07:30 +0100 Subject: [PATCH 01/55] ENH: Add NumIndex for indexes of any numeric type --- pandas/__init__.py | 1 + pandas/_libs/join.pyx | 3 +++ pandas/_testing/__init__.py | 1 + pandas/_testing/asserters.py | 5 +++-- pandas/conftest.py | 15 ++++++++++++++- pandas/core/api.py | 1 + pandas/core/dtypes/generic.py | 1 + pandas/core/indexes/api.py | 2 ++ pandas/core/indexes/base.py | 7 +++++++ pandas/core/indexes/category.py | 21 +++++++++++++++++++++ pandas/core/indexes/numeric.py | 1 + pandas/tests/indexes/common.py | 6 +++--- pandas/tests/indexes/test_base.py | 3 +++ pandas/tests/indexes/test_common.py | 7 +++++-- pandas/tests/indexes/test_numpy_compat.py | 8 +++----- pandas/tests/indexes/test_setops.py | 14 ++++++++++++++ 16 files changed, 83 insertions(+), 13 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 43f05617584cc..e8fef482247b0 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -75,6 +75,7 @@ UInt64Index, RangeIndex, Float64Index, + NumIndex, MultiIndex, IntervalIndex, TimedeltaIndex, diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index eefa16d23f576..b6acf8914c0a6 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -265,6 +265,9 @@ ctypedef fused join_t: int16_t int32_t int64_t + uint8_t + uint16_t + uint32_t uint64_t diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index fc6c7f4c17ea0..ca05231b9c3ca 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -50,6 +50,7 @@ Int64Index, IntervalIndex, MultiIndex, + NumIndex, RangeIndex, Series, UInt64Index, diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d0957b1814213..bc7d17bb99c20 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -267,8 +267,9 @@ def assert_index_equal( right : Index exact : bool or {'equiv'}, default 'equiv' Whether to check 
the Index class, dtype and inferred_type - are identical. If 'equiv', then RangeIndex can be substituted for - Int64Index as well. + are identical. If 'equiv', RangeIndex can be substituted for + Int64Index and signed integer dtypes will be equivalent to each other, unsigned + integer to each other and float dtypes equivalent to each other. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False diff --git a/pandas/conftest.py b/pandas/conftest.py index 218fae7ecd969..4a1ec977baab5 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -460,6 +460,16 @@ def _create_mi_with_dt64tz_level(): "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), + "num_int64": tm.makeNumIndex(100, dtype="int64"), + "num_int32": tm.makeNumIndex(100, dtype="int32"), + "num_int16": tm.makeNumIndex(100, dtype="int16"), + "num_int8": tm.makeNumIndex(100, dtype="int8"), + "num_uint64": tm.makeNumIndex(100, dtype="uint64"), + "num_uint32": tm.makeNumIndex(100, dtype="uint32"), + "num_uint16": tm.makeNumIndex(100, dtype="uint16"), + "num_uint8": tm.makeNumIndex(100, dtype="uint8"), + "num_float64": tm.makeNumIndex(100, dtype="float64"), + "num_float32": tm.makeNumIndex(100, dtype="float32"), "bool": tm.makeBoolIndex(10), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), @@ -511,7 +521,10 @@ def index_flat(request): params=[ key for key in indices_dict - if key not in ["int", "uint", "range", "empty", "repeats"] + if not ( + key in ["int", "uint", "range", "empty", "repeats"] + or key.startswith("num_") + ) and not isinstance(indices_dict[key], MultiIndex) ] ) diff --git a/pandas/core/api.py b/pandas/core/api.py index 2677530455b07..5cb1993b2c5c7 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -57,6 +57,7 @@ Int64Index, IntervalIndex, MultiIndex, + NumIndex, PeriodIndex, RangeIndex, TimedeltaIndex, diff --git a/pandas/core/dtypes/generic.py 
b/pandas/core/dtypes/generic.py index 2de7b262c3533..8fac906fdca87 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -100,6 +100,7 @@ def _check(cls, inst) -> bool: "rangeindex", "float64index", "uint64index", + "numindex", "multiindex", "datetimeindex", "timedeltaindex", diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 304c42321e72a..7b8856fa8c73a 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -23,6 +23,7 @@ Float64Index, Int64Index, NumericIndex, + NumIndex, UInt64Index, ) from pandas.core.indexes.period import PeriodIndex @@ -44,6 +45,7 @@ __all__ = [ "Index", "MultiIndex", + "NumIndex", "NumericIndex", "Float64Index", "Int64Index", diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 54271f0f9b492..ddbe8731e3d3e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -385,6 +385,7 @@ def __new__( ) from pandas.core.arrays import PandasArray + from pandas.core.indexes.numeric import NumIndex from pandas.core.indexes.range import RangeIndex name = maybe_extract_name(name, data, cls) @@ -436,6 +437,8 @@ def __new__( return Index._simple_new(data, name=name) # index-like + elif isinstance(data, NumIndex) and dtype is None: + return NumIndex(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): if isinstance(data, ABCMultiIndex): @@ -5699,6 +5702,7 @@ def map(self, mapper, na_action=None): a MultiIndex will be returned. 
""" from pandas.core.indexes.multi import MultiIndex + from pandas.core.indexes.numeric import NumIndex new_values = self._map_values(mapper, na_action=na_action) @@ -5719,6 +5723,9 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype + if isinstance(self, NumIndex): + return NumIndex(new_values, **attributes) + return Index(new_values, **attributes) # TODO: De-duplicate with map, xref GH#32349 diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7339c82cbcc77..aa0b72aca3765 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -21,6 +21,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_scalar, + pandas_dtype, ) from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, @@ -280,6 +281,26 @@ def _is_dtype_compat(self, other) -> Categorical: return other + @doc(Index.astype) + def astype(self, dtype, copy: bool = True) -> Index: + from pandas import NumIndex + + dtype = pandas_dtype(dtype) + + cat = self.categories + if isinstance(cat, NumIndex): + try: + cat._validate_dtype(dtype) + except ValueError: + pass + else: + new_values = self._data.astype(dtype, copy=copy) + # pass copy=False because any copying has been done in the + # _data.astype call above + return NumIndex(new_values, name=self.name, copy=False) + + return super().astype(dtype, copy=copy) + def equals(self, other: object) -> bool: """ Determine if two CategoricalIndex objects contain the same elements. 
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bb9a2688f0485..466e363a66adf 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -23,6 +23,7 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( + is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, is_float, diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e02b2559bb8ae..edd73f7ef768d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import ( +from pandas import ( # noqa CategoricalIndex, DatetimeIndex, Float64Index, @@ -533,7 +533,7 @@ def test_hasnans_isnans(self, index_flat): return elif isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)): + elif issubclass(index.dtype.type, np.integer): return else: values[1] = np.nan @@ -572,7 +572,7 @@ def test_fillna(self, index): if isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)): + elif issubclass(index.dtype.type, np.integer): return else: values[1] = np.nan diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 826649358e663..c9823b7f0ee03 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -24,6 +24,7 @@ Float64Index, Int64Index, IntervalIndex, + NumIndex, PeriodIndex, RangeIndex, Series, @@ -713,6 +714,8 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) + elif isinstance(index, NumIndex): + expected = NumIndex(np.arange(len(index), 0, -1), dtype=index.dtype) else: expected = Index(np.arange(len(index), 0, -1)) diff --git a/pandas/tests/indexes/test_common.py 
b/pandas/tests/indexes/test_common.py index 882e708a357c8..a47fbe8160a5a 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -21,6 +21,7 @@ CategoricalIndex, DatetimeIndex, MultiIndex, + NumIndex, PeriodIndex, RangeIndex, TimedeltaIndex, @@ -261,7 +262,8 @@ def test_drop_duplicates(self, index_flat, keep): # make unique index holder = type(index) unique_values = list(set(index)) - unique_idx = holder(unique_values) + dtype = index.dtype if isinstance(index, NumIndex) else None + unique_idx = holder(unique_values, dtype=dtype) # make duplicated index n = len(unique_idx) @@ -289,7 +291,8 @@ def test_drop_duplicates_no_duplicates(self, index_flat): else: holder = type(index) unique_values = list(set(index)) - unique_idx = holder(unique_values) + dtype = index.dtype if isinstance(index, NumIndex) else None + unique_idx = holder(unique_values, dtype=dtype) # check on unique index expected_duplicated = np.array([False] * len(unique_idx), dtype="bool") diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 92adc0570dee1..b869aeff9786d 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -5,14 +5,12 @@ DatetimeIndex, Float64Index, Index, - Int64Index, PeriodIndex, - RangeIndex, TimedeltaIndex, - UInt64Index, ) import pandas._testing as tm from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.core.indexes.numeric import NumericIndex @pytest.mark.parametrize( @@ -51,7 +49,7 @@ def test_numpy_ufuncs_basic(index, func): with tm.external_error_raised((TypeError, AttributeError)): with np.errstate(all="ignore"): func(index) - elif isinstance(index, (Float64Index, Int64Index, UInt64Index, RangeIndex)): + elif isinstance(index, NumericIndex): # coerces to float (e.g. 
np.sin) with np.errstate(all="ignore"): result = func(index) @@ -96,7 +94,7 @@ def test_numpy_ufuncs_other(index, func, request): with tm.external_error_raised(TypeError): func(index) - elif isinstance(index, (Float64Index, Int64Index, UInt64Index, RangeIndex)): + elif isinstance(index, NumericIndex): # Results in bool array result = func(index) assert isinstance(result, np.ndarray) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 20174beacf1d3..c5be98d4bce8f 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -31,7 +31,21 @@ COMPATIBLE_INCONSISTENT_PAIRS = [ (np.float64, np.int64), + (np.float64, np.int32), + (np.float64, np.int16), + (np.float64, np.int8), (np.float64, np.uint64), + (np.float64, np.uint32), + (np.float64, np.uint16), + (np.float64, np.uint8), + (np.float32, np.int64), + (np.float32, np.int32), + (np.float32, np.int16), + (np.float32, np.int8), + (np.float32, np.uint64), + (np.float32, np.uint32), + (np.float32, np.uint16), + (np.float32, np.uint8), ] From 88a7858775f28ce045d8f4122beb5b457b482ca6 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 25 Apr 2021 22:56:03 +0100 Subject: [PATCH 02/55] fix various issues reported by the CI --- pandas/tests/base/test_unique.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index cabe766a4e9eb..258d9a0e3c30d 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -24,6 +24,9 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.NumIndex): + expected = pd.NumIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): @@ -62,7 
+65,10 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if isinstance(obj, pd.Index): + if isinstance(obj, pd.NumIndex): + expected = pd.NumIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected) + elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): result = result.normalize() From 7ccb1b79c1753dde295db98de4510dedb4b0124c Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 26 Apr 2021 06:21:48 +0100 Subject: [PATCH 03/55] fix test failure --- pandas/tests/api/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 95dc1d82cb286..58c7d556826de 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -67,6 +67,7 @@ class TestPDApi(Base): "HDFStore", "Index", "Int64Index", + "NumIndex", "MultiIndex", "Period", "PeriodIndex", From b45500dde7d1a6d5c58bd6b4af484a4ff77e08b0 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 27 Apr 2021 12:11:28 +0100 Subject: [PATCH 04/55] Make (Int|UInt|Float)64Index inherit from NumIndex --- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ddbe8731e3d3e..a453b134aa278 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -437,7 +437,7 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif isinstance(data, NumIndex) and dtype is None: + elif type(data) is NumIndex and dtype is None: return NumIndex(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -5723,8 +5723,8 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype - if 
isinstance(self, NumIndex): - return NumIndex(new_values, **attributes) + if type(self) is NumIndex: + return type(self)(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index aa0b72aca3765..43cbe2b80a1ec 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -288,7 +288,7 @@ def astype(self, dtype, copy: bool = True) -> Index: dtype = pandas_dtype(dtype) cat = self.categories - if isinstance(cat, NumIndex): + if type(cat) is NumIndex: try: cat._validate_dtype(dtype) except ValueError: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c9823b7f0ee03..4b7d330cf19da 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -714,7 +714,7 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) - elif isinstance(index, NumIndex): + elif type(index) is NumIndex: expected = NumIndex(np.arange(len(index), 0, -1), dtype=index.dtype) else: expected = Index(np.arange(len(index), 0, -1)) From 5ef35f58503e6e089a011a9c2e2d3dea240053e6 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 27 Apr 2021 13:34:22 +0100 Subject: [PATCH 05/55] fix errors --- pandas/core/indexes/category.py | 1 + pandas/tests/base/test_unique.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 43cbe2b80a1ec..a77f855ab20cc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -289,6 +289,7 @@ def astype(self, dtype, copy: bool = True) -> Index: cat = self.categories if type(cat) is NumIndex: + assert isinstance(cat, NumIndex) # mypy complaint fix try: cat._validate_dtype(dtype) except ValueError: diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 258d9a0e3c30d..7844530df6147 100644 --- 
a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -65,7 +65,7 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if isinstance(obj, pd.NumIndex): + if type(obj) is pd.NumIndex: expected = pd.NumIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected) elif isinstance(obj, pd.Index): From d8f6c229fae2626b14c683438553b013ae4f9417 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 2 May 2021 15:22:46 +0100 Subject: [PATCH 06/55] Add more numeric tests for NumIndex --- pandas/core/indexes/base.py | 26 +++++++++++++++++++++++--- pandas/tests/indexes/common.py | 15 +++++++++++---- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a453b134aa278..5dd6891afb60c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -437,7 +437,7 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif type(data) is NumIndex and dtype is None: + elif isinstance(data, NumIndex) and data._is_num_index() and dtype is None: return NumIndex(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -2428,6 +2428,26 @@ def _is_multi(self) -> bool: """ return isinstance(self, ABCMultiIndex) + def _is_num_index(self) -> bool: + """ + Whether self is a NumIndex, but not *not* Int64Index, UInt64Index, FloatIndex. + + Typically used to check if an operation should return NumIndex or plain Index. 
+ """ + from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, + NumIndex, + UInt64Index, + ) + + if not isinstance(self, NumIndex): + return False + elif isinstance(self, (Int64Index, UInt64Index, Float64Index)): + return False + else: + return True + # -------------------------------------------------------------------- # Pickle Methods @@ -5723,8 +5743,8 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype - if type(self) is NumIndex: - return type(self)(new_values, **attributes) + if self._is_num_index() and issubclass(new_values.dtype.type, np.number): + return NumIndex(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index edd73f7ef768d..3b64ec7dc3f0a 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import ( # noqa +from pandas import ( CategoricalIndex, DatetimeIndex, Float64Index, @@ -30,6 +30,7 @@ ) import pandas._testing as tm from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.core.indexes.numeric import NumIndex class Base: @@ -351,12 +352,13 @@ def test_numpy_argsort(self, index): def test_repeat(self, simple_index): rep = 2 idx = simple_index.copy() - expected = Index(idx.values.repeat(rep), name=idx.name) + new_index_cls = type(idx) if not isinstance(idx, RangeIndex) else Int64Index + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) tm.assert_index_equal(idx.repeat(rep), expected) idx = simple_index rep = np.arange(len(idx)) - expected = Index(idx.values.repeat(rep), name=idx.name) + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) tm.assert_index_equal(idx.repeat(rep), expected) def test_numpy_repeat(self, simple_index): @@ -657,7 +659,12 @@ def test_map_dictlike(self, mapper, simple_index): 
tm.assert_index_equal(result, expected) # empty mappable - expected = Index([np.nan] * len(idx)) + if idx._is_num_index(): + new_index_cls = NumIndex + else: + new_index_cls = Float64Index + + expected = new_index_cls([np.nan] * len(idx)) result = idx.map(mapper(expected, idx)) tm.assert_index_equal(result, expected) From 1c65a0b7352aed6720a88e0428058410cdca9a12 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 2 May 2021 16:24:16 +0100 Subject: [PATCH 07/55] fixups --- pandas/tests/indexes/test_base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4b7d330cf19da..b9be3bd6f1f06 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -24,7 +24,6 @@ Float64Index, Int64Index, IntervalIndex, - NumIndex, PeriodIndex, RangeIndex, Series, @@ -714,8 +713,8 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) - elif type(index) is NumIndex: - expected = NumIndex(np.arange(len(index), 0, -1), dtype=index.dtype) + elif index._is_num_index(): + expected = type(index)(np.arange(len(index), 0, -1), dtype=index.dtype) else: expected = Index(np.arange(len(index), 0, -1)) From f3e13aa920f9b2f3031d40a0c6e855f18be2bc7d Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 6 May 2021 21:16:29 +0100 Subject: [PATCH 08/55] fix exact='equiv' --- pandas/_testing/asserters.py | 6 ++++-- pandas/core/indexes/base.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index bc7d17bb99c20..60016bbf92e60 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -16,6 +16,8 @@ is_bool, is_categorical_dtype, is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, is_interval_dtype, is_number, is_numeric_dtype, @@ -268,8 +270,8 @@ def assert_index_equal( exact : bool or {'equiv'}, default 'equiv' 
Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', RangeIndex can be substituted for - Int64Index and signed integer dtypes will be equivalent to each other, unsigned - integer to each other and float dtypes equivalent to each other. + Int64Index and integer dtypes will be equivalent to each other and + float dtypes equivalent to each other. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5dd6891afb60c..604f64fe872b0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -80,6 +80,7 @@ is_interval_dtype, is_iterator, is_list_like, + is_numeric_dtype, is_object_dtype, is_scalar, is_signed_integer_dtype, @@ -5743,7 +5744,7 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype - if self._is_num_index() and issubclass(new_values.dtype.type, np.number): + if self._is_num_index() and is_numeric_dtype(new_values.dtype): return NumIndex(new_values, **attributes) return Index(new_values, **attributes) From 4e174855d717da70337e92b5239b7c16c32bd720 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 09:10:57 +0100 Subject: [PATCH 09/55] add more comprehensive tests --- pandas/core/indexes/numeric.py | 87 +++++--------------- pandas/tests/indexes/common.py | 10 ++- pandas/tests/indexes/numeric/test_numeric.py | 29 +++---- 3 files changed, 38 insertions(+), 88 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 466e363a66adf..540d47287b60a 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -23,7 +23,6 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( - is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -163,10 +162,13 @@ def _ensure_array(cls, data, dtype, copy: bool): if issubclass(data.dtype.type, 
str): cls._string_data_error(data) - dtype = cls._ensure_dtype(dtype) + dtype = cls._ensure_dtype(dtype, validate=False) if copy or not is_dtype_equal(data.dtype, dtype): subarr = np.array(data, dtype=dtype, copy=copy) + if not is_numeric_dtype(subarr.dtype): + # hack to raise correctly + subarr = np.array(data, dtype="float64", copy=copy) cls._assert_safe_casting(data, subarr) else: subarr = data @@ -193,42 +195,20 @@ def _validate_dtype(cls, dtype: Dtype | None) -> None: def _ensure_dtype( cls, dtype: Dtype | None, + validate: bool = True, ) -> np.dtype | None: - """Ensure int64 dtype for Int64Index, etc. Assumed dtype is validated.""" - return cls._default_dtype + """Ensure int64 dtype for Int64Index, etc. but allow int32 etc. for NumIndex.""" + if validate: + cls._validate_dtype(dtype) - def __contains__(self, key) -> bool: - """ - Check if key is a float and has a decimal. If it has, return False. - """ - if not is_integer_dtype(self.dtype): - return super().__contains__(key) - - hash(key) - try: - if is_float(key) and int(key) != key: - # otherwise the `key in self._engine` check casts e.g. 
1.1 -> 1 - return False - return key in self._engine - except (OverflowError, TypeError, ValueError): - return False - - @doc(Index.astype) - def astype(self, dtype, copy=True): - if is_float_dtype(self.dtype): - dtype = pandas_dtype(dtype) - if needs_i8_conversion(dtype): - raise TypeError( - f"Cannot convert Float64Index to dtype {dtype}; integer " - "values are required for conversion" - ) - elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): - # TODO(jreback); this can change once we have an EA Index type - # GH 13149 - arr = astype_nansafe(self._values, dtype=dtype) - return Int64Index(arr, name=self.name) + if dtype is None: + return cls._default_dtype - return super().astype(dtype, copy=copy) + dtype = pandas_dtype(dtype) + if cls._default_dtype is not None: + return cls._default_dtype + else: + return dtype # ---------------------------------------------------------------- # Indexing Methods @@ -288,16 +268,13 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return is_numeric_dtype(dtype) @classmethod - def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: + def _assert_safe_casting(cls, data, subarr): """ - Ensure incoming data can be represented with matching signed-ness. - - Needed if the process of casting data from some accepted dtype to the internal - dtype(s) bears the risk of truncation (e.g. float to int). + Subclasses need to override this only if the process of casting data + from some accepted dtype to the internal dtype(s) bears the risk of + truncation (e.g. float to int). 
""" - if is_integer_dtype(subarr.dtype): - if not np.array_equal(data, subarr): - raise TypeError("Unsafe NumPy casting, you must explicitly cast") + pass @property def _is_all_dates(self) -> bool: @@ -306,30 +283,6 @@ def _is_all_dates(self) -> bool: """ return False - def _format_native_types( - self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs - ): - from pandas.io.formats.format import FloatArrayFormatter - - if is_float_dtype(self.dtype): - formatter = FloatArrayFormatter( - self._values, - na_rep=na_rep, - float_format=float_format, - decimal=decimal, - quoting=quoting, - fixed_width=False, - ) - return formatter.get_result_as_array() - - return super()._format_native_types( - na_rep=na_rep, - float_format=float_format, - decimal=decimal, - quoting=quoting, - **kwargs, - ) - class IntegerIndex(NumericIndex): """ diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3b64ec7dc3f0a..1494fa7754a6c 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -9,7 +9,11 @@ from pandas._libs import iNaT from pandas._libs.tslibs import Timestamp -from pandas.core.dtypes.common import is_datetime64tz_dtype +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_float_dtype, + is_integer_dtype, +) from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -624,8 +628,10 @@ def test_map(self, simple_index): idx = simple_index # we don't infer UInt64 - if isinstance(idx, UInt64Index): + if is_integer_dtype(idx.dtype): expected = idx.astype("int64") + elif is_float_dtype(idx.dtype): + expected = idx.astype("float64") else: expected = idx diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 8cbca0ba8eb65..a638b7e3113a4 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -8,6 +8,7 @@ Float64Index, Index, Int64Index, + NumIndex, Series, UInt64Index, ) 
@@ -22,9 +23,7 @@ class TestFloat64Index(NumericBase): def dtype(self, request): return request.param - @pytest.fixture( - params=["int64", "uint64", "category", "datetime64", "object"], - ) + @pytest.fixture(params=["int64", "uint64", "category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -92,11 +91,11 @@ def test_constructor(self, dtype): assert isinstance(index, index_cls) assert index.dtype == dtype - index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) assert isinstance(index, index_cls) assert index.dtype == dtype - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) assert isinstance(index, index_cls) assert index.dtype == dtype @@ -387,9 +386,7 @@ class TestInt64Index(NumericInt): def dtype(self, request): return request.param - @pytest.fixture( - params=["uint64", "float64", "category", "datetime64", "object"], - ) + @pytest.fixture(params=["uint64", "float64", "category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -447,18 +444,14 @@ def test_constructor_corner(self, dtype): index_cls = self._index_cls arr = np.array([1, 2, 3, 4], dtype=object) - index = index_cls(arr) - assert index.values.dtype == dtype + index = index_cls(arr, dtype=dtype) + assert index.values.dtype == index.dtype tm.assert_index_equal(index, Index(arr)) # preventing casting arr = np.array([1, "2", 3, "4"], dtype=object) with pytest.raises(TypeError, match="casting"): - index_cls(arr) - - arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with pytest.raises(TypeError, match="casting"): - index_cls(arr_with_floats) + index_cls(arr, dtype=dtype) def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): @@ -486,9 +479,7 @@ class TestUInt64Index(NumericInt): def dtype(self): return np.uint64 - @pytest.fixture( - params=["int64", "float64", "category", "datetime64", 
"object"], - ) + @pytest.fixture(params=["int64", "float64", "category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -505,7 +496,7 @@ def simple_index(self, dtype): ids=["index_inc", "index_dec"], ) def index(self, request): - return self._index_cls(request.param) + return self._index_cls(request.param, dtype=np.uint64) def test_constructor(self, dtype): index_cls = self._index_cls From c1e801db19414726f272db03f8e28ea9b4e6c206 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 17:25:52 +0100 Subject: [PATCH 10/55] fixes --- pandas/core/indexes/category.py | 4 ++-- pandas/core/indexes/numeric.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index a77f855ab20cc..9508819f3f395 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -288,7 +288,7 @@ def astype(self, dtype, copy: bool = True) -> Index: dtype = pandas_dtype(dtype) cat = self.categories - if type(cat) is NumIndex: + if cat._is_num_index(): assert isinstance(cat, NumIndex) # mypy complaint fix try: cat._validate_dtype(dtype) @@ -298,7 +298,7 @@ def astype(self, dtype, copy: bool = True) -> Index: new_values = self._data.astype(dtype, copy=copy) # pass copy=False because any copying has been done in the # _data.astype call above - return NumIndex(new_values, name=self.name, copy=False) + return type(cat)(new_values, name=self.name, copy=False) return super().astype(dtype, copy=copy) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 540d47287b60a..a0e45a940c651 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -205,6 +205,8 @@ def _ensure_dtype( return cls._default_dtype dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + if cls._default_dtype is not None: return cls._default_dtype else: From aa0cea778152de4da1b1f68ca8a4baccb1e11dc9 Mon Sep 17 00:00:00 2001 From: tp 
Date: Mon, 10 May 2021 21:18:04 +0100 Subject: [PATCH 11/55] addresses comments (move _format_native_types, assert_index_equal etc.) --- pandas/core/indexes/category.py | 3 +++ pandas/core/indexes/numeric.py | 26 ++++++++++++++++++++++++++ pandas/tests/base/test_unique.py | 6 +++--- pandas/tests/indexes/test_setops.py | 1 + 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 9508819f3f395..705f8e041dcef 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -288,6 +288,9 @@ def astype(self, dtype, copy: bool = True) -> Index: dtype = pandas_dtype(dtype) cat = self.categories + # the super method always returns Int64Index, UInt64Index and Float64Index + # but if e.g. the categories are a NumIndex with dtype float32, we want to + # return an index with the same dtype as self.categories. if cat._is_num_index(): assert isinstance(cat, NumIndex) # mypy complaint fix try: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a0e45a940c651..7aa2577fe855f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -285,6 +285,32 @@ def _is_all_dates(self) -> bool: """ return False + # ---------------------------------------------------------------- + + def _format_native_types( + self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs + ): + from pandas.io.formats.format import FloatArrayFormatter + + if not is_float_dtype(self.dtype): + return super()._format_native_types( + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + **kwargs, + ) + + formatter = FloatArrayFormatter( + self._values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + return formatter.get_result_as_array() + class IntegerIndex(NumericIndex): """ diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py 
index 7844530df6147..e40652e2ad514 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -24,9 +24,9 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) - elif isinstance(obj, pd.NumIndex): + elif type(obj) is pd.NumIndex: expected = pd.NumIndex(unique_values, dtype=obj.dtype) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): @@ -67,7 +67,7 @@ def test_unique_null(null_obj, index_or_series_obj): if type(obj) is pd.NumIndex: expected = pd.NumIndex(unique_values, dtype=obj.dtype) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index c5be98d4bce8f..87057d1147883 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -46,6 +46,7 @@ (np.float32, np.uint32), (np.float32, np.uint16), (np.float32, np.uint8), + (np.float32, np.float64), ] From 1f5f922d0e9867e0fc216f871dd2a565fe2335b7 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 21:29:12 +0100 Subject: [PATCH 12/55] remove from public namespace --- pandas/__init__.py | 1 - pandas/_testing/__init__.py | 2 +- pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/numeric/test_numeric.py | 2 +- pandas/tests/indexes/test_common.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index e8fef482247b0..43f05617584cc 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -75,7 +75,6 @@ UInt64Index, RangeIndex, Float64Index, - NumIndex, MultiIndex, 
IntervalIndex, TimedeltaIndex, diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index ca05231b9c3ca..9a7e6dbac106c 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -50,7 +50,6 @@ Int64Index, IntervalIndex, MultiIndex, - NumIndex, RangeIndex, Series, UInt64Index, @@ -106,6 +105,7 @@ use_numexpr, with_csv_dialect, ) +from pandas.core.api import NumIndex from pandas.core.arrays import ( DatetimeArray, PandasArray, diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 705f8e041dcef..96b4846fed7e3 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -283,7 +283,7 @@ def _is_dtype_compat(self, other) -> Categorical: @doc(Index.astype) def astype(self, dtype, copy: bool = True) -> Index: - from pandas import NumIndex + from pandas.core.api import NumIndex dtype = pandas_dtype(dtype) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index a638b7e3113a4..77b93120ab53d 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -8,11 +8,11 @@ Float64Index, Index, Int64Index, - NumIndex, Series, UInt64Index, ) import pandas._testing as tm +from pandas.core.api import NumIndex from pandas.tests.indexes.common import NumericBase diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index a47fbe8160a5a..57c407e2f121b 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -21,12 +21,12 @@ CategoricalIndex, DatetimeIndex, MultiIndex, - NumIndex, PeriodIndex, RangeIndex, TimedeltaIndex, ) import pandas._testing as tm +from pandas.core.api import NumIndex class TestCommon: From 132ce4401ed164e81e13a37c219a7128fb2eb18d Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 21:41:50 +0100 Subject: [PATCH 13/55] rename to NumericIndex --- pandas/_testing/__init__.py | 2 +- pandas/core/api.py | 2 
+- pandas/core/index.py | 2 +- pandas/core/indexes/api.py | 4 ++-- pandas/core/indexes/base.py | 14 +++++++------- pandas/core/indexes/category.py | 4 ++-- pandas/core/indexes/range.py | 4 ++-- pandas/tests/indexes/common.py | 4 ++-- pandas/tests/indexes/numeric/test_numeric.py | 2 +- pandas/tests/indexes/test_common.py | 6 +++--- pandas/tests/indexes/test_numpy_compat.py | 6 +++--- 11 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 9a7e6dbac106c..206534efd4fc3 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -105,7 +105,7 @@ use_numexpr, with_csv_dialect, ) -from pandas.core.api import NumIndex +from pandas.core.api import NumericIndex from pandas.core.arrays import ( DatetimeArray, PandasArray, diff --git a/pandas/core/api.py b/pandas/core/api.py index 5cb1993b2c5c7..a03293ce13144 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -57,7 +57,7 @@ Int64Index, IntervalIndex, MultiIndex, - NumIndex, + NumericIndex, PeriodIndex, RangeIndex, TimedeltaIndex, diff --git a/pandas/core/index.py b/pandas/core/index.py index 44f434e038a4b..bcdfa67e02f0d 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,6 +1,7 @@ import warnings from pandas.core.indexes.api import ( # noqa:F401 + BaseNumericIndex, CategoricalIndex, DatetimeIndex, Float64Index, @@ -9,7 +10,6 @@ IntervalIndex, MultiIndex, NaT, - NumericIndex, PeriodIndex, RangeIndex, TimedeltaIndex, diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 7b8856fa8c73a..377a82d329e20 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -20,10 +20,10 @@ from pandas.core.indexes.interval import IntervalIndex from pandas.core.indexes.multi import MultiIndex from pandas.core.indexes.numeric import ( + BaseNumericIndex, Float64Index, Int64Index, NumericIndex, - NumIndex, UInt64Index, ) from pandas.core.indexes.period import PeriodIndex @@ -45,8 +45,8 @@ __all__ 
= [ "Index", "MultiIndex", - "NumIndex", "NumericIndex", + "BaseNumericIndex", "Float64Index", "Int64Index", "CategoricalIndex", diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 604f64fe872b0..d55ec90040fec 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -386,7 +386,7 @@ def __new__( ) from pandas.core.arrays import PandasArray - from pandas.core.indexes.numeric import NumIndex + from pandas.core.indexes.numeric import NumericIndex from pandas.core.indexes.range import RangeIndex name = maybe_extract_name(name, data, cls) @@ -438,8 +438,8 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif isinstance(data, NumIndex) and data._is_num_index() and dtype is None: - return NumIndex(data, name=name, copy=copy) + elif isinstance(data, NumericIndex) and data._is_num_index() and dtype is None: + return NumericIndex(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): if isinstance(data, ABCMultiIndex): @@ -2438,11 +2438,11 @@ def _is_num_index(self) -> bool: from pandas.core.indexes.numeric import ( Float64Index, Int64Index, - NumIndex, + NumericIndex, UInt64Index, ) - if not isinstance(self, NumIndex): + if not isinstance(self, NumericIndex): return False elif isinstance(self, (Int64Index, UInt64Index, Float64Index)): return False @@ -5723,7 +5723,7 @@ def map(self, mapper, na_action=None): a MultiIndex will be returned. 
""" from pandas.core.indexes.multi import MultiIndex - from pandas.core.indexes.numeric import NumIndex + from pandas.core.indexes.numeric import NumericIndex new_values = self._map_values(mapper, na_action=na_action) @@ -5745,7 +5745,7 @@ def map(self, mapper, na_action=None): attributes["dtype"] = self.dtype if self._is_num_index() and is_numeric_dtype(new_values.dtype): - return NumIndex(new_values, **attributes) + return NumericIndex(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 96b4846fed7e3..46cdb4fa477ea 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -283,7 +283,7 @@ def _is_dtype_compat(self, other) -> Categorical: @doc(Index.astype) def astype(self, dtype, copy: bool = True) -> Index: - from pandas.core.api import NumIndex + from pandas.core.api import NumericIndex dtype = pandas_dtype(dtype) @@ -292,7 +292,7 @@ def astype(self, dtype, copy: bool = True) -> Index: # but if e.g. the categories are a NumIndex with dtype float32, we want to # return an index with the same dtype as self.categories. if cat._is_num_index(): - assert isinstance(cat, NumIndex) # mypy complaint fix + assert isinstance(cat, NumericIndex) # mypy complaint fix try: cat._validate_dtype(dtype) except ValueError: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0ce99df44a5f9..2349fa6171882 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -45,9 +45,9 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import maybe_extract_name from pandas.core.indexes.numeric import ( + BaseNumericIndex, Float64Index, Int64Index, - NumericIndex, ) from pandas.core.ops.common import unpack_zerodim_and_defer @@ -57,7 +57,7 @@ _empty_range = range(0) -class RangeIndex(NumericIndex): +class RangeIndex(BaseNumericIndex): """ Immutable Index implementing a monotonic integer range. 
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1494fa7754a6c..e92db1f27cf46 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -34,7 +34,7 @@ ) import pandas._testing as tm from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.core.indexes.numeric import NumIndex +from pandas.core.indexes.numeric import NumericIndex class Base: @@ -666,7 +666,7 @@ def test_map_dictlike(self, mapper, simple_index): # empty mappable if idx._is_num_index(): - new_index_cls = NumIndex + new_index_cls = NumericIndex else: new_index_cls = Float64Index diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 77b93120ab53d..082343e2597d4 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -12,7 +12,7 @@ UInt64Index, ) import pandas._testing as tm -from pandas.core.api import NumIndex +from pandas.core.api import NumericIndex from pandas.tests.indexes.common import NumericBase diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 57c407e2f121b..8facaf279f2cf 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -26,7 +26,7 @@ TimedeltaIndex, ) import pandas._testing as tm -from pandas.core.api import NumIndex +from pandas.core.api import NumericIndex class TestCommon: @@ -262,7 +262,7 @@ def test_drop_duplicates(self, index_flat, keep): # make unique index holder = type(index) unique_values = list(set(index)) - dtype = index.dtype if isinstance(index, NumIndex) else None + dtype = index.dtype if isinstance(index, NumericIndex) else None unique_idx = holder(unique_values, dtype=dtype) # make duplicated index @@ -291,7 +291,7 @@ def test_drop_duplicates_no_duplicates(self, index_flat): else: holder = type(index) unique_values = list(set(index)) - dtype = index.dtype if isinstance(index, NumIndex) else None + 
dtype = index.dtype if isinstance(index, NumericIndex) else None unique_idx = holder(unique_values, dtype=dtype) # check on unique index diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index b869aeff9786d..9d0cee6310b7a 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -10,7 +10,7 @@ ) import pandas._testing as tm from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.core.indexes.numeric import NumericIndex +from pandas.core.indexes.numeric import BaseNumericIndex @pytest.mark.parametrize( @@ -49,7 +49,7 @@ def test_numpy_ufuncs_basic(index, func): with tm.external_error_raised((TypeError, AttributeError)): with np.errstate(all="ignore"): func(index) - elif isinstance(index, NumericIndex): + elif isinstance(index, BaseNumericIndex): # coerces to float (e.g. np.sin) with np.errstate(all="ignore"): result = func(index) @@ -94,7 +94,7 @@ def test_numpy_ufuncs_other(index, func, request): with tm.external_error_raised(TypeError): func(index) - elif isinstance(index, NumericIndex): + elif isinstance(index, BaseNumericIndex): # Results in bool array result = func(index) assert isinstance(result, np.ndarray) From 058cd2e1d0deb6bd43c40fc23a11aefbce240f6c Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 23:09:54 +0100 Subject: [PATCH 14/55] fixes --- pandas/core/dtypes/generic.py | 2 +- pandas/tests/base/test_unique.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 8fac906fdca87..d6dbc83934db0 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -100,7 +100,7 @@ def _check(cls, inst) -> bool: "rangeindex", "float64index", "uint64index", - "numindex", + "numericindex", "multiindex", "datetimeindex", "timedeltaindex", diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 
e40652e2ad514..2695921bdb40c 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -10,6 +10,7 @@ import pandas as pd import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.tests.base.common import allow_na_ops @@ -24,8 +25,8 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) - elif type(obj) is pd.NumIndex: - expected = pd.NumIndex(unique_values, dtype=obj.dtype) + elif isinstance(obj, pd.Index) and obj._is_num_index(): + expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) @@ -65,8 +66,8 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if type(obj) is pd.NumIndex: - expected = pd.NumIndex(unique_values, dtype=obj.dtype) + if isinstance(obj, pd.Index) and obj._is_num_index(): + expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) From 1c7f23fb4c493cc292c9cf54f5a4a281b8012bd7 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 23:49:18 +0100 Subject: [PATCH 15/55] fix test --- pandas/tests/api/test_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 58c7d556826de..95dc1d82cb286 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -67,7 +67,6 @@ class TestPDApi(Base): "HDFStore", "Index", "Int64Index", - "NumIndex", "MultiIndex", "Period", "PeriodIndex", From 07a097ca4bfe0b68da8d42d0f042f4b5735a8160 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 25 Apr 2021 20:07:30 +0100 
Subject: [PATCH 16/55] ENH: Add NumIndex for indexic of any numeric type --- pandas/_testing/asserters.py | 4 ++-- pandas/core/indexes/base.py | 7 ++++--- pandas/core/indexes/numeric.py | 1 + pandas/tests/indexes/common.py | 8 +++++--- pandas/tests/indexes/test_base.py | 1 + pandas/tests/indexes/test_common.py | 1 + 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 60016bbf92e60..9e9964918a76a 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -270,8 +270,8 @@ def assert_index_equal( exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', RangeIndex can be substituted for - Int64Index and integer dtypes will be equivalent to each other and - float dtypes equivalent to each other. + Int64Index and signed integer dtypes will be equivalent to each other, unsigned + integer to each other and float dtypes equivalent to each other. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d55ec90040fec..71d7ed420de17 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2429,7 +2429,8 @@ def _is_multi(self) -> bool: """ return isinstance(self, ABCMultiIndex) - def _is_num_index(self) -> bool: + @classmethod + def _is_num_index(cls) -> bool: """ Whether self is a NumIndex, but not *not* Int64Index, UInt64Index, FloatIndex. 
@@ -2442,9 +2443,9 @@ def _is_num_index(self) -> bool: UInt64Index, ) - if not isinstance(self, NumericIndex): + if not issubclass(cls, NumericIndex): return False - elif isinstance(self, (Int64Index, UInt64Index, Float64Index)): + elif issubclass(cls, (Int64Index, UInt64Index, Float64Index)): return False else: return True diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7aa2577fe855f..290c8bc545aa1 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -23,6 +23,7 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( + is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, is_float, diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e92db1f27cf46..25a7b80d31bfb 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import ( +from pandas import ( # noqa CategoricalIndex, DatetimeIndex, Float64Index, @@ -795,9 +795,11 @@ class NumericBase(Base): """ def test_constructor_unwraps_index(self, dtype): + index_cls = self._index_cls + idx = Index([1, 2], dtype=dtype) - result = self._index_cls(idx) - expected = np.array([1, 2], dtype=dtype) + result = index_cls(idx) + expected = np.array([1, 2], dtype=idx.dtype) tm.assert_numpy_array_equal(result._data, expected) def test_where(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b9be3bd6f1f06..5c3b92522b19f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -24,6 +24,7 @@ Float64Index, Int64Index, IntervalIndex, + NumIndex, PeriodIndex, RangeIndex, Series, diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 8facaf279f2cf..84235d37d52aa 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ 
-21,6 +21,7 @@ CategoricalIndex, DatetimeIndex, MultiIndex, + NumIndex, PeriodIndex, RangeIndex, TimedeltaIndex, From ff6cfb48a958ffe1c78ec595c01b93ac8337d4b3 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 26 Apr 2021 06:21:48 +0100 Subject: [PATCH 17/55] fix test failure --- pandas/tests/api/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 95dc1d82cb286..58c7d556826de 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -67,6 +67,7 @@ class TestPDApi(Base): "HDFStore", "Index", "Int64Index", + "NumIndex", "MultiIndex", "Period", "PeriodIndex", From fe7b97caf35afcfc75973b34cece40dff98f684e Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 2 May 2021 15:22:46 +0100 Subject: [PATCH 18/55] Add more numeric tests for NumIndex --- pandas/tests/indexes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 25a7b80d31bfb..6b49dcf397e3c 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import ( # noqa +from pandas import ( CategoricalIndex, DatetimeIndex, Float64Index, From 86f396053f8a2498ec48ddd558a1f5dc0bda21d2 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 2 May 2021 16:24:16 +0100 Subject: [PATCH 19/55] fixups --- pandas/tests/indexes/test_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5c3b92522b19f..b9be3bd6f1f06 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -24,7 +24,6 @@ Float64Index, Int64Index, IntervalIndex, - NumIndex, PeriodIndex, RangeIndex, Series, From 2424c0d328aadeffe0c516d3963d4936853dc84c Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 6 May 2021 21:16:29 +0100 Subject: [PATCH 20/55] fix exact='equiv' --- 
pandas/_testing/asserters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 9e9964918a76a..60016bbf92e60 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -270,8 +270,8 @@ def assert_index_equal( exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', RangeIndex can be substituted for - Int64Index and signed integer dtypes will be equivalent to each other, unsigned - integer to each other and float dtypes equivalent to each other. + Int64Index and integer dtypes will be equivalent to each other and + float dtypes equivalent to each other. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False From a515bba25289a38be8ec3df553697678c897f79a Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 09:10:57 +0100 Subject: [PATCH 21/55] add more comprehensive tests --- pandas/core/indexes/numeric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 290c8bc545aa1..7aa2577fe855f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -23,7 +23,6 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( - is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, is_float, From 341fc2f1cdbd53cca553976a09465ddf7bbd2578 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 21:29:12 +0100 Subject: [PATCH 22/55] remove from public namespace --- pandas/tests/indexes/test_common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 84235d37d52aa..8facaf279f2cf 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -21,7 +21,6 @@ CategoricalIndex, DatetimeIndex, MultiIndex, - 
NumIndex, PeriodIndex, RangeIndex, TimedeltaIndex, From c2d8884f59731b31e7c6ae80c0ccba18c38b1e94 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 May 2021 23:49:18 +0100 Subject: [PATCH 23/55] fix test --- pandas/tests/api/test_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 58c7d556826de..95dc1d82cb286 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -67,7 +67,6 @@ class TestPDApi(Base): "HDFStore", "Index", "Int64Index", - "NumIndex", "MultiIndex", "Period", "PeriodIndex", From 5a56b1a4be232f3780bf750fff9ecc594d65ccbb Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 11 May 2021 08:49:15 +0100 Subject: [PATCH 24/55] add back numeric tests --- pandas/tests/indexes/numeric/test_numeric.py | 102 ++++++++++++++++--- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 082343e2597d4..f7e56975f13d4 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -16,14 +16,14 @@ from pandas.tests.indexes.common import NumericBase -class TestFloat64Index(NumericBase): - _index_cls = Float64Index +class TestFloatNumericIndex(NumericBase): + _index_cls = NumericIndex - @pytest.fixture(params=[np.float64]) + @pytest.fixture(params=[np.float64, np.float32]) def dtype(self, request): return request.param - @pytest.fixture(params=["int64", "uint64", "category", "datetime64", "object"]) + @pytest.fixture(params=["category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -277,6 +277,31 @@ def test_fillna_float64(self): tm.assert_index_equal(idx.fillna("obj"), exp) +class TestFloat64Index(TestFloatNumericIndex): + _index_cls = Float64Index + + @pytest.fixture + def dtype(self, request): + return np.float64 + + @pytest.fixture( + params=["int64", "uint64", "object", "category", "datetime64"], + 
) + def invalid_dtype(self, request): + return request.param + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + class NumericInt(NumericBase): def test_view(self, dtype): index_cls = self._index_cls @@ -379,14 +404,14 @@ def test_prevent_casting(self, simple_index): assert result.dtype == np.object_ -class TestInt64Index(NumericInt): - _index_cls = Int64Index +class TestIntNumericIndex(NumericInt): + _index_cls = NumericIndex - @pytest.fixture(params=[np.int64]) + @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8]) def dtype(self, request): return request.param - @pytest.fixture(params=["uint64", "float64", "category", "datetime64", "object"]) + @pytest.fixture(params=["category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -471,15 +496,40 @@ def test_coerce_list(self): assert type(arr) is Index -class TestUInt64Index(NumericInt): - - _index_cls = UInt64Index +class TestInt64Index(TestIntNumericIndex): + _index_cls = Int64Index @pytest.fixture def dtype(self): - return np.uint64 + return np.int64 - @pytest.fixture(params=["int64", "float64", "category", "datetime64", "object"]) + @pytest.fixture( + params=["float64", "uint64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + +class TestUIntNumericIndex(NumericInt): + 
+ _index_cls = NumericIndex + + @pytest.fixture(params=[np.uint64]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -498,6 +548,21 @@ def simple_index(self, dtype): def index(self, request): return self._index_cls(request.param, dtype=np.uint64) + +class TestUInt64Index(TestUIntNumericIndex): + + _index_cls = UInt64Index + + @pytest.fixture + def dtype(self): + return np.uint64 + + @pytest.fixture( + params=["int64", "float64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + def test_constructor(self, dtype): index_cls = self._index_cls @@ -529,6 +594,17 @@ def test_constructor_does_not_cast_to_float(self): result = UInt64Index(values) assert list(result) == values + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + @pytest.mark.parametrize( "box", From a497d57f3414c3fbc628fcdab45771cbf12f2e96 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 12 May 2021 08:03:40 +0100 Subject: [PATCH 25/55] fix comments --- pandas/core/indexes/base.py | 5 +++-- pandas/core/indexes/category.py | 10 +++++----- pandas/core/indexes/numeric.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 71d7ed420de17..09df2cd006e2a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2429,12 +2429,13 @@ def _is_multi(self) -> bool: """ return isinstance(self, ABCMultiIndex) + @final @classmethod def _is_num_index(cls) -> bool: """ - Whether self is a NumIndex, but not *not* Int64Index, UInt64Index, FloatIndex. 
+ Check if this is a NumericIndex, but *not* Int64Index, UInt64Index, FloatIndex. - Typically used to check if an operation should return NumIndex or plain Index. + Used to check if an operation should return NumericIndex or plain Index. """ from pandas.core.indexes.numeric import ( Float64Index, diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 46cdb4fa477ea..7d6ae580ee0a0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -287,21 +287,21 @@ def astype(self, dtype, copy: bool = True) -> Index: dtype = pandas_dtype(dtype) - cat = self.categories + categories = self.categories # the super method always returns Int64Index, UInt64Index and Float64Index # but if e.g. the categories are a NumIndex with dtype float32, we want to # return an index with the same dtype as self.categories. - if cat._is_num_index(): - assert isinstance(cat, NumericIndex) # mypy complaint fix + if categories._is_num_index(): + assert isinstance(categories, NumericIndex) # mypy complaint fix try: - cat._validate_dtype(dtype) + categories._validate_dtype(dtype) except ValueError: pass else: new_values = self._data.astype(dtype, copy=copy) # pass copy=False because any copying has been done in the # _data.astype call above - return type(cat)(new_values, name=self.name, copy=False) + return type(categories)(new_values, name=self.name, copy=False) return super().astype(dtype, copy=copy) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7aa2577fe855f..4bcedeaaa8398 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,6 +15,7 @@ from pandas._typing import ( Dtype, DtypeObj, + final, ) from pandas.util._decorators import ( cache_readonly, @@ -278,6 +279,15 @@ def _assert_safe_casting(cls, data, subarr): """ pass + @final + @cache_readonly + def _can_hold_na(self) -> bool: + if is_integer_dtype(self.dtype): + return False + else: + return True + + @final @property 
def _is_all_dates(self) -> bool: """ From 65576892fd4ec02dba4e6a74e2a8347bd19dd384 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 12 May 2021 18:31:28 +0100 Subject: [PATCH 26/55] fix comments part II --- pandas/core/indexes/numeric.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 4bcedeaaa8398..7aa2577fe855f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,7 +15,6 @@ from pandas._typing import ( Dtype, DtypeObj, - final, ) from pandas.util._decorators import ( cache_readonly, @@ -279,15 +278,6 @@ def _assert_safe_casting(cls, data, subarr): """ pass - @final - @cache_readonly - def _can_hold_na(self) -> bool: - if is_integer_dtype(self.dtype): - return False - else: - return True - - @final @property def _is_all_dates(self) -> bool: """ From 5bc4c2c82d8e3eafee248c262d612757a4cef0ce Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 May 2021 08:15:59 +0100 Subject: [PATCH 27/55] _is_num_index -> _is_numeric_index + Index.union --- pandas/core/indexes/base.py | 10 +++++++--- pandas/core/indexes/category.py | 2 +- pandas/tests/base/test_unique.py | 4 ++-- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 09df2cd006e2a..d10e72ba02058 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -438,7 +438,11 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif isinstance(data, NumericIndex) and data._is_num_index() and dtype is None: + elif ( + isinstance(data, NumericIndex) + and data._is_numeric_index() + and dtype is None + ): return NumericIndex(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -2431,7 +2435,7 @@ def _is_multi(self) -> bool: @final @classmethod - def _is_num_index(cls) -> bool: + def 
_is_numeric_index(cls) -> bool: """ Check if this is a NumericIndex, but *not* Int64Index, UInt64Index, FloatIndex. @@ -5746,7 +5750,7 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype - if self._is_num_index() and is_numeric_dtype(new_values.dtype): + if self._is_numeric_index() and is_numeric_dtype(new_values.dtype): return NumericIndex(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7d6ae580ee0a0..aa8400fc3b759 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -291,7 +291,7 @@ def astype(self, dtype, copy: bool = True) -> Index: # the super method always returns Int64Index, UInt64Index and Float64Index # but if e.g. the categories are a NumIndex with dtype float32, we want to # return an index with the same dtype as self.categories. - if categories._is_num_index(): + if categories._is_numeric_index(): assert isinstance(categories, NumericIndex) # mypy complaint fix try: categories._validate_dtype(dtype) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 2695921bdb40c..ee1f3305b435b 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -25,7 +25,7 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) - elif isinstance(obj, pd.Index) and obj._is_num_index(): + elif isinstance(obj, pd.Index) and obj._is_numeric_index(): expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): @@ -66,7 +66,7 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if isinstance(obj, pd.Index) and obj._is_num_index(): 
+ if isinstance(obj, pd.Index) and obj._is_numeric_index(): expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 6b49dcf397e3c..473d29c5a43c5 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -665,7 +665,7 @@ def test_map_dictlike(self, mapper, simple_index): tm.assert_index_equal(result, expected) # empty mappable - if idx._is_num_index(): + if idx._is_numeric_index(): new_index_cls = NumericIndex else: new_index_cls = Float64Index diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b9be3bd6f1f06..9d16bcb9634f0 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -713,7 +713,7 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) - elif index._is_num_index(): + elif index._is_numeric_index(): expected = type(index)(np.arange(len(index), 0, -1), dtype=index.dtype) else: expected = Index(np.arange(len(index), 0, -1)) From 84bf54067da97df956083a1184110b92f0ab2f6b Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 14 May 2021 08:50:27 +0100 Subject: [PATCH 28/55] makeNumIndex -> makeNumericIndex and refactor makeIntIndex etc. 
--- pandas/conftest.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 4a1ec977baab5..460f34fd02109 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -460,16 +460,16 @@ def _create_mi_with_dt64tz_level(): "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), - "num_int64": tm.makeNumIndex(100, dtype="int64"), - "num_int32": tm.makeNumIndex(100, dtype="int32"), - "num_int16": tm.makeNumIndex(100, dtype="int16"), - "num_int8": tm.makeNumIndex(100, dtype="int8"), - "num_uint64": tm.makeNumIndex(100, dtype="uint64"), - "num_uint32": tm.makeNumIndex(100, dtype="uint32"), - "num_uint16": tm.makeNumIndex(100, dtype="uint16"), - "num_uint8": tm.makeNumIndex(100, dtype="uint8"), - "num_float64": tm.makeNumIndex(100, dtype="float64"), - "num_float32": tm.makeNumIndex(100, dtype="float32"), + "num_int64": tm.makeNumericIndex(100, dtype="int64"), + "num_int32": tm.makeNumericIndex(100, dtype="int32"), + "num_int16": tm.makeNumericIndex(100, dtype="int16"), + "num_int8": tm.makeNumericIndex(100, dtype="int8"), + "num_uint64": tm.makeNumericIndex(100, dtype="uint64"), + "num_uint32": tm.makeNumericIndex(100, dtype="uint32"), + "num_uint16": tm.makeNumericIndex(100, dtype="uint16"), + "num_uint8": tm.makeNumericIndex(100, dtype="uint8"), + "num_float64": tm.makeNumericIndex(100, dtype="float64"), + "num_float32": tm.makeNumericIndex(100, dtype="float32"), "bool": tm.makeBoolIndex(10), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), From 69953b4870b34d458434a9d2598941a8d949234f Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 20 May 2021 22:34:06 +0100 Subject: [PATCH 29/55] fix errors --- pandas/tests/indexes/numeric/test_numeric.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 
f7e56975f13d4..9bc89c161e763 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -106,11 +106,6 @@ def test_constructor(self, dtype): result = index_cls(np.array([np.nan]), dtype=dtype) assert pd.isna(result.values).all() - result = Index(np.array([np.nan], dtype=dtype)) - assert isinstance(result, index_cls) - assert result.dtype == dtype - assert pd.isna(result.values).all() - def test_constructor_invalid(self): index_cls = self._index_cls cls_name = index_cls.__name__ @@ -290,6 +285,14 @@ def dtype(self, request): def invalid_dtype(self, request): return request.param + def test_constructor_from_base_index(self, dtype): + index_cls = self._index_cls + + result = Index(np.array([np.nan], dtype=dtype)) + assert isinstance(result, index_cls) + assert result.dtype == dtype + assert pd.isna(result.values).all() + def test_constructor_32bit(self, dtype): index_cls = self._index_cls From b4be77d86a339d5d63933bd94e4faf5e907b47bb Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:03:25 +0100 Subject: [PATCH 30/55] rebase after #41472 --- pandas/__init__.py | 1 + pandas/core/index.py | 2 +- pandas/core/indexes/api.py | 2 - pandas/core/indexes/base.py | 5 +- pandas/core/indexes/numeric.py | 66 ++++++++++++++++++++--- pandas/core/indexes/range.py | 4 +- pandas/tests/indexes/test_numpy_compat.py | 6 +-- pandas/tests/indexes/test_setops.py | 15 ------ 8 files changed, 70 insertions(+), 31 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 43f05617584cc..5de55b530de42 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -76,6 +76,7 @@ RangeIndex, Float64Index, MultiIndex, + NumericIndex, IntervalIndex, TimedeltaIndex, DatetimeIndex, diff --git a/pandas/core/index.py b/pandas/core/index.py index bcdfa67e02f0d..44f434e038a4b 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,6 @@ import warnings from pandas.core.indexes.api import ( # noqa:F401 - 
BaseNumericIndex, CategoricalIndex, DatetimeIndex, Float64Index, @@ -10,6 +9,7 @@ IntervalIndex, MultiIndex, NaT, + NumericIndex, PeriodIndex, RangeIndex, TimedeltaIndex, diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 377a82d329e20..304c42321e72a 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -20,7 +20,6 @@ from pandas.core.indexes.interval import IntervalIndex from pandas.core.indexes.multi import MultiIndex from pandas.core.indexes.numeric import ( - BaseNumericIndex, Float64Index, Int64Index, NumericIndex, @@ -46,7 +45,6 @@ "Index", "MultiIndex", "NumericIndex", - "BaseNumericIndex", "Float64Index", "Int64Index", "CategoricalIndex", diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d10e72ba02058..0bada0a543c24 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2441,16 +2441,17 @@ def _is_numeric_index(cls) -> bool: Used to check if an operation should return NumericIndex or plain Index. """ - from pandas.core.indexes.numeric import ( + from pandas import ( Float64Index, Int64Index, NumericIndex, + RangeIndex, UInt64Index, ) if not issubclass(cls, NumericIndex): return False - elif issubclass(cls, (Int64Index, UInt64Index, Float64Index)): + elif issubclass(cls, (RangeIndex, Int64Index, UInt64Index, Float64Index)): return False else: return True diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7aa2577fe855f..efd584c5618a5 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -212,13 +212,52 @@ def _ensure_dtype( else: return dtype + def __contains__(self, key) -> bool: + """ + Check if key is a float and has a decimal. If it has, return False. + """ + if not is_integer_dtype(self.dtype): + return super().__contains__(key) + + hash(key) + try: + if is_float(key) and int(key) != key: + # otherwise the `key in self._engine` check casts e.g. 
1.1 -> 1 + return False + return key in self._engine + except (OverflowError, TypeError, ValueError): + return False + + @doc(Index.astype) + def astype(self, dtype, copy=True): + if is_float_dtype(self.dtype): + dtype = pandas_dtype(dtype) + if needs_i8_conversion(dtype): + raise TypeError( + f"Cannot convert Float64Index to dtype {dtype}; integer " + "values are required for conversion" + ) + elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): + # TODO(jreback); this can change once we have an EA Index type + # GH 13149 + arr = astype_nansafe(self._values, dtype=dtype) + if isinstance(self, Float64Index): + return Int64Index(arr, name=self.name) + else: + return NumericIndex(arr, name=self.name, dtype=dtype) + + return super().astype(dtype, copy=copy) + # ---------------------------------------------------------------- # Indexing Methods @cache_readonly @doc(Index._should_fallback_to_positional) def _should_fallback_to_positional(self) -> bool: - return False + if self.inferred_type == "floating": + return False + + return super()._should_fallback_to_positional() @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str): @@ -239,6 +278,21 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): # we will try to coerce to integers return self._maybe_cast_indexer(label) + @doc(Index._convert_arr_indexer) + def _convert_arr_indexer(self, keyarr) -> np.ndarray: + if is_unsigned_integer_dtype(self.dtype): + # Cast the indexer to uint64 if possible so that the values returned + # from indexing are also uint64. 
+ dtype = None + if is_integer_dtype(keyarr) or ( + lib.infer_dtype(keyarr, skipna=False) == "integer" + ): + dtype = np.dtype(np.uint64) + + return com.asarray_tuplesafe(keyarr, dtype=dtype) + + return super()._convert_arr_indexer(keyarr) + # ---------------------------------------------------------------- @doc(Index._shallow_copy) @@ -270,13 +324,13 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return is_numeric_dtype(dtype) @classmethod - def _assert_safe_casting(cls, data, subarr): + def _assert_safe_casting(cls, data, subarr) -> None: """ - Subclasses need to override this only if the process of casting data - from some accepted dtype to the internal dtype(s) bears the risk of - truncation (e.g. float to int). + Ensure incoming data can be represented with matching signed-ness. """ - pass + if is_integer_dtype(subarr.dtype): + if not np.array_equal(data, subarr): + raise TypeError("Unsafe NumPy casting, you must explicitly cast") @property def _is_all_dates(self) -> bool: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 2349fa6171882..0ce99df44a5f9 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -45,9 +45,9 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import maybe_extract_name from pandas.core.indexes.numeric import ( - BaseNumericIndex, Float64Index, Int64Index, + NumericIndex, ) from pandas.core.ops.common import unpack_zerodim_and_defer @@ -57,7 +57,7 @@ _empty_range = range(0) -class RangeIndex(BaseNumericIndex): +class RangeIndex(NumericIndex): """ Immutable Index implementing a monotonic integer range. 
diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 9d0cee6310b7a..b869aeff9786d 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -10,7 +10,7 @@ ) import pandas._testing as tm from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.core.indexes.numeric import BaseNumericIndex +from pandas.core.indexes.numeric import NumericIndex @pytest.mark.parametrize( @@ -49,7 +49,7 @@ def test_numpy_ufuncs_basic(index, func): with tm.external_error_raised((TypeError, AttributeError)): with np.errstate(all="ignore"): func(index) - elif isinstance(index, BaseNumericIndex): + elif isinstance(index, NumericIndex): # coerces to float (e.g. np.sin) with np.errstate(all="ignore"): result = func(index) @@ -94,7 +94,7 @@ def test_numpy_ufuncs_other(index, func, request): with tm.external_error_raised(TypeError): func(index) - elif isinstance(index, BaseNumericIndex): + elif isinstance(index, NumericIndex): # Results in bool array result = func(index) assert isinstance(result, np.ndarray) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 87057d1147883..20174beacf1d3 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -31,22 +31,7 @@ COMPATIBLE_INCONSISTENT_PAIRS = [ (np.float64, np.int64), - (np.float64, np.int32), - (np.float64, np.int16), - (np.float64, np.int8), (np.float64, np.uint64), - (np.float64, np.uint32), - (np.float64, np.uint16), - (np.float64, np.uint8), - (np.float32, np.int64), - (np.float32, np.int32), - (np.float32, np.int16), - (np.float32, np.int8), - (np.float32, np.uint64), - (np.float32, np.uint32), - (np.float32, np.uint16), - (np.float32, np.uint8), - (np.float32, np.float64), ] From bb42e2dd3808e5e79c3be6ee1723b7c151e657df Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:14:20 +0100 Subject: [PATCH 31/55] small clean-up --- 
pandas/core/indexes/base.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0bada0a543c24..1b81ee79c8831 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -386,7 +386,6 @@ def __new__( ) from pandas.core.arrays import PandasArray - from pandas.core.indexes.numeric import NumericIndex from pandas.core.indexes.range import RangeIndex name = maybe_extract_name(name, data, cls) @@ -438,12 +437,8 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif ( - isinstance(data, NumericIndex) - and data._is_numeric_index() - and dtype is None - ): - return NumericIndex(data, name=name, copy=copy) + elif data._is_numeric_index() and dtype is None: + return type(data)(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): if isinstance(data, ABCMultiIndex): @@ -5730,7 +5725,6 @@ def map(self, mapper, na_action=None): a MultiIndex will be returned. 
""" from pandas.core.indexes.multi import MultiIndex - from pandas.core.indexes.numeric import NumericIndex new_values = self._map_values(mapper, na_action=na_action) @@ -5752,7 +5746,7 @@ def map(self, mapper, na_action=None): attributes["dtype"] = self.dtype if self._is_numeric_index() and is_numeric_dtype(new_values.dtype): - return NumericIndex(new_values, **attributes) + return type(self)(new_values, **attributes) return Index(new_values, **attributes) From bafa9b39fc03d319f47a616d872198fe7e0194c4 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:18:46 +0100 Subject: [PATCH 32/55] small clean-up II --- pandas/core/indexes/numeric.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index efd584c5618a5..02b8dcfe5904f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -197,7 +197,9 @@ def _ensure_dtype( dtype: Dtype | None, validate: bool = True, ) -> np.dtype | None: - """Ensure int64 dtype for Int64Index, etc. but allow int32 etc. for NumIndex.""" + """ + Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. 
+ """ if validate: cls._validate_dtype(dtype) @@ -207,10 +209,10 @@ def _ensure_dtype( dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) - if cls._default_dtype is not None: - return cls._default_dtype - else: + if cls._default_dtype is None: return dtype + else: + return cls._default_dtype def __contains__(self, key) -> bool: """ From 35b0e71952ddbe4db73c706ecfae357341d4c49e Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:23:39 +0100 Subject: [PATCH 33/55] small clean-up III --- pandas/core/indexes/numeric.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 02b8dcfe5904f..907c5b61916d9 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -258,8 +258,8 @@ def astype(self, dtype, copy=True): def _should_fallback_to_positional(self) -> bool: if self.inferred_type == "floating": return False - - return super()._should_fallback_to_positional() + else: + return super()._should_fallback_to_positional() @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str): @@ -326,7 +326,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return is_numeric_dtype(dtype) @classmethod - def _assert_safe_casting(cls, data, subarr) -> None: + def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: """ Ensure incoming data can be represented with matching signed-ness. 
""" @@ -341,8 +341,6 @@ def _is_all_dates(self) -> bool: """ return False - # ---------------------------------------------------------------- - def _format_native_types( self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs ): From ed4730b7af0c3bc4c227c7c2eb636d1dc47acf03 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:26:46 +0100 Subject: [PATCH 34/55] small clean-up IV --- pandas/core/indexes/numeric.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 907c5b61916d9..38cdbf88b1a74 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -282,18 +282,18 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): @doc(Index._convert_arr_indexer) def _convert_arr_indexer(self, keyarr) -> np.ndarray: - if is_unsigned_integer_dtype(self.dtype): - # Cast the indexer to uint64 if possible so that the values returned - # from indexing are also uint64. - dtype = None - if is_integer_dtype(keyarr) or ( - lib.infer_dtype(keyarr, skipna=False) == "integer" - ): - dtype = np.dtype(np.uint64) - - return com.asarray_tuplesafe(keyarr, dtype=dtype) - - return super()._convert_arr_indexer(keyarr) + if not is_unsigned_integer_dtype(self.dtype): + return super()._convert_arr_indexer(keyarr) + + # Cast the indexer to uint64 if possible so that the values returned + # from indexing are also uint64. 
+ dtype = None + if is_integer_dtype(keyarr) or ( + lib.infer_dtype(keyarr, skipna=False) == "integer" + ): + dtype = np.dtype(np.uint64) + + return com.asarray_tuplesafe(keyarr, dtype=dtype) # ---------------------------------------------------------------- From 6a32788345ab003b765afc50fcb57b7acbcc41da Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:32:48 +0100 Subject: [PATCH 35/55] small clean-up V --- pandas/core/indexes/numeric.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 38cdbf88b1a74..badf108cc26f4 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -329,6 +329,9 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None: """ Ensure incoming data can be represented with matching signed-ness. + + Needed if the process of casting data from some accepted dtype to the internal + dtype(s) bears the risk of truncation (e.g. float to int). 
""" if is_integer_dtype(subarr.dtype): if not np.array_equal(data, subarr): @@ -346,24 +349,25 @@ def _format_native_types( ): from pandas.io.formats.format import FloatArrayFormatter - if not is_float_dtype(self.dtype): - return super()._format_native_types( + if is_float_dtype(self.dtype): + formatter = FloatArrayFormatter( + self._values, na_rep=na_rep, float_format=float_format, decimal=decimal, quoting=quoting, + fixed_width=False, **kwargs, ) + return formatter.get_result_as_array() - formatter = FloatArrayFormatter( - self._values, + return super()._format_native_types( na_rep=na_rep, float_format=float_format, decimal=decimal, quoting=quoting, - fixed_width=False, + **kwargs, ) - return formatter.get_result_as_array() class IntegerIndex(NumericIndex): From 47e208cb3142eb1fb9a54ad7b5eca254ab32646d Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 00:57:02 +0100 Subject: [PATCH 36/55] fix bug --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1b81ee79c8831..8ae70a808bd41 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -437,7 +437,7 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif data._is_numeric_index() and dtype is None: + elif isinstance(data, Index) and data._is_numeric_index() and dtype is None: return type(data)(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): From d6a03a0f75d76090c5d9772fbec391dcc096ca14 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 07:31:24 +0100 Subject: [PATCH 37/55] fix failures --- pandas/core/indexes/numeric.py | 1 - pandas/tests/api/test_api.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index badf108cc26f4..f402a0d390a21 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -357,7 +357,6 @@ def 
_format_native_types( decimal=decimal, quoting=quoting, fixed_width=False, - **kwargs, ) return formatter.get_result_as_array() diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 95dc1d82cb286..7173a43d4c5e6 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -68,6 +68,7 @@ class TestPDApi(Base): "Index", "Int64Index", "MultiIndex", + "NumericIndex", "Period", "PeriodIndex", "RangeIndex", From ec003ed7f6809d13e0f099dbf6dc72e6c6790e33 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 08:25:19 +0100 Subject: [PATCH 38/55] cleanups --- pandas/tests/indexes/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 473d29c5a43c5..7a58bc69d0612 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -537,10 +537,10 @@ def test_hasnans_isnans(self, index_flat): if len(index) == 0: return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return elif isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif issubclass(index.dtype.type, np.integer): - return else: values[1] = np.nan @@ -557,7 +557,9 @@ def test_hasnans_isnans(self, index_flat): def test_fillna(self, index): # GH 11343 if len(index) == 0: - pass + return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return elif isinstance(index, MultiIndex): idx = index.copy(deep=True) msg = "isna is not defined for MultiIndex" @@ -578,8 +580,6 @@ def test_fillna(self, index): if isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif issubclass(index.dtype.type, np.integer): - return else: values[1] = np.nan From 7ddee717bafd6b5a97c163d18a6bc6d317a70277 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 22:14:43 +0100 Subject: [PATCH 39/55] change _is_numeric_index to be an attribute --- pandas/core/indexes/base.py | 7 +++++-- pandas/core/indexes/category.py |
2 +- pandas/core/indexes/numeric.py | 4 ++++ pandas/core/indexes/range.py | 1 + pandas/tests/base/test_unique.py | 4 ++-- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- 7 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8ae70a808bd41..d7ebf8089de83 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -359,6 +359,9 @@ def _outer_indexer( _is_numeric_dtype: bool = False _can_hold_na: bool = True _can_hold_strings: bool = True + # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, + # UInt64Index or RangeIndex + _is_numeric_index: bool = False _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine # whether we support partial string indexing. Overridden @@ -437,7 +440,7 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif isinstance(data, Index) and data._is_numeric_index() and dtype is None: + elif isinstance(data, Index) and data._is_numeric_index and dtype is None: return type(data)(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -5745,7 +5748,7 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype - if self._is_numeric_index() and is_numeric_dtype(new_values.dtype): + if self._is_numeric_index and is_numeric_dtype(new_values.dtype): return type(self)(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index aa8400fc3b759..9dc862582e557 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -291,7 +291,7 @@ def astype(self, dtype, copy: bool = True) -> Index: # the super method always returns Int64Index, UInt64Index and Float64Index # but if e.g. the categories are a NumIndex with dtype float32, we want to # return an index with the same dtype as self.categories. 
- if categories._is_numeric_index(): + if categories._is_numeric_index: assert isinstance(categories, NumericIndex) # mypy complaint fix try: categories._validate_dtype(dtype) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index f402a0d390a21..77343b4227c8b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -97,6 +97,7 @@ class NumericIndex(Index): ) _is_numeric_dtype = True _can_hold_strings = False + _is_numeric_index: bool = True @cache_readonly def _can_hold_na(self) -> bool: @@ -374,6 +375,8 @@ class IntegerIndex(NumericIndex): This is an abstract class for Int64Index, UInt64Index. """ + _is_numeric_index: bool = False + @property def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak @@ -438,3 +441,4 @@ class Float64Index(NumericIndex): _engine_type = libindex.Float64Engine _default_dtype = np.dtype(np.float64) _dtype_validation_metadata = (is_float_dtype, "float") + _is_numeric_index: bool = False diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0ce99df44a5f9..2c5e4b1f7bfbb 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -101,6 +101,7 @@ class RangeIndex(NumericIndex): _engine_type = libindex.Int64Engine _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range + _is_numeric_index: bool = False # -------------------------------------------------------------------- # Constructors diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index ee1f3305b435b..a044b82587fae 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -25,7 +25,7 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) - elif isinstance(obj, pd.Index) and obj._is_numeric_index(): + elif isinstance(obj, pd.Index) and obj._is_numeric_index: 
expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): @@ -66,7 +66,7 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if isinstance(obj, pd.Index) and obj._is_numeric_index(): + if isinstance(obj, pd.Index) and obj._is_numeric_index: expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 7a58bc69d0612..31ea2a8dda801 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -665,7 +665,7 @@ def test_map_dictlike(self, mapper, simple_index): tm.assert_index_equal(result, expected) # empty mappable - if idx._is_numeric_index(): + if idx._is_numeric_index: new_index_cls = NumericIndex else: new_index_cls = Float64Index diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9d16bcb9634f0..99ae7994a8d52 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -713,7 +713,7 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) - elif index._is_numeric_index(): + elif index._is_numeric_index: expected = type(index)(np.arange(len(index), 0, -1), dtype=index.dtype) else: expected = Index(np.arange(len(index), 0, -1)) From 2bb282f091f99d9b982d1df65c035b05d5b54bd3 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 23:31:43 +0100 Subject: [PATCH 40/55] minor clean-ups --- pandas/core/indexes/numeric.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 77343b4227c8b..7b17fcba4b4f0 100644 --- 
a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -193,17 +193,12 @@ def _validate_dtype(cls, dtype: Dtype | None) -> None: ) @classmethod - def _ensure_dtype( - cls, - dtype: Dtype | None, - validate: bool = True, - ) -> np.dtype | None: + def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: """ Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. - """ - if validate: - cls._validate_dtype(dtype) + Assumed dtype has already been validated. + """ if dtype is None: return cls._default_dtype @@ -257,7 +252,7 @@ def astype(self, dtype, copy=True): @cache_readonly @doc(Index._should_fallback_to_positional) def _should_fallback_to_positional(self) -> bool: - if self.inferred_type == "floating": + if is_float_dtype(self.dtype): return False else: return super()._should_fallback_to_positional() From c1633fbfa34ec872b61fd02c47b70184d214f71e Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 21 May 2021 23:35:26 +0100 Subject: [PATCH 41/55] fix not-allowed parameter --- pandas/core/indexes/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7b17fcba4b4f0..b3b4d10a5d835 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -163,7 +163,7 @@ def _ensure_array(cls, data, dtype, copy: bool): if issubclass(data.dtype.type, str): cls._string_data_error(data) - dtype = cls._ensure_dtype(dtype, validate=False) + dtype = cls._ensure_dtype(dtype) if copy or not is_dtype_equal(data.dtype, dtype): subarr = np.array(data, dtype=dtype, copy=copy) From 9c7d57b0004ed930a29b180716cc2f9706c846a3 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 22 May 2021 12:25:56 +0100 Subject: [PATCH 42/55] fix _should_fallback_to_positional --- pandas/core/indexes/numeric.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 
b3b4d10a5d835..ce79f991b1c5d 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -252,10 +252,7 @@ def astype(self, dtype, copy=True): @cache_readonly @doc(Index._should_fallback_to_positional) def _should_fallback_to_positional(self) -> bool: - if is_float_dtype(self.dtype): - return False - else: - return super()._should_fallback_to_positional() + return False @doc(Index._convert_slice_indexer) def _convert_slice_indexer(self, key: slice, kind: str): From f6dccc15ff8d4527f18b3c8af64b69a4067ec76b Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 4 Jun 2021 11:31:31 +0100 Subject: [PATCH 43/55] clean-ups after rebasing --- pandas/core/indexes/base.py | 23 ----------------------- pandas/core/indexes/numeric.py | 20 ++++---------------- 2 files changed, 4 insertions(+), 39 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d7ebf8089de83..7988c52c44cf0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2431,29 +2431,6 @@ def _is_multi(self) -> bool: """ return isinstance(self, ABCMultiIndex) - @final - @classmethod - def _is_numeric_index(cls) -> bool: - """ - Check if this is a NumericIndex, but *not* Int64Index, UInt64Index, FloatIndex. - - Used to check if an operation should return NumericIndex or plain Index. 
- """ - from pandas import ( - Float64Index, - Int64Index, - NumericIndex, - RangeIndex, - UInt64Index, - ) - - if not issubclass(cls, NumericIndex): - return False - elif issubclass(cls, (RangeIndex, Int64Index, UInt64Index, Float64Index)): - return False - else: - return True - # -------------------------------------------------------------------- # Pickle Methods diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ce79f991b1c5d..0f000cfcaed12 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -228,8 +228,8 @@ def __contains__(self, key) -> bool: @doc(Index.astype) def astype(self, dtype, copy=True): + dtype = pandas_dtype(dtype) if is_float_dtype(self.dtype): - dtype = pandas_dtype(dtype) if needs_i8_conversion(dtype): raise TypeError( f"Cannot convert Float64Index to dtype {dtype}; integer " @@ -243,6 +243,9 @@ def astype(self, dtype, copy=True): return Int64Index(arr, name=self.name) else: return NumericIndex(arr, name=self.name, dtype=dtype) + elif self._is_numeric_index: + if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): + return type(self)(self, dtype=dtype, copy=copy) return super().astype(dtype, copy=copy) @@ -273,21 +276,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): # we will try to coerce to integers return self._maybe_cast_indexer(label) - @doc(Index._convert_arr_indexer) - def _convert_arr_indexer(self, keyarr) -> np.ndarray: - if not is_unsigned_integer_dtype(self.dtype): - return super()._convert_arr_indexer(keyarr) - - # Cast the indexer to uint64 if possible so that the values returned - # from indexing are also uint64. 
- dtype = None - if is_integer_dtype(keyarr) or ( - lib.infer_dtype(keyarr, skipna=False) == "integer" - ): - dtype = np.dtype(np.uint64) - - return com.asarray_tuplesafe(keyarr, dtype=dtype) - # ---------------------------------------------------------------- @doc(Index._shallow_copy) From 3630fc7419b5874a06da87eb5aa5ee6e3a1d4ca1 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 4 Jun 2021 12:07:34 +0100 Subject: [PATCH 44/55] more clean-ups --- pandas/_testing/__init__.py | 2 +- pandas/core/indexes/numeric.py | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 206534efd4fc3..97e07a76b9149 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -315,7 +315,7 @@ def makeNumericIndex(k=10, name=None, *, dtype): else: raise NotImplementedError(f"wrong dtype {dtype}") - return Index(values, dtype=dtype, name=name) + return NumericIndex(values, dtype=dtype, name=name) def makeIntIndex(k=10, name=None): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 0f000cfcaed12..8e2049bea484a 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -197,18 +197,14 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: """ Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. - Assumed dtype has already been validated. + Assumes dtype has already been validated. 
""" if dtype is None: return cls._default_dtype dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) - - if cls._default_dtype is None: - return dtype - else: - return cls._default_dtype + return dtype def __contains__(self, key) -> bool: """ @@ -357,6 +353,16 @@ class IntegerIndex(NumericIndex): _is_numeric_index: bool = False + @classmethod + @doc(NumericIndex._ensure_dtype) + def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: + if dtype is None: + return cls._default_dtype + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + return cls._default_dtype + @property def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak @@ -422,3 +428,13 @@ class Float64Index(NumericIndex): _default_dtype = np.dtype(np.float64) _dtype_validation_metadata = (is_float_dtype, "float") _is_numeric_index: bool = False + + @classmethod + @doc(NumericIndex._ensure_dtype) + def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: + if dtype is None: + return cls._default_dtype + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + return cls._default_dtype From bfe6895a06ca0529d99d69a6825630767c36c4cd Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 5 Jun 2021 15:49:50 +0100 Subject: [PATCH 45/55] add cleanups --- pandas/__init__.py | 1 - pandas/core/indexes/numeric.py | 9 +++++---- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/numeric/test_numeric.py | 6 ++---- pandas/tests/indexes/test_numpy_compat.py | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 5de55b530de42..43f05617584cc 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -76,7 +76,6 @@ RangeIndex, Float64Index, MultiIndex, - NumericIndex, IntervalIndex, TimedeltaIndex, DatetimeIndex, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 8e2049bea484a..403296e81d9dd 100644 --- a/pandas/core/indexes/numeric.py +++ 
b/pandas/core/indexes/numeric.py @@ -166,10 +166,11 @@ def _ensure_array(cls, data, dtype, copy: bool): dtype = cls._ensure_dtype(dtype) if copy or not is_dtype_equal(data.dtype, dtype): - subarr = np.array(data, dtype=dtype, copy=copy) - if not is_numeric_dtype(subarr.dtype): - # hack to raise correctly - subarr = np.array(data, dtype="float64", copy=copy) + try: + subarr = np.array(data, dtype=dtype, copy=copy) + cls._validate_dtype(subarr.dtype) + except (TypeError, ValueError): + raise ValueError(f"data is not compatible with {cls.__name__}") cls._assert_safe_casting(data, subarr) else: subarr = data diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 31ea2a8dda801..05665b6d35e59 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -33,8 +33,8 @@ isna, ) import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.core.indexes.numeric import NumericIndex class Base: diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 9bc89c161e763..809851e5b3097 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -128,10 +128,8 @@ def test_constructor_invalid(self): with pytest.raises((TypeError, ValueError), match=msg): index_cls(["a", "b", 0.0]) - msg = ( - r"float\(\) argument must be a string or a( real)? 
number, not 'Timestamp'" - ) - with pytest.raises(TypeError, match=msg): + msg = f"data is not compatible with {index_cls.__name__}" + with pytest.raises(ValueError, match=msg): index_cls([Timestamp("20130101")]) def test_constructor_coerce(self, mixed_index, float_index): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index b869aeff9786d..80ba0c53fb9c4 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -9,8 +9,8 @@ TimedeltaIndex, ) import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.core.indexes.numeric import NumericIndex @pytest.mark.parametrize( From 8532ddb41418440babaa4d8319841e4415aa6463 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 5 Jun 2021 18:30:27 +0100 Subject: [PATCH 46/55] fix TestApi failure --- pandas/tests/api/test_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 7173a43d4c5e6..95dc1d82cb286 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -68,7 +68,6 @@ class TestPDApi(Base): "Index", "Int64Index", "MultiIndex", - "NumericIndex", "Period", "PeriodIndex", "RangeIndex", From 186de8ecec0de3f01e0790698a82909afef164eb Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 5 Jun 2021 19:17:49 +0100 Subject: [PATCH 47/55] more precise tests --- pandas/tests/indexes/numeric/test_numeric.py | 45 +++++++++++--------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 809851e5b3097..4fee74bb1903e 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -53,7 +53,7 @@ def float_index(self, dtype): return self._index_cls([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype) def test_repr_roundtrip(self, index): - 
tm.assert_index_equal(eval(repr(index)), index) + tm.assert_index_equal(eval(repr(index)), index, exact=True) def check_is_index(self, idx): assert isinstance(idx, Index) @@ -160,7 +160,7 @@ def test_type_coercion_valid(self, float_dtype): # There is no Float32Index, so we always # generate Float64Index. idx = Index([1, 2, 3.5], dtype=float_dtype) - tm.assert_index_equal(idx, Index([1, 2, 3.5])) + tm.assert_index_equal(idx, Index([1, 2, 3.5]), exact=True) def test_equals_numeric(self): index_cls = self._index_cls @@ -255,19 +255,21 @@ def test_nan_multiple_containment(self): tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, False])) def test_fillna_float64(self): + index_cls = self._index_cls # GH 11343 idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") # can't downcast exp = Index([1.0, 0.1, 3.0], name="x") - tm.assert_index_equal(idx.fillna(0.1), exp) + tm.assert_index_equal(idx.fillna(0.1), exp, exact=True) # downcast - exp = self._index_cls([1.0, 2.0, 3.0], name="x") - tm.assert_index_equal(idx.fillna(2), exp) + exact = True if index_cls is Int64Index else "equiv" + exp = index_cls([1.0, 2.0, 3.0], name="x") + tm.assert_index_equal(idx.fillna(2), exp, exact=exact) # object exp = Index([1.0, "obj", 3.0], name="x") - tm.assert_index_equal(idx.fillna("obj"), exp) + tm.assert_index_equal(idx.fillna("obj"), exp, exact=True) class TestFloat64Index(TestFloatNumericIndex): @@ -312,10 +314,10 @@ def test_view(self, dtype): assert idx_view.name == "Foo" idx_view = idx.view(dtype) - tm.assert_index_equal(idx, index_cls(idx_view, name="Foo")) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) idx_view = idx.view(index_cls) - tm.assert_index_equal(idx, index_cls(idx_view, name="Foo")) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) def test_is_monotonic(self): index_cls = self._index_cls @@ -432,11 +434,13 @@ def test_constructor(self, dtype): # pass list, coerce fine index = index_cls([-5, 0, 1, 2], 
dtype=dtype) expected = Index([-5, 0, 1, 2], dtype=dtype) - tm.assert_index_equal(index, expected) + exact = True if index_cls is Int64Index else "equiv" + tm.assert_index_equal(index, expected, exact=exact) # from iterable - index = index_cls(iter([-5, 0, 1, 2])) - tm.assert_index_equal(index, expected) + index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) + expected = index_cls([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=True) # scalar raise Exception msg = ( @@ -449,7 +453,7 @@ def test_constructor(self, dtype): # copy arr = index.values new_index = index_cls(arr, copy=True) - tm.assert_index_equal(new_index, index) + tm.assert_index_equal(new_index, index, exact=True) val = arr[0] + 3000 # this should not change index @@ -459,12 +463,13 @@ def test_constructor(self, dtype): # interpret list-like expected = index_cls([5, 0]) for cls in [Index, index_cls]: + exact = True if cls is Int64Index else "equiv" for idx in [ cls([5, 0], dtype=dtype), cls(np.array([5, 0]), dtype=dtype), cls(Series([5, 0]), dtype=dtype), ]: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected, exact=exact) def test_constructor_corner(self, dtype): index_cls = self._index_cls @@ -472,7 +477,8 @@ def test_constructor_corner(self, dtype): arr = np.array([1, 2, 3, 4], dtype=object) index = index_cls(arr, dtype=dtype) assert index.values.dtype == index.dtype - tm.assert_index_equal(index, Index(arr)) + exact = True if index_cls is Int64Index else "equiv" + tm.assert_index_equal(index, Index(arr), exact=exact) # preventing casting arr = np.array([1, "2", 3, "4"], dtype=object) @@ -566,27 +572,28 @@ def invalid_dtype(self, request): def test_constructor(self, dtype): index_cls = self._index_cls + exact = True if index_cls is UInt64Index else "equiv" idx = index_cls([1, 2, 3]) res = Index([1, 2, 3], dtype=dtype) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) idx = index_cls([1, 2 ** 63]) res = Index([1, 2 ** 
63], dtype=dtype) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) idx = index_cls([1, 2 ** 63]) res = Index([1, 2 ** 63]) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) idx = Index([-1, 2 ** 63], dtype=object) res = Index(np.array([-1, 2 ** 63], dtype=object)) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) # https://github.com/pandas-dev/pandas/issues/29526 idx = index_cls([1, 2 ** 63 + 1], dtype=dtype) res = Index([1, 2 ** 63 + 1], dtype=dtype) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) def test_constructor_does_not_cast_to_float(self): # https://github.com/numpy/numpy/issues/19146 From ead8f57def759359ab38746d48ef6bbf599abfa6 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 21 Jun 2021 23:05:27 +0100 Subject: [PATCH 48/55] update tests --- pandas/_testing/asserters.py | 2 - pandas/tests/indexes/common.py | 4 +- pandas/tests/indexes/numeric/test_numeric.py | 50 +++++++++++--------- pandas/tests/indexes/test_any_index.py | 15 +++++- pandas/tests/indexes/test_base.py | 7 ++- 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 60016bbf92e60..6144bd436d84f 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -16,8 +16,6 @@ is_bool, is_categorical_dtype, is_extension_array_dtype, - is_float_dtype, - is_integer_dtype, is_interval_dtype, is_number, is_numeric_dtype, diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 05665b6d35e59..3894d14c5540b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -13,6 +13,7 @@ is_datetime64tz_dtype, is_float_dtype, is_integer_dtype, + is_unsigned_integer_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype @@ -29,7 +30,6 @@ RangeIndex, Series, TimedeltaIndex, - UInt64Index, isna, ) import pandas._testing as tm @@ -655,7 +655,7 
@@ def test_map_dictlike(self, mapper, simple_index): identity = mapper(idx.values, idx) # we don't infer to UInt64 for a dict - if isinstance(idx, UInt64Index) and isinstance(identity, dict): + if is_unsigned_integer_dtype(idx.dtype) and isinstance(identity, dict): expected = idx.astype("int64") else: expected = idx diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 4fee74bb1903e..e7dd547b3e73e 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -431,17 +431,6 @@ def index(self, request, dtype): def test_constructor(self, dtype): index_cls = self._index_cls - # pass list, coerce fine - index = index_cls([-5, 0, 1, 2], dtype=dtype) - expected = Index([-5, 0, 1, 2], dtype=dtype) - exact = True if index_cls is Int64Index else "equiv" - tm.assert_index_equal(index, expected, exact=exact) - - # from iterable - index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) - expected = index_cls([-5, 0, 1, 2], dtype=dtype) - tm.assert_index_equal(index, expected, exact=True) - # scalar raise Exception msg = ( rf"{index_cls.__name__}\(\.\.\.\) must be called with a collection of some " @@ -451,6 +440,8 @@ def test_constructor(self, dtype): index_cls(5) # copy + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) arr = index.values new_index = index_cls(arr, copy=True) tm.assert_index_equal(new_index, index, exact=True) @@ -460,16 +451,28 @@ def test_constructor(self, dtype): arr[0] = val assert new_index[0] != val - # interpret list-like - expected = index_cls([5, 0]) - for cls in [Index, index_cls]: - exact = True if cls is Int64Index else "equiv" - for idx in [ - cls([5, 0], dtype=dtype), - cls(np.array([5, 0]), dtype=dtype), - cls(Series([5, 0]), dtype=dtype), - ]: - tm.assert_index_equal(idx, expected, exact=exact) + if dtype == np.int64: + exact = "equiv" if index_cls != Int64Index else True + + # pass list, coerce fine + index = 
index_cls([-5, 0, 1, 2], dtype=dtype) + expected = Index([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # from iterable + index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) + expected = index_cls([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # interpret list-like + expected = index_cls([5, 0], dtype=dtype) + for cls in [Index, index_cls]: + for idx in [ + cls([5, 0], dtype=dtype), + cls(np.array([5, 0]), dtype=dtype), + cls(Series([5, 0]), dtype=dtype), + ]: + tm.assert_index_equal(idx, expected, exact=exact) def test_constructor_corner(self, dtype): index_cls = self._index_cls @@ -477,8 +480,9 @@ def test_constructor_corner(self, dtype): arr = np.array([1, 2, 3, 4], dtype=object) index = index_cls(arr, dtype=dtype) assert index.values.dtype == index.dtype - exact = True if index_cls is Int64Index else "equiv" - tm.assert_index_equal(index, Index(arr), exact=exact) + if dtype == np.int64: + exact = True if index_cls is Int64Index else "equiv" + tm.assert_index_equal(index, Index(arr), exact=exact) # preventing casting arr = np.array([1, "2", 3, "4"], dtype=object) diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index f7dcaa628228b..1ea60b02618c8 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -5,8 +5,11 @@ """ import re +import numpy as np import pytest +from pandas.core.dtypes.common import is_float_dtype + import pandas._testing as tm @@ -47,7 +50,17 @@ def test_mutability(index): def test_map_identity_mapping(index): # GH#12766 - tm.assert_index_equal(index, index.map(lambda x: x)) + result = index.map(lambda x: x) + if index._is_numeric_index: + if is_float_dtype(index.dtype): + expected = index.astype(np.float64) + elif index.dtype == np.uint64: + expected = index.astype(np.uint64) + else: + expected = index.astype(np.int64) + else: + expected = index + tm.assert_index_equal(result, 
expected) def test_wrong_number_names(index): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 99ae7994a8d52..59f0c482e45ba 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -35,6 +35,7 @@ period_range, ) import pandas._testing as tm +from pandas.api.types import is_float_dtype from pandas.core.indexes.api import ( Index, MultiIndex, @@ -714,7 +715,11 @@ def test_map_dictlike(self, index, mapper): # to match proper result coercion for uints expected = Index([]) elif index._is_numeric_index: - expected = type(index)(np.arange(len(index), 0, -1), dtype=index.dtype) + if is_float_dtype(index.dtype): + exp_dtype = np.float64 + else: + exp_dtype = np.int64 + expected = index._constructor(np.arange(len(index), 0, -1), dtype=exp_dtype) else: expected = Index(np.arange(len(index), 0, -1)) From 2a850eaac0fc43c44c54b0d75b52c98c921f472c Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 21 Jun 2021 23:25:12 +0100 Subject: [PATCH 49/55] update asserters doc string --- pandas/_testing/asserters.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 6144bd436d84f..8f874d76f316d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -266,10 +266,9 @@ def assert_index_equal( left : Index right : Index exact : bool or {'equiv'}, default 'equiv' - Whether to check the Index class, dtype and inferred_type - are identical. If 'equiv', RangeIndex can be substituted for - Int64Index and integer dtypes will be equivalent to each other and - float dtypes equivalent to each other. + It True, check that the Index class, dtype and inferred_type are identical. + If 'equiv', numeric indexes will only be compared by dtype and inferred_type. + It False, do not check that Index class, dtype and inferred_type are identical. check_names : bool, default True Whether to check the names attribute. 
check_less_precise : bool or int, default False From d04da70290fcbc334f00287f2ab6122dfa7f9ddb Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 22 Jun 2021 00:02:45 +0100 Subject: [PATCH 50/55] update tests/common.py --- pandas/tests/indexes/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3894d14c5540b..517e6ce326b31 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -32,6 +32,7 @@ TimedeltaIndex, isna, ) +from pandas import UInt64Index # noqa:F401 import pandas._testing as tm from pandas.core.api import NumericIndex from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin From 4b8385cb781dd6822ad7379cfe0fc1713daca4e0 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 23 Jun 2021 07:15:10 +0100 Subject: [PATCH 51/55] cleanups --- pandas/_testing/asserters.py | 6 +++--- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/category.py | 4 ++-- pandas/core/indexes/numeric.py | 2 +- pandas/tests/indexes/common.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 8f874d76f316d..d0957b1814213 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -266,9 +266,9 @@ def assert_index_equal( left : Index right : Index exact : bool or {'equiv'}, default 'equiv' - It True, check that the Index class, dtype and inferred_type are identical. - If 'equiv', numeric indexes will only be compared by dtype and inferred_type. - It False, do not check that Index class, dtype and inferred_type are identical. + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. check_names : bool, default True Whether to check the names attribute. 
check_less_precise : bool or int, default False diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7988c52c44cf0..60038b3eff394 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -441,7 +441,7 @@ def __new__( # index-like elif isinstance(data, Index) and data._is_numeric_index and dtype is None: - return type(data)(data, name=name, copy=copy) + return data._constructor(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): if isinstance(data, ABCMultiIndex): @@ -5726,7 +5726,7 @@ def map(self, mapper, na_action=None): attributes["dtype"] = self.dtype if self._is_numeric_index and is_numeric_dtype(new_values.dtype): - return type(self)(new_values, **attributes) + return self._constructor(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 9dc862582e557..f009e9bff36f6 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -289,7 +289,7 @@ def astype(self, dtype, copy: bool = True) -> Index: categories = self.categories # the super method always returns Int64Index, UInt64Index and Float64Index - # but if e.g. the categories are a NumIndex with dtype float32, we want to + # but if the categories are a NumericIndex with dtype float32, we want to # return an index with the same dtype as self.categories. 
if categories._is_numeric_index: assert isinstance(categories, NumericIndex) # mypy complaint fix @@ -301,7 +301,7 @@ def astype(self, dtype, copy: bool = True) -> Index: new_values = self._data.astype(dtype, copy=copy) # pass copy=False because any copying has been done in the # _data.astype call above - return type(categories)(new_values, name=self.name, copy=False) + return categories._constructor(new_values, name=self.name, copy=False) return super().astype(dtype, copy=copy) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 403296e81d9dd..7be9c339563dc 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -242,7 +242,7 @@ def astype(self, dtype, copy=True): return NumericIndex(arr, name=self.name, dtype=dtype) elif self._is_numeric_index: if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): - return type(self)(self, dtype=dtype, copy=copy) + return self._constructor(self, dtype=dtype, copy=copy) return super().astype(dtype, copy=copy) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 517e6ce326b31..1349c91b28670 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -357,7 +357,7 @@ def test_numpy_argsort(self, index): def test_repeat(self, simple_index): rep = 2 idx = simple_index.copy() - new_index_cls = type(idx) if not isinstance(idx, RangeIndex) else Int64Index + new_index_cls = Int64Index if isinstance(idx, RangeIndex) else idx._constructor expected = new_index_cls(idx.values.repeat(rep), name=idx.name) tm.assert_index_equal(idx.repeat(rep), expected) From 1f52f8b949561d4ca4cff7658e616b6150c26029 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 24 Jul 2021 10:17:06 +0100 Subject: [PATCH 52/55] simplify _ensure_dtype --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/numeric.py | 26 +++++--------------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexes/base.py 
b/pandas/core/indexes/base.py index 60038b3eff394..be7a84493b64b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -359,8 +359,9 @@ def _outer_indexer( _is_numeric_dtype: bool = False _can_hold_na: bool = True _can_hold_strings: bool = True + # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, - # UInt64Index or RangeIndex + # UInt64Index or RangeIndex. Needed for backwards compat. Remove in pandas 2.0. _is_numeric_index: bool = False _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 7be9c339563dc..03e4a2ba02fbc 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -205,7 +205,11 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) - return dtype + + if cls._is_numeric_index: # NumericIndex + return dtype + else: # Int64Index, UInt64Index etc. 
+ return cls._default_dtype def __contains__(self, key) -> bool: """ @@ -354,16 +358,6 @@ class IntegerIndex(NumericIndex): _is_numeric_index: bool = False - @classmethod - @doc(NumericIndex._ensure_dtype) - def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: - if dtype is None: - return cls._default_dtype - dtype = pandas_dtype(dtype) - assert isinstance(dtype, np.dtype) - - return cls._default_dtype - @property def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak @@ -429,13 +423,3 @@ class Float64Index(NumericIndex): _default_dtype = np.dtype(np.float64) _dtype_validation_metadata = (is_float_dtype, "float") _is_numeric_index: bool = False - - @classmethod - @doc(NumericIndex._ensure_dtype) - def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: - if dtype is None: - return cls._default_dtype - dtype = pandas_dtype(dtype) - assert isinstance(dtype, np.dtype) - - return cls._default_dtype From 951c5f71cc930ec9fcab1d8806edefad387449e3 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 25 Jul 2021 22:47:42 +0100 Subject: [PATCH 53/55] make attribute name clearer --- pandas/core/indexes/base.py | 12 +++++++++--- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/numeric.py | 14 ++++++++------ pandas/core/indexes/range.py | 2 +- pandas/tests/base/test_unique.py | 4 ++-- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/test_any_index.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- 8 files changed, 24 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index be7a84493b64b..35ffa86a663e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -362,7 +362,7 @@ def _outer_indexer( # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, # UInt64Index or RangeIndex. Needed for backwards compat. Remove in pandas 2.0. 
- _is_numeric_index: bool = False + _is_backward_compat_public_numeric_index: bool = False _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine # whether we support partial string indexing. Overridden @@ -441,7 +441,11 @@ def __new__( return Index._simple_new(data, name=name) # index-like - elif isinstance(data, Index) and data._is_numeric_index and dtype is None: + elif ( + isinstance(data, Index) + and data._is_backward_compat_public_numeric_index + and dtype is None + ): return data._constructor(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -5726,7 +5730,9 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype - if self._is_numeric_index and is_numeric_dtype(new_values.dtype): + if self._is_backward_compat_public_numeric_index and is_numeric_dtype( + new_values.dtype + ): return self._constructor(new_values, **attributes) return Index(new_values, **attributes) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f009e9bff36f6..0897d0f3f67e9 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -291,7 +291,7 @@ def astype(self, dtype, copy: bool = True) -> Index: # the super method always returns Int64Index, UInt64Index and Float64Index # but if the categories are a NumericIndex with dtype float32, we want to # return an index with the same dtype as self.categories. 
- if categories._is_numeric_index: + if categories._is_backward_compat_public_numeric_index: assert isinstance(categories, NumericIndex) # mypy complaint fix try: categories._validate_dtype(dtype) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 03e4a2ba02fbc..a0763d7156195 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -97,7 +97,7 @@ class NumericIndex(Index): ) _is_numeric_dtype = True _can_hold_strings = False - _is_numeric_index: bool = True + _is_backward_compat_public_numeric_index: bool = True @cache_readonly def _can_hold_na(self) -> bool: @@ -206,9 +206,11 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: dtype = pandas_dtype(dtype) assert isinstance(dtype, np.dtype) - if cls._is_numeric_index: # NumericIndex + if cls._is_backward_compat_public_numeric_index: + # dtype for NumericIndex return dtype - else: # Int64Index, UInt64Index etc. + else: + # dtype for Int64Index, UInt64Index etc. Needed for backwards compat. return cls._default_dtype def __contains__(self, key) -> bool: @@ -244,7 +246,7 @@ def astype(self, dtype, copy=True): return Int64Index(arr, name=self.name) else: return NumericIndex(arr, name=self.name, dtype=dtype) - elif self._is_numeric_index: + elif self._is_backward_compat_public_numeric_index: if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): return self._constructor(self, dtype=dtype, copy=copy) @@ -356,7 +358,7 @@ class IntegerIndex(NumericIndex): This is an abstract class for Int64Index, UInt64Index. 
""" - _is_numeric_index: bool = False + _is_backward_compat_public_numeric_index: bool = False @property def asi8(self) -> np.ndarray: @@ -422,4 +424,4 @@ class Float64Index(NumericIndex): _engine_type = libindex.Float64Engine _default_dtype = np.dtype(np.float64) _dtype_validation_metadata = (is_float_dtype, "float") - _is_numeric_index: bool = False + _is_backward_compat_public_numeric_index: bool = False diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 2c5e4b1f7bfbb..71bc4af78db6b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -101,7 +101,7 @@ class RangeIndex(NumericIndex): _engine_type = libindex.Int64Engine _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range - _is_numeric_index: bool = False + _is_backward_compat_public_numeric_index: bool = False # -------------------------------------------------------------------- # Constructors diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index a044b82587fae..6ca5f2f76861e 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -25,7 +25,7 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) - elif isinstance(obj, pd.Index) and obj._is_numeric_index: + elif isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: expected = NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): @@ -66,7 +66,7 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if isinstance(obj, pd.Index) and obj._is_numeric_index: + if isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: expected = 
NumericIndex(unique_values, dtype=obj.dtype) tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1349c91b28670..2c4067c347a35 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -666,7 +666,7 @@ def test_map_dictlike(self, mapper, simple_index): tm.assert_index_equal(result, expected) # empty mappable - if idx._is_numeric_index: + if idx._is_backward_compat_public_numeric_index: new_index_cls = NumericIndex else: new_index_cls = Float64Index diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index 1ea60b02618c8..510d76ebe4407 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -51,7 +51,7 @@ def test_mutability(index): def test_map_identity_mapping(index): # GH#12766 result = index.map(lambda x: x) - if index._is_numeric_index: + if index._is_backward_compat_public_numeric_index: if is_float_dtype(index.dtype): expected = index.astype(np.float64) elif index.dtype == np.uint64: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 59f0c482e45ba..a84e83e0f54b6 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -714,7 +714,7 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) - elif index._is_numeric_index: + elif index._is_backward_compat_public_numeric_index: if is_float_dtype(index.dtype): exp_dtype = np.float64 else: From 7c7c0dd84e9d42890e6fa750fbe35ad618924300 Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 29 Jul 2021 00:32:03 +0100 Subject: [PATCH 54/55] address comments --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/numeric.py | 7 +++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py 
b/pandas/core/indexes/base.py index 35ffa86a663e3..a140b2967b72c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -361,7 +361,8 @@ def _outer_indexer( _can_hold_strings: bool = True # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, - # UInt64Index or RangeIndex. Needed for backwards compat. Remove in pandas 2.0. + # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and + # associated code in pandas 2.0. _is_backward_compat_public_numeric_index: bool = False _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0897d0f3f67e9..2faf2cab75117 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -282,7 +282,7 @@ def _is_dtype_compat(self, other) -> Categorical: return other @doc(Index.astype) - def astype(self, dtype, copy: bool = True) -> Index: + def astype(self, dtype: Dtype, copy: bool = True) -> Index: from pandas.core.api import NumericIndex dtype = pandas_dtype(dtype) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a0763d7156195..fa899de35d0fc 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -166,6 +166,10 @@ def _ensure_array(cls, data, dtype, copy: bool): dtype = cls._ensure_dtype(dtype) if copy or not is_dtype_equal(data.dtype, dtype): + # the try/except below is because it's difficult to predict the error + # and/or error message from different combinations of data and type. + # Efforts to avoid this try/except welcome. + # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 try: subarr = np.array(data, dtype=dtype, copy=copy) cls._validate_dtype(subarr.dtype) @@ -247,6 +251,9 @@ def astype(self, dtype, copy=True): else: return NumericIndex(arr, name=self.name, dtype=dtype) elif self._is_backward_compat_public_numeric_index: + # this block is needed so e.g. 
NumericIndex[int8].astype("int32") returns + # NumericIndex[int32] and not Int64Index with dtype int64. + # When Int64Index etc. are removed from the code base, remove this also. if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): return self._constructor(self, dtype=dtype, copy=copy) From bb72c68f92120a9d2800b8b258f1989cdd139cfd Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 30 Jul 2021 00:24:00 +0100 Subject: [PATCH 55/55] add TODO --- pandas/core/indexes/numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index fa899de35d0fc..f5832036ee096 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -166,8 +166,8 @@ def _ensure_array(cls, data, dtype, copy: bool): dtype = cls._ensure_dtype(dtype) if copy or not is_dtype_equal(data.dtype, dtype): - # the try/except below is because it's difficult to predict the error - # and/or error message from different combinations of data and type. + # TODO: the try/except below is because it's difficult to predict the error + # and/or error message from different combinations of data and dtype. # Efforts to avoid this try/except welcome. # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 try: