From 2cc08ab75a95aada94cc30fa4862544fb4f671c0 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 4 Nov 2022 20:57:15 +0000 Subject: [PATCH 1/9] BUG: NumericIndex should not support float16 dtype --- pandas/core/indexes/numeric.py | 11 +++++++++-- pandas/tests/indexes/numeric/test_numeric.py | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index df353e98fde4a..f8d4025bd18fe 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -75,8 +75,8 @@ class NumericIndex(Index): Notes ----- An NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or - float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric - dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.). + float64/32 dtype. In particular, ``NumericIndex`` *can not* hold numpy float16 + dtype or Pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.). """ _typ = "numericindex" @@ -176,6 +176,10 @@ def _ensure_array(cls, data, dtype, copy: bool): raise ValueError("Index data must be 1-dimensional") subarr = np.asarray(subarr) + if subarr.dtype == "float16": + # float16 not supported (no indexing engine) + subarr = subarr.astype("float32") + return subarr @classmethod @@ -202,6 +206,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: dtype = pandas_dtype(dtype) if not isinstance(dtype, np.dtype): raise TypeError(f"{dtype} not a numpy type") + if dtype == np.float16: + # float16 not supported (no indexing engine) + dtype = np.dtype(np.float32) if cls._is_backward_compat_public_numeric_index: # dtype for NumericIndex diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index c06fce4811f12..e41da8ad56e46 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -471,6 +471,20 @@ def test_coerce_list(self): assert type(arr) is Index +class TestFloat16Index: + # float 16 indexes not supported + # GH 49535 + def test_array(self): + arr = np.array([1, 2, 3], dtype=np.float16) + result = NumericIndex(arr) + + expected = NumericIndex([1, 2, 3], dtype=np.float32) + tm.assert_index_equal(result, expected, check_exact=True) + + result = NumericIndex([1, 2, 3], dtype=np.float16) + tm.assert_index_equal(result, expected, check_exact=True) + + class TestUIntNumericIndex(NumericInt): _index_cls = NumericIndex From f73078d0bebfaef2190d78c1d63b2b156aa8e253 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 06:58:03 +0000 Subject: [PATCH 2/9] make NumericIndex fail with float16 dtype --- pandas/core/indexes/numeric.py | 5 ++++- pandas/tests/indexes/numeric/test_numeric.py | 9 ++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index f8d4025bd18fe..cc252dff92e2c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -178,7 +178,7 @@ def _ensure_array(cls, data, dtype, copy: bool): subarr = np.asarray(subarr) if subarr.dtype == "float16": # float16 not supported (no indexing engine) - subarr = subarr.astype("float32") + raise TypeError("float16 indexes are not supported") return subarr @@ -209,6 +209,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: if dtype == np.float16: # float16 not supported (no indexing engine) dtype = np.dtype(np.float32) + if dtype == "float16": + # float16 not supported (no indexing engine) + raise TypeError("float16 indexes are not supported") if cls._is_backward_compat_public_numeric_index: # dtype for NumericIndex diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index e41da8ad56e46..8ba560dbf1d98 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -476,13 +476,8 @@ class TestFloat16Index: # GH 49535 def test_array(self): arr = np.array([1, 2, 3], dtype=np.float16) - result = NumericIndex(arr) - - expected = NumericIndex([1, 2, 3], dtype=np.float32) - tm.assert_index_equal(result, expected, check_exact=True) - - result = NumericIndex([1, 2, 3], dtype=np.float16) - tm.assert_index_equal(result, expected, check_exact=True) + with pytest.raises(TypeError, match="float16 indexes are not supported"): + result = NumericIndex(arr) class TestUIntNumericIndex(NumericInt): From d41cf353ee2d66bc391bf9c3b64ab51f9779e8f2 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 12 Nov 2022 08:16:59 +0000 Subject: [PATCH 3/9] make NumericIndex fail with float16 dtype, II --- pandas/tests/arithmetic/test_numeric.py | 13 ++++--------- pandas/tests/indexes/numeric/test_numeric.py | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 529dd6baa70c0..8a98ec83553c8 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -72,15 +72,10 @@ def compare_op(series, other, op): # TODO: remove this kludge once mypy stops giving false positives here # List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex] # See GH#29725 -ser_or_index: list[Any] = [Series, Index] -lefts: list[Any] = [RangeIndex(10, 40, 10)] -lefts.extend( - [ - cls([10, 20, 30], dtype=dtype) - for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] - for cls in ser_or_index - ] -) +_ldtypes = ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] +lefts: list[Index | Series] = [RangeIndex(10, 40, 10)] +lefts.extend([Series([10, 20, 30], dtype=dtype) for dtype in _ldtypes]) +lefts.extend([Index([10, 20, 30], dtype=dtype) for dtype in _ldtypes if dtype != "f2"]) # ------------------------------------------------------------------ # Comparisons diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 8ba560dbf1d98..67ed27b93d9e4 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -477,7 +477,7 @@ class TestFloat16Index: def test_array(self): arr = np.array([1, 2, 3], dtype=np.float16) with pytest.raises(TypeError, match="float16 indexes are not supported"): - result = NumericIndex(arr) + NumericIndex(arr) class TestUIntNumericIndex(NumericInt): From d33c7d6794b6066c227f0ec8771e6d5d98cd3c8e Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 15 Nov 2022 00:10:32 +0000 Subject: [PATCH 4/9] fix failures --- pandas/tests/arithmetic/test_numeric.py | 1 - pandas/tests/base/test_conversion.py | 8 ++++++++ pandas/tests/base/test_value_counts.py | 16 ++++++++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 8a98ec83553c8..5c6cbf73d5bfc 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -7,7 +7,6 @@ from datetime import timedelta from decimal import Decimal import operator -from typing import Any import numpy as np import pytest diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index f244b348c6763..85ae5d91ec46b 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -62,6 +62,10 @@ def test_iterable(self, index_or_series, method, dtype, rdtype): # gh-13258 # coerce iteration to underlying python / pandas types typ = index_or_series + if dtype == "float16" and issubclass(typ, pd.Index): + with pytest.raises(TypeError, match="float16 indexes are not supported"): + typ([1], dtype=dtype) + return s = typ([1], dtype=dtype) result = method(s)[0] assert isinstance(result, rdtype) @@ -115,6 +119,10 @@ def test_iterable_map(self, index_or_series, dtype, rdtype): # gh-13236 # coerce iteration to underlying python / pandas types typ = index_or_series + if dtype == "float16" and issubclass(typ, pd.Index): + with pytest.raises(TypeError, match="float16 indexes are not supported"): + typ([1], dtype=dtype) + return s = typ([1], dtype=dtype) result = s.map(type)[0] if not isinstance(rdtype, tuple): diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index dafbd9fee1b8e..6618c796fcc19 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -28,7 +28,13 @@ def test_value_counts(index_or_series_obj): counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) - expected.index = expected.index.astype(obj.dtype) + + if obj.dtype != np.float16: + expected.index = expected.index.astype(obj.dtype) + else: + with pytest.raises(TypeError, match="float16 indexes are not supported"): + expected.index.astype(obj.dtype) + return if not isinstance(result.dtype, np.dtype): # i.e IntegerDtype @@ -73,7 +79,13 @@ def test_value_counts_null(null_obj, index_or_series_obj): # np.nan would be duplicated, whereas None wouldn't counter = collections.Counter(obj.dropna()) expected = Series(dict(counter.most_common()), dtype=np.int64) - expected.index = expected.index.astype(obj.dtype) + + if obj.dtype != np.float16: + expected.index = expected.index.astype(obj.dtype) + else: + with pytest.raises(TypeError, match="float16 indexes are not supported"): + expected.index.astype(obj.dtype) + return result = obj.value_counts() if obj.duplicated().any(): From 91a42da5027d53b1608e8b18b71c4acd4718f643 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 15 Nov 2022 01:15:37 +0000 Subject: [PATCH 5/9] NotImplementedError --- pandas/core/indexes/numeric.py | 4 ++-- pandas/tests/base/test_conversion.py | 4 ++-- pandas/tests/base/test_value_counts.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index cc252dff92e2c..26efc20d9c6fa 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -178,7 +178,7 @@ def _ensure_array(cls, data, dtype, copy: bool): subarr = np.asarray(subarr) if subarr.dtype == "float16": # float16 not supported (no indexing engine) - raise TypeError("float16 indexes are not supported") + raise NotImplementedError("float16 indexes are not implemented") return subarr @@ -211,7 +211,7 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: dtype = np.dtype(np.float32) if dtype == "float16": # float16 not supported (no indexing engine) - raise TypeError("float16 indexes are not supported") + raise NotImplementedError("float16 indexes are not supported") if cls._is_backward_compat_public_numeric_index: # dtype for NumericIndex diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 85ae5d91ec46b..a910b20c476ff 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -63,7 +63,7 @@ def test_iterable(self, index_or_series, method, dtype, rdtype): # coerce iteration to underlying python / pandas types typ = index_or_series if dtype == "float16" and issubclass(typ, pd.Index): - with pytest.raises(TypeError, match="float16 indexes are not supported"): + with pytest.raises(NotImplementedError, match="float16 indexes are not "): typ([1], dtype=dtype) return s = typ([1], dtype=dtype) @@ -120,7 +120,7 @@ def test_iterable_map(self, index_or_series, dtype, rdtype): # coerce iteration to underlying python / pandas types typ = index_or_series if dtype == "float16" and issubclass(typ, pd.Index): - with pytest.raises(TypeError, match="float16 indexes are not supported"): + with pytest.raises(NotImplementedError, match="float16 indexes are not "): typ([1], dtype=dtype) return s = typ([1], dtype=dtype) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 6618c796fcc19..3aa0827b22a78 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -32,7 +32,7 @@ def test_value_counts(index_or_series_obj): if obj.dtype != np.float16: expected.index = expected.index.astype(obj.dtype) else: - with pytest.raises(TypeError, match="float16 indexes are not supported"): + with pytest.raises(NotImplementedError, match="float16 indexes are not "): expected.index.astype(obj.dtype) return @@ -83,7 +83,7 @@ def test_value_counts_null(null_obj, index_or_series_obj): if obj.dtype != np.float16: expected.index = expected.index.astype(obj.dtype) else: - with pytest.raises(TypeError, match="float16 indexes are not supported"): + with pytest.raises(NotImplementedError, match="float16 indexes are not "): expected.index.astype(obj.dtype) return From 05b016853377029c5788600824eca546ecf97124 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 15 Nov 2022 06:51:41 +0000 Subject: [PATCH 6/9] NotImplementedError II --- pandas/tests/indexes/numeric/test_numeric.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 67ed27b93d9e4..49399aab3fd8a 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -476,7 +476,8 @@ class TestFloat16Index: # GH 49535 def test_array(self): arr = np.array([1, 2, 3], dtype=np.float16) - with pytest.raises(TypeError, match="float16 indexes are not supported"): + msg = "float16 indexes are not implemented" + with pytest.raises(NotImplementedError, match=msg): NumericIndex(arr) From 7c108f2cf334fec311a42451b83132ef0c133ca5 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 8 Dec 2022 14:16:37 +0000 Subject: [PATCH 7/9] fail on float16, but allow np.exp(int8_arrays) --- pandas/core/indexes/base.py | 3 ++ pandas/core/indexes/numeric.py | 4 +++ pandas/tests/indexes/numeric/test_numeric.py | 36 +++++++++++++++++--- pandas/tests/indexes/test_numpy_compat.py | 5 ++- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1938dc6d5c7b4..3dc6aed56fa24 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -881,6 +881,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): # i.e. np.divmod, np.modf, np.frexp return tuple(self.__array_wrap__(x) for x in result) + if result.dtype == np.float16: + result = result.astype(np.float32) + return self.__array_wrap__(result) def __array_wrap__(self, result, context=None): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 26efc20d9c6fa..8d53c33128093 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -133,6 +133,10 @@ def _ensure_array(cls, data, dtype, copy: bool): Ensure we have a valid array to pass to _simple_new. """ cls._validate_dtype(dtype) + if dtype == np.float16: + + # float16 not supported (no indexing engine) + raise NotImplementedError("float16 indexes are not supported") if not isinstance(data, (np.ndarray, Index)): # Coerce to ndarray if not already ndarray or Index diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 49399aab3fd8a..e8e6eed31db74 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -474,11 +474,39 @@ def test_coerce_list(self): class TestFloat16Index: # float 16 indexes not supported # GH 49535 - def test_array(self): - arr = np.array([1, 2, 3], dtype=np.float16) - msg = "float16 indexes are not implemented" + _index_cls = NumericIndex + + def test_constructor(self): + index_cls = self._index_cls + dtype = np.float16 + + msg = "float16 indexes are not supported" + + # explicit construction + with pytest.raises(NotImplementedError, match=msg): + index_cls([1, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + + # nan handling + with pytest.raises(NotImplementedError, match=msg): + index_cls([np.nan, np.nan], dtype=dtype) + with pytest.raises(NotImplementedError, match=msg): - NumericIndex(arr) + index_cls(np.array([np.nan]), dtype=dtype) class TestUIntNumericIndex(NumericInt): diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 453ece35a68e7..40fa4877d2e83 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -77,7 +77,10 @@ def test_numpy_ufuncs_basic(index, func): # coerces to float (e.g. np.sin) with np.errstate(all="ignore"): result = func(index) - exp = Index(func(index.values), name=index.name) + arr_result = func(index.values) + if arr_result.dtype == np.float16: + arr_result = arr_result.astype(np.float32) + exp = Index(arr_result, name=index.name) tm.assert_index_equal(result, exp) if type(index) is not Index or index.dtype == bool: From bba522a694b2c00f3d2414f99f853a30555e4358 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 8 Dec 2022 16:29:45 +0000 Subject: [PATCH 8/9] fail on float16, but allow np.exp(int8_arrays) II --- pandas/core/algorithms.py | 2 ++ pandas/core/base.py | 2 ++ pandas/tests/test_algos.py | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index de9e3ace4f0ca..aa9d1c8152019 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -891,6 +891,8 @@ def value_counts( else: values = _ensure_arraylike(values) keys, counts = value_counts_arraylike(values, dropna) + if keys.dtype == np.float16: + keys = keys.astype(np.float32) # For backwards compatibility, we let Index do its normal type # inference, _except_ for if if infers from object to bool. diff --git a/pandas/core/base.py b/pandas/core/base.py index 22a4790b32506..e5e0ac4e121ae 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1140,6 +1140,8 @@ def factorize( codes, uniques = algorithms.factorize( self._values, sort=sort, use_na_sentinel=use_na_sentinel ) + if uniques.dtype == np.float16: + uniques = uniques.astype(np.float32) if isinstance(self, ABCIndex): # preserve e.g. NumericIndex, preserve MultiIndex diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 818211cf0fa2a..767203838728b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -66,7 +66,10 @@ def test_factorize(self, index_or_series_obj, sort): constructor = Index if isinstance(obj, MultiIndex): constructor = MultiIndex.from_tuples - expected_uniques = constructor(obj.unique()) + expected_arr = obj.unique() + if expected_arr.dtype == np.float16: + expected_arr = expected_arr.astype(np.float32) + expected_uniques = constructor(expected_arr) if ( isinstance(obj, Index) and expected_uniques.dtype == bool From 785df812c204d3dcee85daba13973cab9260f467 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 20 Dec 2022 22:29:55 +0000 Subject: [PATCH 9/9] fix NumericIndex_ensure_dtype --- pandas/core/indexes/numeric.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 8d53c33128093..7fec60babea00 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -210,10 +210,7 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: dtype = pandas_dtype(dtype) if not isinstance(dtype, np.dtype): raise TypeError(f"{dtype} not a numpy type") - if dtype == np.float16: - # float16 not supported (no indexing engine) - dtype = np.dtype(np.float32) - if dtype == "float16": + elif dtype == np.float16: # float16 not supported (no indexing engine) raise NotImplementedError("float16 indexes are not supported")