From fd9f6230ed1950cdb25a3519ed084359350774f3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 11 Nov 2019 14:45:22 +0000 Subject: [PATCH 1/8] Validate dtype when Int64Index, UInt64Index, or Float64Index are constructed --- pandas/core/indexes/numeric.py | 17 ++++++++++++++++- pandas/tests/indexes/test_numeric.py | 12 ++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 46bb8eafee3b9..79df15588b046 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,6 +15,8 @@ is_float_dtype, is_integer_dtype, is_scalar, + is_signed_integer_dtype, + is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, ) @@ -45,7 +47,7 @@ class NumericIndex(Index): _is_numeric_dtype = True def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): - + cls._validate_dtype(cls, dtype) if fastpath is not None: warnings.warn( "The 'fastpath' keyword is deprecated, and will be " @@ -72,6 +74,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): name = data.name return cls._simple_new(subarr, name=name) + def _validate_dtype(cls, dtype): + if not dtype: + return + if cls._typ == "int64index": + if not is_signed_integer_dtype(dtype): + raise ValueError("Incorrect `dtype` passed") + elif cls._typ == "uint64index": + if not is_unsigned_integer_dtype(dtype): + raise ValueError("Incorrect `dtype` passed") + elif cls._typ == "float64index": + if not is_float_dtype(dtype): + raise ValueError("Incorrect `dtype` passed") + @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): assert kind in ["ix", "loc", "getitem", None] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e424b3601a4b2..9c792c7baa640 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -167,6 +167,18 @@ def 
test_constructor(self): result = Index(np.array([np.nan])) assert pd.isna(result.values).all() + @pytest.mark.parametrize( + "index, dtype", + [ + (pd.Int64Index, "float64"), + (pd.UInt64Index, "categorical"), + (pd.Float64Index, "datetime64"), + ], + ) + def test_invalid_dtype(self, index, dtype): + with pytest.raises(ValueError): + index([1, 2, 3], dtype=dtype) + def test_constructor_invalid(self): # invalid From 7438eb89379ecf7ac1fa4427110074a444eb3e2a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 11 Nov 2019 16:15:34 +0000 Subject: [PATCH 2/8] Update unnecessary dtype in existing test --- pandas/tests/series/indexing/test_numeric.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index bcddcf843df06..60b89c01cc22d 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -86,8 +86,7 @@ def test_get(): 1764.0, 1849.0, 1936.0, - ], - dtype="object", + ] ), ) From caf4e298e46c5d105c009b0d2820e42ef5bee3f4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 11 Nov 2019 17:37:23 +0000 Subject: [PATCH 3/8] Add whatsnew entry, make ValueErrors more specific, allow for integer dtype to be passed to Float64Index, match ValueError message --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/indexes/numeric.py | 31 +++++++++++++++++++--------- pandas/tests/indexes/test_numeric.py | 6 +++++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cd012fe755337..0253c3237046d 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -336,6 +336,7 @@ Numeric - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is 
assigned (:issue:`29142`) - Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) +- Bug in :class:`NumericIndex` resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) - Conversion diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 79df15588b046..77a877b3b9d85 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -75,17 +75,28 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): return cls._simple_new(subarr, name=name) def _validate_dtype(cls, dtype): - if not dtype: + if dtype is None: return - if cls._typ == "int64index": - if not is_signed_integer_dtype(dtype): - raise ValueError("Incorrect `dtype` passed") - elif cls._typ == "uint64index": - if not is_unsigned_integer_dtype(dtype): - raise ValueError("Incorrect `dtype` passed") - elif cls._typ == "float64index": - if not is_float_dtype(dtype): - raise ValueError("Incorrect `dtype` passed") + if cls._typ == "int64index" and not is_signed_integer_dtype(dtype): + raise ValueError( + "Incorrect `dtype` passed: expected signed integer, received {}".format( + dtype + ) + ) + elif cls._typ == "uint64index" and not is_unsigned_integer_dtype(dtype): + raise ValueError( + "Incorrect `dtype` passed: expected unsigned integer" + ", received {}".format(dtype) + ) + elif ( + cls._typ == "float64index" + and not is_float_dtype(dtype) + and not is_integer_dtype(dtype) + ): + raise ValueError( + "Incorrect `dtype` passed: expected float or integer" + ", received {}".format(dtype) + ) @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 9c792c7baa640..fdd4257466006 100644 --- a/pandas/tests/indexes/test_numeric.py +++ 
b/pandas/tests/indexes/test_numeric.py @@ -176,7 +176,11 @@ def test_constructor(self): ], ) def test_invalid_dtype(self, index, dtype): - with pytest.raises(ValueError): + # GH 29539 + with pytest.raises( + ValueError, + match=r"Incorrect `dtype` passed: expected .*, received {}".format(dtype), + ): index([1, 2, 3], dtype=dtype) def test_constructor_invalid(self): From 1541a3fab5ddd561123c036ed28964c9976c1de8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 12 Nov 2019 10:55:29 +0000 Subject: [PATCH 4/8] Don't allow Float64Index to be constructed with integer dtype, instead remove dtype from call to Float64Index in the constructor of Index --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/numeric.py | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c9697c530628a..346b8bcf56260 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -380,7 +380,7 @@ def __new__( pass # Return an actual float index. 
- return Float64Index(data, copy=copy, dtype=dtype, name=name) + return Float64Index(data, copy=copy, name=name) elif inferred == "string": pass diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 77a877b3b9d85..78072ef2ea6d7 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -88,14 +88,9 @@ def _validate_dtype(cls, dtype): "Incorrect `dtype` passed: expected unsigned integer" ", received {}".format(dtype) ) - elif ( - cls._typ == "float64index" - and not is_float_dtype(dtype) - and not is_integer_dtype(dtype) - ): + elif cls._typ == "float64index" and not is_float_dtype(dtype): raise ValueError( - "Incorrect `dtype` passed: expected float or integer" - ", received {}".format(dtype) + "Incorrect `dtype` passed: expected float, received {}".format(dtype) ) @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) From 35c9004073a9e4bbf5f087c495d2c3bcd6d05255 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 13 Nov 2019 09:58:51 +0000 Subject: [PATCH 5/8] Add RangeIndex to _validate_dtype, use f-strings because they are so awesome --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/indexes/numeric.py | 32 +++++++++++++--------------- pandas/core/indexes/range.py | 6 ------ pandas/tests/indexes/test_numeric.py | 3 ++- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0253c3237046d..44c536bed4250 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -336,7 +336,7 @@ Numeric - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) - Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) -- Bug in :class:`NumericIndex` resulted in it being possible to 
instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) +- Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) - Conversion diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 78072ef2ea6d7..e360f30098be7 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -28,6 +28,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas._typing import Dtype from pandas.core import algorithms import pandas.core.common as com from pandas.core.indexes.base import Index, InvalidIndexError, _index_shared_docs @@ -47,7 +48,7 @@ class NumericIndex(Index): _is_numeric_dtype = True def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): - cls._validate_dtype(cls, dtype) + cls._validate_dtype(dtype) if fastpath is not None: warnings.warn( "The 'fastpath' keyword is deprecated, and will be " @@ -74,24 +75,21 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): name = data.name return cls._simple_new(subarr, name=name) - def _validate_dtype(cls, dtype): + @classmethod + def _validate_dtype(cls, dtype: Dtype) -> None: if dtype is None: return - if cls._typ == "int64index" and not is_signed_integer_dtype(dtype): - raise ValueError( - "Incorrect `dtype` passed: expected signed integer, received {}".format( - dtype - ) - ) - elif cls._typ == "uint64index" and not is_unsigned_integer_dtype(dtype): - raise ValueError( - "Incorrect `dtype` passed: expected unsigned integer" - ", received {}".format(dtype) - ) - elif cls._typ == "float64index" and not is_float_dtype(dtype): - raise ValueError( - "Incorrect `dtype` passed: expected float, received {}".format(dtype) - ) + validation_metadata = { + "int64index": (is_signed_integer_dtype, "signed 
integer"), + "uint64index": (is_unsigned_integer_dtype, "unsigned integer"), + "float64index": (is_float_dtype, "float"), + "rangeindex": (is_signed_integer_dtype, "signed integer"), + } + + validation_func, expected = validation_metadata[cls._typ] + if not validation_func(dtype): + msg = f"Incorrect `dtype` passed: expected {expected}, received {dtype}" + raise ValueError(msg) @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5fa3431fc97c0..0022fca9c0170 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -167,12 +167,6 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): # -------------------------------------------------------------------- - @staticmethod - def _validate_dtype(dtype): - """ require dtype to be None or int64 """ - if not (dtype is None or is_int64_dtype(dtype)): - raise TypeError("Invalid to pass a non-int64 dtype to RangeIndex") - @cache_readonly def _constructor(self): """ return the class to use for construction """ diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index fdd4257466006..8b342f3a7436d 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -173,13 +173,14 @@ def test_constructor(self): (pd.Int64Index, "float64"), (pd.UInt64Index, "categorical"), (pd.Float64Index, "datetime64"), + (pd.RangeIndex, "float64"), ], ) def test_invalid_dtype(self, index, dtype): # GH 29539 with pytest.raises( ValueError, - match=r"Incorrect `dtype` passed: expected .*, received {}".format(dtype), + match=rf"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}", ): index([1, 2, 3], dtype=dtype) From debca7914d9d8788058dcac515edb4a20c941ea7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 13 Nov 2019 10:42:22 +0000 Subject: [PATCH 6/8] Update test_range tests --- 
pandas/core/indexes/range.py | 1 - pandas/tests/indexes/test_range.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0022fca9c0170..f47783fe3271f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,7 +14,6 @@ from pandas.core.dtypes.common import ( ensure_platform_int, ensure_python_int, - is_int64_dtype, is_integer, is_integer_dtype, is_list_like, diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index fa64e1bacb2e5..1288602ad222f 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -110,7 +110,7 @@ def test_constructor_same(self): result = RangeIndex(index) tm.assert_index_equal(result, index, exact=True) - with pytest.raises(TypeError): + with pytest.raises(ValueError): RangeIndex(index, dtype="float64") def test_constructor_range(self): @@ -140,7 +140,7 @@ def test_constructor_range(self): expected = RangeIndex(1, 5, 2) tm.assert_index_equal(result, expected, exact=True) - with pytest.raises(TypeError): + with pytest.raises(ValueError): Index(range(1, 5, 2), dtype="float64") msg = r"^from_range\(\) got an unexpected keyword argument" with pytest.raises(TypeError, match=msg): @@ -178,7 +178,7 @@ def test_constructor_corner(self): RangeIndex(1.1, 10.2, 1.3) # invalid passed type - with pytest.raises(TypeError): + with pytest.raises(ValueError): RangeIndex(1, 5, dtype="float64") @pytest.mark.parametrize( From b88a1b9e1d53fc414c5a1305564b608cef9d21d6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 16 Nov 2019 09:55:17 +0000 Subject: [PATCH 7/8] Match ValueError message --- pandas/tests/indexes/test_range.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 1288602ad222f..ab5147f3c9e8e 100644 --- a/pandas/tests/indexes/test_range.py +++ 
b/pandas/tests/indexes/test_range.py @@ -110,7 +110,13 @@ def test_constructor_same(self): result = RangeIndex(index) tm.assert_index_equal(result, index, exact=True) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match=( + f"Incorrect `dtype` passed: expected signed integer" + ", received float64" + ), + ): RangeIndex(index, dtype="float64") def test_constructor_range(self): @@ -140,7 +146,13 @@ def test_constructor_range(self): expected = RangeIndex(1, 5, 2) tm.assert_index_equal(result, expected, exact=True) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match=( + f"Incorrect `dtype` passed: expected signed integer" + ", received float64" + ), + ): Index(range(1, 5, 2), dtype="float64") msg = r"^from_range\(\) got an unexpected keyword argument" with pytest.raises(TypeError, match=msg): @@ -178,7 +190,13 @@ def test_constructor_corner(self): RangeIndex(1.1, 10.2, 1.3) # invalid passed type - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match=( + f"Incorrect `dtype` passed: expected signed integer" + ", received float64" + ), + ): RangeIndex(1, 5, dtype="float64") @pytest.mark.parametrize( From d88b0d91bc1c360970e90829e31e156af5536cd6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 16 Nov 2019 09:59:29 +0000 Subject: [PATCH 8/8] Remove now useless f string --- pandas/tests/indexes/test_range.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index ab5147f3c9e8e..b60d3126da1d5 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -112,10 +112,7 @@ def test_constructor_same(self): with pytest.raises( ValueError, - match=( - f"Incorrect `dtype` passed: expected signed integer" - ", received float64" - ), + match="Incorrect `dtype` passed: expected signed integer, received float64", ): RangeIndex(index, dtype="float64") @@ -148,10 +145,7 @@ def 
test_constructor_range(self): with pytest.raises( ValueError, - match=( - f"Incorrect `dtype` passed: expected signed integer" - ", received float64" - ), + match="Incorrect `dtype` passed: expected signed integer, received float64", ): Index(range(1, 5, 2), dtype="float64") msg = r"^from_range\(\) got an unexpected keyword argument" @@ -192,10 +186,7 @@ def test_constructor_corner(self): # invalid passed type with pytest.raises( ValueError, - match=( - f"Incorrect `dtype` passed: expected signed integer" - ", received float64" - ), + match="Incorrect `dtype` passed: expected signed integer, received float64", ): RangeIndex(1, 5, dtype="float64")