From ea10835a729237206b40ab4dc8bb98ce8266e569 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Mon, 14 Aug 2023 16:44:46 -0400 Subject: [PATCH 01/12] Overrode _validate_setitem_value method --- pandas/core/arrays/numpy_.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 99a3586871d10..49b9a054c53c1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -14,6 +14,7 @@ ) from pandas.compat.numpy import function as nv +from pandas.core.dtypes import PandasDtype from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype @@ -507,6 +508,10 @@ def to_numpy( return result + def _validate_setitem_value(self, value): + if PandasDtype(type(value)) != self.dtype: + raise TypeError(f"{np.dtype(value)} != {self.dtype} => bad") + # ------------------------------------------------------------------------ # Ops From b112d5a3d516bce64a1b307d9c3173d636257096 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Mon, 14 Aug 2023 17:18:36 -0400 Subject: [PATCH 02/12] Added test cases for _validate_setitem_value --- pandas/core/arrays/numpy_.py | 5 ++--- pandas/tests/arrays/test_array.py | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 49b9a054c53c1..cf36eab28134e 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -14,7 +14,6 @@ ) from pandas.compat.numpy import function as nv -from pandas.core.dtypes import PandasDtype from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype @@ -509,8 +508,8 @@ def to_numpy( return result def _validate_setitem_value(self, value): - if PandasDtype(type(value)) != self.dtype: - raise 
TypeError(f"{np.dtype(value)} != {self.dtype} => bad") + if NumpyEADtype(type(value)) != self.dtype: + raise TypeError("bad") # ------------------------------------------------------------------------ # Ops diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 2746cd91963a0..d1d28d36b772d 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -444,3 +444,11 @@ def test_array_to_numpy_na(): result = arr.to_numpy(na_value=True, dtype=bool) expected = np.array([True, True]) tm.assert_numpy_array_equal(result, expected) + + +def test_array_validate_setitem_value(): + # Issue# 51044 + arr = pd.Series(range(5)).array + with pytest.raises(TypeError, match="bad"): + arr._validate_setitem_value("foo") + arr._validate_setitem_value(1.5) From 352c6ce349357c6a0b05a3d228b8159bc11ed222 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Tue, 15 Aug 2023 15:26:15 -0400 Subject: [PATCH 03/12] Updated _validate_setitem_value --- pandas/core/arrays/numpy_.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index cf36eab28134e..8802fa1059cb4 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -508,8 +508,22 @@ def to_numpy( return result def _validate_setitem_value(self, value): - if NumpyEADtype(type(value)) != self.dtype: - raise TypeError("bad") + kind = self.dtype.kind + # TODO: get this all from np_can_hold_element? 
+ if kind == "b": + if lib.is_bool(value): + return value + + elif kind == "f": + if lib.is_integer(value) or lib.is_float(value): + return value + else: + if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()): + return value + if NumpyEADtype(type(value)) != self.dtype: + raise TypeError("bad") + else: + raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") # ------------------------------------------------------------------------ # Ops From dc541d16f4263d0a251f0a4b0081cad397b197e0 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Tue, 15 Aug 2023 16:55:00 -0400 Subject: [PATCH 04/12] Fixed _validate_setitem_value method --- pandas/core/arrays/numpy_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 8802fa1059cb4..ad4b78e0bd5dc 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -509,7 +509,6 @@ def to_numpy( def _validate_setitem_value(self, value): kind = self.dtype.kind - # TODO: get this all from np_can_hold_element? 
if kind == "b": if lib.is_bool(value): return value From a219e893f492caaad7214daa2625c4f9fdc26798 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Wed, 16 Aug 2023 15:58:21 -0400 Subject: [PATCH 05/12] Modifying _validate_setitem_value method --- pandas/core/arrays/numpy_.py | 18 +++--------------- pandas/tests/arrays/numpy_/test_numpy.py | 9 +++++++++ pandas/tests/arrays/test_array.py | 8 -------- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index ad4b78e0bd5dc..63a47185d490d 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -508,21 +508,9 @@ def to_numpy( return result def _validate_setitem_value(self, value): - kind = self.dtype.kind - if kind == "b": - if lib.is_bool(value): - return value - - elif kind == "f": - if lib.is_integer(value) or lib.is_float(value): - return value - else: - if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()): - return value - if NumpyEADtype(type(value)) != self.dtype: - raise TypeError("bad") - else: - raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}") + if NumpyEADtype(type(value)) != self.dtype: + raise TypeError("bad") + return value # ------------------------------------------------------------------------ # Ops diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 4217745e60e76..4a4756ee9ae81 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -322,3 +322,12 @@ def test_factorize_unsigned(): tm.assert_numpy_array_equal(res_codes, exp_codes) tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) + + +def test_array_validate_setitem_value(): + # Issue# 51044 + arr = pd.Series(range(5)).array + with pytest.raises(TypeError, match="bad"): + arr._validate_setitem_value("foo") + with pytest.raises(TypeError, match="bad"): + arr._validate_setitem_value(1.5) 
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index d1d28d36b772d..2746cd91963a0 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -444,11 +444,3 @@ def test_array_to_numpy_na(): result = arr.to_numpy(na_value=True, dtype=bool) expected = np.array([True, True]) tm.assert_numpy_array_equal(result, expected) - - -def test_array_validate_setitem_value(): - # Issue# 51044 - arr = pd.Series(range(5)).array - with pytest.raises(TypeError, match="bad"): - arr._validate_setitem_value("foo") - arr._validate_setitem_value(1.5) From 61768d9f5fe749c21f28b061c2dd0bcf6e0c38d2 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Thu, 17 Aug 2023 15:26:33 -0400 Subject: [PATCH 06/12] Adjusted the error message --- pandas/core/arrays/numpy_.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 63a47185d490d..db55a80132110 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -509,7 +509,11 @@ def to_numpy( def _validate_setitem_value(self, value): if NumpyEADtype(type(value)) != self.dtype: - raise TypeError("bad") + raise TypeError( + "value cannot be inserted without changing the dtype. 
value:" + f"{value}, type(value): {type(value)}, NumpyEADtype(type(value)):" + f" {NumpyEADtype(type(value))}, self.dtype: {self.dtype}" + ) return value # ------------------------------------------------------------------------ From 2d792705e2cd8a88eda3e804e7a8d38c4ef732a6 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Thu, 17 Aug 2023 17:44:07 -0400 Subject: [PATCH 07/12] Updated _validate_setitem_value --- pandas/core/arrays/numpy_.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index db55a80132110..27bbc0515386e 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -508,11 +508,17 @@ def to_numpy( return result def _validate_setitem_value(self, value): - if NumpyEADtype(type(value)) != self.dtype: + if type(value) == int: + val = np.power(value, 1, dtype=np.int64) # for int64 only + elif type(value) == float: + val = np.power(value, 1, dtype=np.float64) # for float64 only + else: + val = np.asarray([value], dtype=object) + if NumpyEADtype(type(val)) != self.dtype: raise TypeError( "value cannot be inserted without changing the dtype. 
value:" - f"{value}, type(value): {type(value)}, NumpyEADtype(type(value)):" - f" {NumpyEADtype(type(value))}, self.dtype: {self.dtype}" + f"{val}, type(value): {type(val)}, NumpyEADtype(type(value)):" + f" {NumpyEADtype(type(val))}, self.dtype: {self.dtype}" ) return value From 45c2f98e5c591b6b4c6adb2a3e0b0fe1f2148a4a Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Tue, 22 Aug 2023 08:22:58 -0400 Subject: [PATCH 08/12] Updated _validate_setitem_value --- pandas/core/arrays/numpy_.py | 37 ++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 27bbc0515386e..b5b38e110b702 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -509,16 +509,41 @@ def to_numpy( def _validate_setitem_value(self, value): if type(value) == int: - val = np.power(value, 1, dtype=np.int64) # for int64 only + if ( + self.dtype == NumpyEADtype("int64") + or self.dtype == NumpyEADtype("float64") + or self.dtype == NumpyEADtype("uint16") + or self.dtype == NumpyEADtype("object") + or self.dtype is None + ): + return value elif type(value) == float: - val = np.power(value, 1, dtype=np.float64) # for float64 only + if ( + self.dtype == NumpyEADtype("float64") + or self.dtype == NumpyEADtype("object") + or self.dtype is None + ): + return value + elif type(value) == str: + if ( + self.dtype == NumpyEADtype("str") + or self.dtype == NumpyEADtype("object") + or self.dtype is None + or self.dtype == NumpyEADtype("U32") + ): + return value + elif NumpyEADtype(type(value)) == NumpyEADtype(self.dtype) or NumpyEADtype( + type(value) + ) == NumpyEADtype(type(value)): + return value else: - val = np.asarray([value], dtype=object) - if NumpyEADtype(type(val)) != self.dtype: + if self.dtype == NumpyEADtype("object") or self.dtype is None: + return value + if NumpyEADtype(type(value)) != self.dtype: raise TypeError( "value cannot be inserted without changing the dtype. 
value:" - f"{val}, type(value): {type(val)}, NumpyEADtype(type(value)):" - f" {NumpyEADtype(type(val))}, self.dtype: {self.dtype}" + f"{value}, type(value): {type(value)}, NumpyEADtype(type(value)):" + f" {NumpyEADtype(type(value))}, self.dtype: {self.dtype}" ) return value From e353d1d23aa8bbb31a300901891f4b472187b0e6 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Wed, 23 Aug 2023 01:11:05 -0400 Subject: [PATCH 09/12] Cleaned up _validate_setitem_value --- pandas/core/arrays/numpy_.py | 58 +++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index b5b38e110b702..dd6d44ab3a7d6 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -510,42 +510,46 @@ def to_numpy( def _validate_setitem_value(self, value): if type(value) == int: if ( - self.dtype == NumpyEADtype("int64") - or self.dtype == NumpyEADtype("float64") - or self.dtype == NumpyEADtype("uint16") - or self.dtype == NumpyEADtype("object") + self.dtype + in [ + NumpyEADtype("int64"), + NumpyEADtype("float64"), + NumpyEADtype("uint16"), + NumpyEADtype("object"), + ] or self.dtype is None ): return value elif type(value) == float: if ( - self.dtype == NumpyEADtype("float64") - or self.dtype == NumpyEADtype("object") + self.dtype in [NumpyEADtype("float64"), NumpyEADtype("object")] or self.dtype is None ): return value - elif type(value) == str: - if ( - self.dtype == NumpyEADtype("str") - or self.dtype == NumpyEADtype("object") - or self.dtype is None - or self.dtype == NumpyEADtype("U32") - ): - return value - elif NumpyEADtype(type(value)) == NumpyEADtype(self.dtype) or NumpyEADtype( - type(value) - ) == NumpyEADtype(type(value)): + elif type(value) not in [int, float] and ( + self.dtype + not in [ + NumpyEADtype("int64"), + NumpyEADtype("float64"), + NumpyEADtype("uint16"), + NumpyEADtype("object"), + ] + or lib.is_list_like(value) + ): return value - else: - if self.dtype 
== NumpyEADtype("object") or self.dtype is None: - return value - if NumpyEADtype(type(value)) != self.dtype: - raise TypeError( - "value cannot be inserted without changing the dtype. value:" - f"{value}, type(value): {type(value)}, NumpyEADtype(type(value)):" - f" {NumpyEADtype(type(value))}, self.dtype: {self.dtype}" - ) - return value + if ( + NumpyEADtype(type(value)) == NumpyEADtype(self.dtype) + or NumpyEADtype(type(value)) == self.dtype + ): + return value + if self.dtype == NumpyEADtype("object") or self.dtype is None: + return value + + raise TypeError( + "value cannot be inserted without changing the dtype. value:" + f"{value}, type(value): {type(value)}, NumpyEADtype(type(value)):" + f" {NumpyEADtype(type(value))}, self.dtype: {self.dtype}" + ) # ------------------------------------------------------------------------ # Ops From 09a49ea1d73b37b161347fe6c3282aa21370b834 Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Wed, 23 Aug 2023 01:12:40 -0400 Subject: [PATCH 10/12] Fixed test_numpy test cases --- pandas/tests/arrays/numpy_/test_numpy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 4a4756ee9ae81..54f49aca6cc3e 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -279,7 +279,7 @@ def test_setitem_no_coercion(): # With a value that we do coerce, check that we coerce the value # and not the underlying array. 
- arr[0] = 2.5 + arr[0] = 2 assert isinstance(arr[0], (int, np.integer)), type(arr[0]) @@ -295,7 +295,7 @@ def test_setitem_preserves_views(): assert view2[0] == 9 assert view3[0] == 9 - arr[-1] = 2.5 + arr[-1] = 2 view1[-1] = 5 assert arr[-1] == 5 @@ -327,7 +327,7 @@ def test_factorize_unsigned(): def test_array_validate_setitem_value(): # Issue# 51044 arr = pd.Series(range(5)).array - with pytest.raises(TypeError, match="bad"): + with pytest.raises(TypeError, match="str"): arr._validate_setitem_value("foo") - with pytest.raises(TypeError, match="bad"): + with pytest.raises(TypeError, match="float"): arr._validate_setitem_value(1.5) From 89d2b66a5d90676bc59028abe8afffe8fbe1a79e Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Wed, 23 Aug 2023 01:24:46 -0400 Subject: [PATCH 11/12] Added more datatype cases --- pandas/core/arrays/numpy_.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index dd6d44ab3a7d6..f9fdf1f465e76 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -512,9 +512,16 @@ def _validate_setitem_value(self, value): if ( self.dtype in [ + NumpyEADtype("int8"), + NumpyEADtype("int16"), + NumpyEADtype("int32"), NumpyEADtype("int64"), + NumpyEADtype("float32"), NumpyEADtype("float64"), + NumpyEADtype("uint8"), NumpyEADtype("uint16"), + NumpyEADtype("uint32"), + NumpyEADtype("uint64"), NumpyEADtype("object"), ] or self.dtype is None @@ -522,7 +529,12 @@ def _validate_setitem_value(self, value): return value elif type(value) == float: if ( - self.dtype in [NumpyEADtype("float64"), NumpyEADtype("object")] + self.dtype + in [ + NumpyEADtype("float32"), + NumpyEADtype("float64"), + NumpyEADtype("object"), + ] or self.dtype is None ): return value From c5e0e248c5c3978c2c9a422c542dfdaa5bd9834a Mon Sep 17 00:00:00 2001 From: Godwill Agbehonou Date: Thu, 24 Aug 2023 17:57:55 -0400 Subject: [PATCH 12/12] Update _validate_setitem_value --- 
pandas/core/arrays/numpy_.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index f9fdf1f465e76..e35ceb8b662e1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -15,7 +15,10 @@ from pandas.compat.numpy import function as nv from pandas.core.dtypes.astype import astype_array -from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + np_can_hold_element, +) from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import NumpyEADtype from pandas.core.dtypes.missing import isna @@ -509,24 +512,11 @@ def to_numpy( def _validate_setitem_value(self, value): if type(value) == int: - if ( - self.dtype - in [ - NumpyEADtype("int8"), - NumpyEADtype("int16"), - NumpyEADtype("int32"), - NumpyEADtype("int64"), - NumpyEADtype("float32"), - NumpyEADtype("float64"), - NumpyEADtype("uint8"), - NumpyEADtype("uint16"), - NumpyEADtype("uint32"), - NumpyEADtype("uint64"), - NumpyEADtype("object"), - ] - or self.dtype is None - ): - return value + try: + np_can_hold_element(self.dtype, value) + except Exception: + pass + return value elif type(value) == float: if ( self.dtype @@ -549,12 +539,16 @@ def _validate_setitem_value(self, value): or lib.is_list_like(value) ): return value + if self.dtype is None: + return value + if not isinstance(self.dtype, NumpyEADtype): + return value if ( NumpyEADtype(type(value)) == NumpyEADtype(self.dtype) or NumpyEADtype(type(value)) == self.dtype ): return value - if self.dtype == NumpyEADtype("object") or self.dtype is None: + if self.dtype == NumpyEADtype("object"): return value raise TypeError(