From 53bbd662e96c173a03d02d2ef1191d431d85ebb7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 2 Jan 2022 14:34:52 -0800 Subject: [PATCH 1/3] REF: separate numpy-specific part of can_hold_element --- pandas/core/dtypes/cast.py | 93 +++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 22 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f18f1c760ca28..e37da8725bc19 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2185,80 +2185,129 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: # ExtensionBlock._can_hold_element return True - if dtype == _dtype_obj: + try: + np_can_hold_element(arr.dtype, element) return True + except (TypeError, ValueError): + return False + + +def np_can_hold_element(dtype: np.dtype, element: Any): + """ + Raise if we cannot losslessly set this element into an ndarray with this dtype. + + Specifically about places where we disagree with numpy. i.e. there are + cases where numpy will raise in doing the setitem that we do not check + for here, e.g. setting str "X" into a numeric ndarray. + + Returns the element, potentially cast to the dtype. + + Raises + ------ + ValueError : If we cannot losslessly store this element with this dtype. + """ + if dtype == _dtype_obj: + return element tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: if isinstance(element, range): - return _dtype_can_hold_range(element, dtype) + if _dtype_can_hold_range(element, dtype): + return element + raise ValueError if tipo is not None: if tipo.kind not in ["i", "u"]: if is_float(element) and element.is_integer(): - return True + return element if isinstance(element, np.ndarray) and element.dtype.kind == "f": # If all can be losslessly cast to integers, then we can hold them # We do something similar in putmask_smart casted = element.astype(dtype) comp = casted == element - return comp.all() + if comp.all(): + return element + raise ValueError # Anything other than integer we cannot hold - return False + raise ValueError elif dtype.itemsize < tipo.itemsize: if is_integer(element): # e.g. test_setitem_series_int8 if we have a python int 1 # tipo may be np.int32, despite the fact that it will fit # in smaller int dtypes. info = np.iinfo(dtype) - return info.min <= element <= info.max - return False + if info.min <= element <= info.max: + return element + raise ValueError + raise ValueError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype; we can put this into an ndarray # losslessly iff it has no NAs - return not element._mask.any() + hasnas = element._mask.any() + # TODO: don't rely on implementation detail + if hasnas: + raise ValueError + return element - return True + return element # We have not inferred an integer from the dtype # check if we have a builtin int or a float equal to an int - return is_integer(element) or (is_float(element) and element.is_integer()) + if is_integer(element) or (is_float(element) and element.is_integer()): + return element + raise ValueError elif dtype.kind == "f": if tipo is not None: # TODO: itemsize check? if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold - return False + raise ValueError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs - return not element._mask.any() - return True + hasnas = element._mask.any() + # TODO: don't rely on implementation detail + if hasnas: + raise ValueError + return element + return element - return lib.is_integer(element) or lib.is_float(element) + if lib.is_integer(element) or lib.is_float(element): + return element + raise ValueError elif dtype.kind == "c": if tipo is not None: - return tipo.kind in ["c", "f", "i", "u"] - return ( - lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element) - ) + if tipo.kind in ["c", "f", "i", "u"]: + return element + raise ValueError + if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element): + return element + raise ValueError elif dtype.kind == "b": if tipo is not None: - return tipo.kind == "b" - return lib.is_bool(element) + if tipo.kind == "b": # FIXME: wrong with BooleanArray? + return element + raise ValueError + if lib.is_bool(element): + return element + raise ValueError elif dtype.kind == "S": # TODO: test tests.frame.methods.test_replace tests get here, # need more targeted tests. xref phofl has a PR about this if tipo is not None: - return tipo.kind == "S" and tipo.itemsize <= dtype.itemsize - return isinstance(element, bytes) and len(element) <= dtype.itemsize + if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize: + return element + raise ValueError + if isinstance(element, bytes) and len(element) <= dtype.itemsize: + return element + raise ValueError raise NotImplementedError(dtype) From 7754cb09f91e260c82b55309754f0856d9f1cdc3 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 2 Jan 2022 15:27:22 -0800 Subject: [PATCH 2/3] mypy fixup --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e37da8725bc19..934fe8a5087c7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2186,7 +2186,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: return True try: - np_can_hold_element(arr.dtype, element) + np_can_hold_element(dtype, element) return True except (TypeError, ValueError): return False From 268786e5c54dce3bffd2a5fae9c10eda51e3d2de Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 3 Jan 2022 14:11:53 -0800 Subject: [PATCH 3/3] annotate, returns section --- pandas/core/dtypes/cast.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 934fe8a5087c7..b868abd800669 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2192,7 +2192,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: return False -def np_can_hold_element(dtype: np.dtype, element: Any): +def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: """ Raise if we cannot losslessly set this element into an ndarray with this dtype. @@ -2200,7 +2200,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any): cases where numpy will raise in doing the setitem that we do not check for here, e.g. setting str "X" into a numeric ndarray. - Returns the element, potentially cast to the dtype. + Returns + ------- + Any + The element, potentially cast to the dtype. Raises ------