diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index b39afc57f34f6..5ea0108d87c9a 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -535,61 +535,6 @@ cdef class PeriodEngine(Int64Engine): return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array) -cpdef convert_scalar(ndarray arr, object value): - # we don't turn integers - # into datetimes/timedeltas - - # we don't turn bools into int/float/complex - - if arr.descr.type_num == NPY_DATETIME: - if util.is_array(value): - pass - elif isinstance(value, (datetime, np.datetime64, date)): - return Timestamp(value).to_datetime64() - elif util.is_timedelta64_object(value): - # exclude np.timedelta64("NaT") from value != value below - pass - elif value is None or value != value: - return np.datetime64("NaT", "ns") - raise ValueError("cannot set a Timestamp with a non-timestamp " - f"{type(value).__name__}") - - elif arr.descr.type_num == NPY_TIMEDELTA: - if util.is_array(value): - pass - elif isinstance(value, timedelta) or util.is_timedelta64_object(value): - value = Timedelta(value) - if value is NaT: - return np.timedelta64("NaT", "ns") - return value.to_timedelta64() - elif util.is_datetime64_object(value): - # exclude np.datetime64("NaT") which would otherwise be picked up - # by the `value != value check below - pass - elif value is None or value != value: - return np.timedelta64("NaT", "ns") - raise ValueError("cannot set a Timedelta with a non-timedelta " - f"{type(value).__name__}") - - else: - validate_numeric_casting(arr.dtype, value) - - return value - - -cpdef validate_numeric_casting(dtype, object value): - # Note: we can't annotate dtype as cnp.dtype because that cases dtype.type - # to integer - if issubclass(dtype.type, (np.integer, np.bool_)): - if util.is_float_object(value) and value != value: - raise ValueError("Cannot assign nan to integer series") - - if (issubclass(dtype.type, (np.integer, np.floating, np.complex)) and - not issubclass(dtype.type, 
np.bool_)): - if util.is_bool_object(value): - raise ValueError("Cannot assign bool to float/integer series") - - cdef class BaseMultiIndexCodesEngine: """ Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 52c569793e499..0719b8ce6010b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1,11 +1,18 @@ """ routings for casting """ -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta import numpy as np from pandas._libs import lib, tslib, tslibs -from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT +from pandas._libs.tslibs import ( + NaT, + OutOfBoundsDatetime, + Period, + Timedelta, + Timestamp, + iNaT, +) from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import Dtype from pandas.util._validators import validate_bool_kwarg @@ -1599,3 +1606,59 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False): if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)): raise ValueError("Trying to coerce float values to integers") + + +def convert_scalar_for_putitemlike(scalar, dtype: np.dtype): + """ + Convert datetimelike scalar if we are setting into a datetime64 + or timedelta64 ndarray. 
+ + Parameters + ---------- + scalar : scalar + dtype : np.dtype + + Returns + ------- + scalar + """ + if dtype.kind == "m": + if isinstance(scalar, (timedelta, np.timedelta64)): + # We have to cast after asm8 in case we have NaT + return Timedelta(scalar).asm8.view("timedelta64[ns]") + elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)): + return np.timedelta64("NaT", "ns") + if dtype.kind == "M": + if isinstance(scalar, (date, np.datetime64)): + # Note: we include date, not just datetime + return Timestamp(scalar).to_datetime64() + elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)): + return np.datetime64("NaT", "ns") + else: + validate_numeric_casting(dtype, scalar) + return scalar + + +def validate_numeric_casting(dtype: np.dtype, value): + """ + Check that we can losslessly insert the given value into an array + with the given dtype. + + Parameters + ---------- + dtype : np.dtype + value : scalar + + Raises + ------ + ValueError + """ + if issubclass(dtype.type, (np.integer, np.bool_)): + if is_float(value) and np.isnan(value): + raise ValueError("Cannot assign nan to integer series") + + if issubclass(dtype.type, (np.integer, np.floating, np.complex)) and not issubclass( + dtype.type, np.bool_ + ): + if is_bool(value): + raise ValueError("Cannot assign bool to float/integer series") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 83a2a509c0743..8b3fd808957bb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -40,7 +40,7 @@ from pandas._config import get_option -from pandas._libs import algos as libalgos, index as libindex, lib, properties +from pandas._libs import algos as libalgos, lib, properties from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer from pandas.compat import PY37 from pandas.compat._optional import import_optional_dependency @@ -69,6 +69,7 @@ maybe_infer_to_datetimelike, maybe_upcast, maybe_upcast_putmask, + 
validate_numeric_casting, ) from pandas.core.dtypes.common import ( ensure_float64, @@ -3025,7 +3026,7 @@ def _set_value(self, index, col, value, takeable: bool = False): series = self._get_item_cache(col) engine = self.index._engine loc = engine.get_loc(index) - libindex.validate_numeric_casting(series.dtype, value) + validate_numeric_casting(series.dtype, value) series._values[loc] = value # Note: trying to use series._set_value breaks tests in diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ccb4927d9b4b7..c13f0ae6462fc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -18,7 +18,10 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes import concat as _concat -from pandas.core.dtypes.cast import maybe_cast_to_integer_array +from pandas.core.dtypes.cast import ( + maybe_cast_to_integer_array, + validate_numeric_casting, +) from pandas.core.dtypes.common import ( ensure_categorical, ensure_int64, @@ -4653,7 +4656,7 @@ def set_value(self, arr, key, value): stacklevel=2, ) loc = self._engine.get_loc(key) - libindex.validate_numeric_casting(arr.dtype, value) + validate_numeric_casting(arr.dtype, value) arr[loc] = value _index_shared_docs[ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cb03fbe1770b3..85a26179276f5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,7 +8,6 @@ import numpy as np from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers -from pandas._libs.index import convert_scalar import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare @@ -16,6 +15,7 @@ from pandas.core.dtypes.cast import ( astype_nansafe, + convert_scalar_for_putitemlike, find_common_type, infer_dtype_from, infer_dtype_from_scalar, @@ -762,7 +762,7 @@ def replace( # The only non-DatetimeLike 
class that also has a non-trivial # try_coerce_args is ObjectBlock, but that overrides replace, # so does not get here. - to_replace = convert_scalar(values, to_replace) + to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype) mask = missing.mask_missing(values, to_replace) if filter is not None: @@ -841,7 +841,7 @@ def setitem(self, indexer, value): # We only get here for non-Extension Blocks, so _try_coerce_args # is only relevant for DatetimeBlock and TimedeltaBlock if lib.is_scalar(value): - value = convert_scalar(values, value) + value = convert_scalar_for_putitemlike(value, values.dtype) else: # current dtype cannot store value, coerce to common dtype @@ -957,7 +957,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) # We only get here for non-Extension Blocks, so _try_coerce_args # is only relevant for DatetimeBlock and TimedeltaBlock if lib.is_scalar(new): - new = convert_scalar(new_values, new) + new = convert_scalar_for_putitemlike(new, new_values.dtype) if transpose: new_values = new_values.T @@ -1200,7 +1200,7 @@ def _interpolate_with_fill( values = self.values if inplace else self.values.copy() # We only get here for non-ExtensionBlock - fill_value = convert_scalar(self.values, fill_value) + fill_value = convert_scalar_for_putitemlike(fill_value, self.values.dtype) values = missing.interpolate_2d( values, @@ -1405,7 +1405,7 @@ def where_func(cond, values, other): raise TypeError if lib.is_scalar(other) and isinstance(values, np.ndarray): # convert datetime to datetime64, timedelta to timedelta64 - other = convert_scalar(values, other) + other = convert_scalar_for_putitemlike(other, values.dtype) # By the time we get here, we should have all Series/Index # args extracted to ndarray diff --git a/pandas/core/series.py b/pandas/core/series.py index 2ffb22d2d272f..fb316c8883e78 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -23,13 +23,12 @@ from pandas._config import get_option from 
pandas._libs import lib, properties, reshape, tslibs -from pandas._libs.index import validate_numeric_casting from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_bool_kwarg, validate_percentile -from pandas.core.dtypes.cast import convert_dtypes +from pandas.core.dtypes.cast import convert_dtypes, validate_numeric_casting from pandas.core.dtypes.common import ( _is_unorderable_exception, ensure_platform_int,