Skip to content

REF: move convert_scalar out of cython #31672

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 0 additions & 55 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -535,61 +535,6 @@ cdef class PeriodEngine(Int64Engine):
return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array)


cpdef convert_scalar(ndarray arr, object value):
    """
    Coerce a scalar so that it can be set into ``arr``.

    Integers are not turned into datetimes/timedeltas, and bools are not
    turned into int/float/complex.

    Parameters
    ----------
    arr : ndarray
        Array the value is going to be set into; only its dtype is used.
    value : object
        Scalar to convert.

    Returns
    -------
    object
        For a datetime64 array: an ``np.datetime64`` (NaT for None or a
        value that is not equal to itself).
        For a timedelta64 array: an ``np.timedelta64`` (NaT likewise).
        For any other array: ``value`` unchanged.

    Raises
    ------
    ValueError
        If ``arr`` is datetime64/timedelta64 and ``value`` is an ndarray or
        is not a matching datetimelike or missing value, or if
        ``validate_numeric_casting`` rejects ``value`` for any other dtype.
    """
    if arr.descr.type_num == NPY_DATETIME:
        if not util.is_array(value):
            if isinstance(value, (datetime, np.datetime64, date)):
                return Timestamp(value).to_datetime64()
            # timedelta64 scalars are ruled out before the missing-value
            # test so that np.timedelta64("NaT"), which is != itself, is
            # not converted to a datetime NaT.
            if not util.is_timedelta64_object(value):
                if value is None or value != value:
                    return np.datetime64("NaT", "ns")
        raise ValueError("cannot set a Timestamp with a non-timestamp "
                         f"{type(value).__name__}")

    if arr.descr.type_num == NPY_TIMEDELTA:
        if not util.is_array(value):
            if isinstance(value, timedelta) or util.is_timedelta64_object(value):
                value = Timedelta(value)
                if value is NaT:
                    return np.timedelta64("NaT", "ns")
                return value.to_timedelta64()
            # datetime64 scalars are ruled out before the missing-value
            # test so that np.datetime64("NaT"), which is != itself, is
            # not converted to a timedelta NaT.
            if not util.is_datetime64_object(value):
                if value is None or value != value:
                    return np.timedelta64("NaT", "ns")
        raise ValueError("cannot set a Timedelta with a non-timedelta "
                         f"{type(value).__name__}")

    # Neither datetime64 nor timedelta64: only validate, never convert.
    validate_numeric_casting(arr.dtype, value)
    return value


cpdef validate_numeric_casting(dtype, object value):
    """
    Check that ``value`` may be set into an array of the given dtype.

    Parameters
    ----------
    dtype : np.dtype
    value : object

    Raises
    ------
    ValueError
        If a float NaN is assigned to an integer or bool dtype, or a bool is
        assigned to an integer, floating or complex dtype.
    """
    # Note: we can't annotate dtype as cnp.dtype because then dtype.type
    # would be treated as an integer
    if issubclass(dtype.type, (np.integer, np.bool_)):
        if util.is_float_object(value) and value != value:
            raise ValueError("Cannot assign nan to integer series")

    # np.complexfloating is the abstract base of every NumPy complex scalar
    # type.  The former np.complex was only an alias of the builtin complex,
    # so it did not match complex64 and no longer exists in current NumPy.
    if (issubclass(dtype.type, (np.integer, np.floating, np.complexfloating)) and
            not issubclass(dtype.type, np.bool_)):
        if util.is_bool_object(value):
            raise ValueError("Cannot assign bool to float/integer series")


cdef class BaseMultiIndexCodesEngine:
"""
Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which
Expand Down
67 changes: 65 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
""" routings for casting """

from datetime import datetime, timedelta
from datetime import date, datetime, timedelta

import numpy as np

from pandas._libs import lib, tslib, tslibs
from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
Period,
Timedelta,
Timestamp,
iNaT,
)
from pandas._libs.tslibs.timezones import tz_compare
from pandas._typing import Dtype
from pandas.util._validators import validate_bool_kwarg
Expand Down Expand Up @@ -1599,3 +1606,59 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False):

if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
raise ValueError("Trying to coerce float values to integers")


def convert_scalar_for_putitemlike(scalar, dtype: np.dtype):
    """
    Convert datetimelike scalar if we are setting into a datetime64
    or timedelta64 ndarray.

    Parameters
    ----------
    scalar : scalar
    dtype : np.dtype

    Returns
    -------
    scalar
        For a timedelta64 dtype, timedelta-like scalars and missing values
        (None, NaT, float NaN) are returned as ``np.timedelta64``; for a
        datetime64 dtype, date-like scalars and missing values are returned
        as ``np.datetime64``.  Anything else is returned unchanged.

    Raises
    ------
    ValueError
        If ``dtype`` is neither datetime64 nor timedelta64 and
        ``validate_numeric_casting`` rejects ``scalar``.
    """
    if dtype.kind == "m":
        if isinstance(scalar, (timedelta, np.timedelta64)):
            # We have to cast after asm8 in case we have NaT
            return Timedelta(scalar).asm8.view("timedelta64[ns]")
        elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)):
            return np.timedelta64("NaT", "ns")
    elif dtype.kind == "M":
        # ``elif`` (not ``if``): otherwise a timedelta64 dtype whose scalar
        # was not converted above would fall into the numeric validation
        # below, while a datetime64 dtype would not.
        if isinstance(scalar, (date, np.datetime64)):
            # Note: we include date, not just datetime
            return Timestamp(scalar).to_datetime64()
        elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)):
            return np.datetime64("NaT", "ns")
    else:
        validate_numeric_casting(dtype, scalar)
    return scalar


def validate_numeric_casting(dtype: np.dtype, value):
    """
    Check that we can losslessly insert the given value into an array
    with the given dtype.

    Parameters
    ----------
    dtype : np.dtype
    value : scalar

    Raises
    ------
    ValueError
        If a float NaN is assigned to an integer or bool dtype, or a bool is
        assigned to an integer, floating or complex dtype.
    """
    if issubclass(dtype.type, (np.integer, np.bool_)):
        if is_float(value) and np.isnan(value):
            raise ValueError("Cannot assign nan to integer series")

    # np.complexfloating is the abstract base of every NumPy complex scalar
    # type.  The former np.complex was only an alias of the builtin complex,
    # so it did not match complex64 and no longer exists in current NumPy.
    if issubclass(
        dtype.type, (np.integer, np.floating, np.complexfloating)
    ) and not issubclass(dtype.type, np.bool_):
        if is_bool(value):
            raise ValueError("Cannot assign bool to float/integer series")
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

from pandas._config import get_option

from pandas._libs import algos as libalgos, index as libindex, lib, properties
from pandas._libs import algos as libalgos, lib, properties
from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer
from pandas.compat import PY37
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -69,6 +69,7 @@
maybe_infer_to_datetimelike,
maybe_upcast,
maybe_upcast_putmask,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
ensure_float64,
Expand Down Expand Up @@ -3025,7 +3026,7 @@ def _set_value(self, index, col, value, takeable: bool = False):
series = self._get_item_cache(col)
engine = self.index._engine
loc = engine.get_loc(index)
libindex.validate_numeric_casting(series.dtype, value)
validate_numeric_casting(series.dtype, value)

series._values[loc] = value
# Note: trying to use series._set_value breaks tests in
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes import concat as _concat
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
from pandas.core.dtypes.cast import (
maybe_cast_to_integer_array,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
ensure_categorical,
ensure_int64,
Expand Down Expand Up @@ -4653,7 +4656,7 @@ def set_value(self, arr, key, value):
stacklevel=2,
)
loc = self._engine.get_loc(key)
libindex.validate_numeric_casting(arr.dtype, value)
validate_numeric_casting(arr.dtype, value)
arr[loc] = value

_index_shared_docs[
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
import numpy as np

from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers
from pandas._libs.index import convert_scalar
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
astype_nansafe,
convert_scalar_for_putitemlike,
find_common_type,
infer_dtype_from,
infer_dtype_from_scalar,
Expand Down Expand Up @@ -762,7 +762,7 @@ def replace(
# The only non-DatetimeLike class that also has a non-trivial
# try_coerce_args is ObjectBlock, but that overrides replace,
# so does not get here.
to_replace = convert_scalar(values, to_replace)
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)

mask = missing.mask_missing(values, to_replace)
if filter is not None:
Expand Down Expand Up @@ -841,7 +841,7 @@ def setitem(self, indexer, value):
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(value):
value = convert_scalar(values, value)
value = convert_scalar_for_putitemlike(value, values.dtype)

else:
# current dtype cannot store value, coerce to common dtype
Expand Down Expand Up @@ -957,7 +957,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(new):
new = convert_scalar(new_values, new)
new = convert_scalar_for_putitemlike(new, new_values.dtype)

if transpose:
new_values = new_values.T
Expand Down Expand Up @@ -1200,7 +1200,7 @@ def _interpolate_with_fill(
values = self.values if inplace else self.values.copy()

# We only get here for non-ExtensionBlock
fill_value = convert_scalar(self.values, fill_value)
fill_value = convert_scalar_for_putitemlike(fill_value, self.values.dtype)

values = missing.interpolate_2d(
values,
Expand Down Expand Up @@ -1405,7 +1405,7 @@ def where_func(cond, values, other):
raise TypeError
if lib.is_scalar(other) and isinstance(values, np.ndarray):
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar(values, other)
other = convert_scalar_for_putitemlike(other, values.dtype)

# By the time we get here, we should have all Series/Index
# args extracted to ndarray
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@
from pandas._config import get_option

from pandas._libs import lib, properties, reshape, tslibs
from pandas._libs.index import validate_numeric_casting
from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution
from pandas.util._validators import validate_bool_kwarg, validate_percentile

from pandas.core.dtypes.cast import convert_dtypes
from pandas.core.dtypes.cast import convert_dtypes, validate_numeric_casting
from pandas.core.dtypes.common import (
_is_unorderable_exception,
ensure_platform_int,
Expand Down