Skip to content

Commit 236f7e6

Browse files
authored
REF: move convert_scalar out of cython (#31672)
1 parent aa47971 commit 236f7e6

File tree

6 files changed

+80
-69
lines changed

6 files changed

+80
-69
lines changed

pandas/_libs/index.pyx

-55
Original file line numberDiff line numberDiff line change
@@ -535,61 +535,6 @@ cdef class PeriodEngine(Int64Engine):
535535
return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array)
536536

537537

538-
# Coerce `value` so that it can be set into `arr`:
#   * datetime64 arrays: datetime-likes become np.datetime64 (via Timestamp),
#     None/NaN become datetime64 NaT, anything else raises ValueError
#   * timedelta64 arrays: timedelta-likes become np.timedelta64 (via Timedelta),
#     None/NaN become timedelta64 NaT, anything else raises ValueError
#   * all other arrays: `value` is checked with validate_numeric_casting and
#     returned unchanged
cpdef convert_scalar(ndarray arr, object value):
539-
# we don't turn integers
540-
# into datetimes/timedeltas
541-
542-
# we don't turn bools into int/float/complex
543-
544-
if arr.descr.type_num == NPY_DATETIME:
545-
if util.is_array(value):
546-
pass
547-
elif isinstance(value, (datetime, np.datetime64, date)):
548-
return Timestamp(value).to_datetime64()
549-
elif util.is_timedelta64_object(value):
550-
# exclude np.timedelta64("NaT") from the `value != value` check below
551-
pass
552-
elif value is None or value != value:
553-
return np.datetime64("NaT", "ns")
# values that did not return above (including the `pass` cases) are rejected
554-
raise ValueError("cannot set a Timestamp with a non-timestamp "
555-
f"{type(value).__name__}")
556-
557-
elif arr.descr.type_num == NPY_TIMEDELTA:
558-
if util.is_array(value):
559-
pass
560-
elif isinstance(value, timedelta) or util.is_timedelta64_object(value):
561-
value = Timedelta(value)
562-
if value is NaT:
563-
return np.timedelta64("NaT", "ns")
564-
return value.to_timedelta64()
565-
elif util.is_datetime64_object(value):
566-
# exclude np.datetime64("NaT") which would otherwise be picked up
567-
# by the `value != value` check below
568-
pass
569-
elif value is None or value != value:
570-
return np.timedelta64("NaT", "ns")
# values that did not return above (including the `pass` cases) are rejected
571-
raise ValueError("cannot set a Timedelta with a non-timedelta "
572-
f"{type(value).__name__}")
573-
574-
else:
575-
validate_numeric_casting(arr.dtype, value)
576-
577-
return value
578-
579-
580-
# Raise ValueError if `value` must not be inserted into an array of `dtype`:
# a float NaN into an integer/bool dtype, or a bool into a non-bool
# integer/floating/complex dtype. Returns nothing otherwise.
cpdef validate_numeric_casting(dtype, object value):
581-
# Note: we can't annotate dtype as cnp.dtype because that casts dtype.type
582-
# to integer
583-
if issubclass(dtype.type, (np.integer, np.bool_)):
584-
if util.is_float_object(value) and value != value:
585-
raise ValueError("Cannot assign nan to integer series")
586-
587-
# NOTE(review): np.complex was an alias of the builtin complex and is no
# longer available in recent NumPy releases -- confirm against the supported
# NumPy versions
if (issubclass(dtype.type, (np.integer, np.floating, np.complex)) and
588-
not issubclass(dtype.type, np.bool_)):
589-
if util.is_bool_object(value):
590-
raise ValueError("Cannot assign bool to float/integer series")
591-
592-
593538
cdef class BaseMultiIndexCodesEngine:
594539
"""
595540
Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which

pandas/core/dtypes/cast.py

+65-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
""" routines for casting """
22

3-
from datetime import datetime, timedelta
3+
from datetime import date, datetime, timedelta
44

55
import numpy as np
66

77
from pandas._libs import lib, tslib, tslibs
8-
from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT
8+
from pandas._libs.tslibs import (
9+
NaT,
10+
OutOfBoundsDatetime,
11+
Period,
12+
Timedelta,
13+
Timestamp,
14+
iNaT,
15+
)
916
from pandas._libs.tslibs.timezones import tz_compare
1017
from pandas._typing import Dtype
1118
from pandas.util._validators import validate_bool_kwarg
@@ -1599,3 +1606,59 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False):
15991606

16001607
if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
16011608
raise ValueError("Trying to coerce float values to integers")
1609+
1610+
1611+
def convert_scalar_for_putitemlike(scalar, dtype: np.dtype):
    """
    Prepare a scalar for being set into an ndarray of the given dtype.

    Datetime-like scalars and missing values (None, NaT, float NaN) are
    converted to the matching numpy scalar when the target dtype is
    datetime64 or timedelta64. For every other dtype the scalar is only
    validated with ``validate_numeric_casting`` and returned unchanged.

    Parameters
    ----------
    scalar : scalar
    dtype : np.dtype

    Returns
    -------
    scalar
    """
    kind = dtype.kind

    def _is_missing() -> bool:
        # None, NaT and float NaN all count as a missing value here
        return (
            scalar is None
            or scalar is NaT
            or (is_float(scalar) and np.isnan(scalar))
        )

    if kind == "M":
        if isinstance(scalar, (date, np.datetime64)):
            # Note: we check for date, not just datetime
            return Timestamp(scalar).to_datetime64()
        if _is_missing():
            return np.datetime64("NaT", "ns")
        # Any other scalar is handed back as-is, without numeric validation
        return scalar

    if kind == "m":
        if isinstance(scalar, (timedelta, np.timedelta64)):
            # The view has to come after asm8 in case we have NaT
            return Timedelta(scalar).asm8.view("timedelta64[ns]")
        if _is_missing():
            return np.timedelta64("NaT", "ns")
        # Unmatched scalars continue to the numeric validation below

    validate_numeric_casting(dtype, scalar)
    return scalar
1640+
1641+
1642+
def validate_numeric_casting(dtype: np.dtype, value):
    """
    Check that we can losslessly insert the given value into an array
    with the given dtype.

    Parameters
    ----------
    dtype : np.dtype
    value : scalar

    Raises
    ------
    ValueError
        If ``value`` is a float NaN and ``dtype.type`` subclasses
        ``np.integer`` or ``np.bool_``, or if ``value`` is a bool and
        ``dtype.type`` subclasses ``np.integer``, ``np.floating`` or the
        builtin ``complex`` without being a boolean type.
    """
    scalar_type = dtype.type

    if issubclass(scalar_type, (np.integer, np.bool_)):
        if is_float(value) and np.isnan(value):
            raise ValueError("Cannot assign nan to integer series")

    # ``np.complex`` was merely an alias of the builtin ``complex`` and has
    # been removed from NumPy; using the builtin keeps the check identical
    # while avoiding an AttributeError on current NumPy releases.
    if issubclass(scalar_type, (np.integer, np.floating, complex)) and not issubclass(
        scalar_type, np.bool_
    ):
        if is_bool(value):
            raise ValueError("Cannot assign bool to float/integer series")

pandas/core/frame.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
from pandas._config import get_option
4242

43-
from pandas._libs import algos as libalgos, index as libindex, lib, properties
43+
from pandas._libs import algos as libalgos, lib, properties
4444
from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer
4545
from pandas.compat import PY37
4646
from pandas.compat._optional import import_optional_dependency
@@ -69,6 +69,7 @@
6969
maybe_infer_to_datetimelike,
7070
maybe_upcast,
7171
maybe_upcast_putmask,
72+
validate_numeric_casting,
7273
)
7374
from pandas.core.dtypes.common import (
7475
ensure_float64,
@@ -3025,7 +3026,7 @@ def _set_value(self, index, col, value, takeable: bool = False):
30253026
series = self._get_item_cache(col)
30263027
engine = self.index._engine
30273028
loc = engine.get_loc(index)
3028-
libindex.validate_numeric_casting(series.dtype, value)
3029+
validate_numeric_casting(series.dtype, value)
30293030

30303031
series._values[loc] = value
30313032
# Note: trying to use series._set_value breaks tests in

pandas/core/indexes/base.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
from pandas.util._decorators import Appender, Substitution, cache_readonly
1919

2020
from pandas.core.dtypes import concat as _concat
21-
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
21+
from pandas.core.dtypes.cast import (
22+
maybe_cast_to_integer_array,
23+
validate_numeric_casting,
24+
)
2225
from pandas.core.dtypes.common import (
2326
ensure_categorical,
2427
ensure_int64,
@@ -4631,7 +4634,7 @@ def set_value(self, arr, key, value):
46314634
stacklevel=2,
46324635
)
46334636
loc = self._engine.get_loc(key)
4634-
libindex.validate_numeric_casting(arr.dtype, value)
4637+
validate_numeric_casting(arr.dtype, value)
46354638
arr[loc] = value
46364639

46374640
_index_shared_docs[

pandas/core/internals/blocks.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
import numpy as np
99

1010
from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers
11-
from pandas._libs.index import convert_scalar
1211
import pandas._libs.internals as libinternals
1312
from pandas._libs.tslibs import Timedelta, conversion
1413
from pandas._libs.tslibs.timezones import tz_compare
1514
from pandas.util._validators import validate_bool_kwarg
1615

1716
from pandas.core.dtypes.cast import (
1817
astype_nansafe,
18+
convert_scalar_for_putitemlike,
1919
find_common_type,
2020
infer_dtype_from,
2121
infer_dtype_from_scalar,
@@ -762,7 +762,7 @@ def replace(
762762
# The only non-DatetimeLike class that also has a non-trivial
763763
# try_coerce_args is ObjectBlock, but that overrides replace,
764764
# so does not get here.
765-
to_replace = convert_scalar(values, to_replace)
765+
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)
766766

767767
mask = missing.mask_missing(values, to_replace)
768768
if filter is not None:
@@ -841,7 +841,7 @@ def setitem(self, indexer, value):
841841
# We only get here for non-Extension Blocks, so _try_coerce_args
842842
# is only relevant for DatetimeBlock and TimedeltaBlock
843843
if lib.is_scalar(value):
844-
value = convert_scalar(values, value)
844+
value = convert_scalar_for_putitemlike(value, values.dtype)
845845

846846
else:
847847
# current dtype cannot store value, coerce to common dtype
@@ -957,7 +957,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
957957
# We only get here for non-Extension Blocks, so _try_coerce_args
958958
# is only relevant for DatetimeBlock and TimedeltaBlock
959959
if lib.is_scalar(new):
960-
new = convert_scalar(new_values, new)
960+
new = convert_scalar_for_putitemlike(new, new_values.dtype)
961961

962962
if transpose:
963963
new_values = new_values.T
@@ -1200,7 +1200,7 @@ def _interpolate_with_fill(
12001200
values = self.values if inplace else self.values.copy()
12011201

12021202
# We only get here for non-ExtensionBlock
1203-
fill_value = convert_scalar(self.values, fill_value)
1203+
fill_value = convert_scalar_for_putitemlike(fill_value, self.values.dtype)
12041204

12051205
values = missing.interpolate_2d(
12061206
values,
@@ -1405,7 +1405,7 @@ def where_func(cond, values, other):
14051405
raise TypeError
14061406
if lib.is_scalar(other) and isinstance(values, np.ndarray):
14071407
# convert datetime to datetime64, timedelta to timedelta64
1408-
other = convert_scalar(values, other)
1408+
other = convert_scalar_for_putitemlike(other, values.dtype)
14091409

14101410
# By the time we get here, we should have all Series/Index
14111411
# args extracted to ndarray

pandas/core/series.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,12 @@
2323
from pandas._config import get_option
2424

2525
from pandas._libs import lib, properties, reshape, tslibs
26-
from pandas._libs.index import validate_numeric_casting
2726
from pandas._typing import Label
2827
from pandas.compat.numpy import function as nv
2928
from pandas.util._decorators import Appender, Substitution
3029
from pandas.util._validators import validate_bool_kwarg, validate_percentile
3130

32-
from pandas.core.dtypes.cast import convert_dtypes
31+
from pandas.core.dtypes.cast import convert_dtypes, validate_numeric_casting
3332
from pandas.core.dtypes.common import (
3433
_is_unorderable_exception,
3534
ensure_platform_int,

0 commit comments

Comments
 (0)