Skip to content

REF: move convert_scalar out of cython #31672

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 5, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 0 additions & 42 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -535,48 +535,6 @@ cdef class PeriodEngine(Int64Engine):
return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array)


cpdef convert_scalar(ndarray arr, object value):
    # Coerce the scalar `value` into a form that can be stored in `arr`.
    #
    # - datetime64 arrays: datetime / date / np.datetime64 values are
    #   converted through Timestamp to an np.datetime64; None or any value
    #   that is not equal to itself (NaN-like) becomes
    #   np.datetime64("NaT", "ns").
    # - timedelta64 arrays: timedelta / np.timedelta64 values are converted
    #   through Timedelta to an np.timedelta64; None or any NaN-like value
    #   becomes np.timedelta64("NaT", "ns").
    # - any other dtype: the value is passed to validate_numeric_casting and
    #   then returned unchanged.
    #
    # Raises ValueError for a datetime64 / timedelta64 array when `value` is
    # not handled by one of the returning branches above (this includes
    # values for which util.is_array is true).

    # we don't turn integers
    # into datetimes/timedeltas

    # we don't turn bools into int/float/complex

    if arr.descr.type_num == NPY_DATETIME:
        if util.is_array(value):
            # no conversion; falls through to the ValueError below
            pass
        elif isinstance(value, (datetime, np.datetime64, date)):
            return Timestamp(value).to_datetime64()
        elif util.is_timedelta64_object(value):
            # exclude np.timedelta64("NaT") from value != value below
            pass
        elif value is None or value != value:
            return np.datetime64("NaT", "ns")
        # Reached by every branch above that did not return.
        raise ValueError("cannot set a Timestamp with a non-timestamp "
                         f"{type(value).__name__}")

    elif arr.descr.type_num == NPY_TIMEDELTA:
        if util.is_array(value):
            # no conversion; falls through to the ValueError below
            pass
        elif isinstance(value, timedelta) or util.is_timedelta64_object(value):
            value = Timedelta(value)
            if value is NaT:
                # Timedelta(...) produced NaT, so return the timedelta64
                # flavour of NaT explicitly.
                return np.timedelta64("NaT", "ns")
            return value.to_timedelta64()
        elif util.is_datetime64_object(value):
            # exclude np.datetime64("NaT") which would otherwise be picked up
            # by the `value != value` check below
            pass
        elif value is None or value != value:
            return np.timedelta64("NaT", "ns")
        # Reached by every branch above that did not return.
        raise ValueError("cannot set a Timedelta with a non-timedelta "
                         f"{type(value).__name__}")

    else:
        # Non-datetimelike dtype: only validate, never convert.
        validate_numeric_casting(arr.dtype, value)

    return value


cpdef validate_numeric_casting(dtype, object value):
# Note: we can't annotate dtype as cnp.dtype because that casts dtype.type
# to integer
Expand Down
48 changes: 40 additions & 8 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime, timedelta
from datetime import date, datetime, timedelta
import functools
import inspect
import re
Expand All @@ -7,8 +7,8 @@

import numpy as np

from pandas._libs import NaT, algos as libalgos, lib, tslib, writers
from pandas._libs.index import convert_scalar
from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers
from pandas._libs.index import validate_numeric_casting
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
Expand Down Expand Up @@ -37,6 +37,7 @@
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float,
is_float_dtype,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -762,7 +763,7 @@ def replace(
# The only non-DatetimeLike class that also has a non-trivial
# try_coerce_args is ObjectBlock, but that overrides replace,
# so does not get here.
to_replace = convert_scalar(values, to_replace)
to_replace = _convert_scalar_for_putitemlike(to_replace, values.dtype)

mask = missing.mask_missing(values, to_replace)
if filter is not None:
Expand Down Expand Up @@ -841,7 +842,7 @@ def setitem(self, indexer, value):
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(value):
value = convert_scalar(values, value)
value = _convert_scalar_for_putitemlike(value, values.dtype)

else:
# current dtype cannot store value, coerce to common dtype
Expand Down Expand Up @@ -957,7 +958,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(new):
new = convert_scalar(new_values, new)
new = _convert_scalar_for_putitemlike(new, new_values.dtype)

if transpose:
new_values = new_values.T
Expand Down Expand Up @@ -1200,7 +1201,7 @@ def _interpolate_with_fill(
values = self.values if inplace else self.values.copy()

# We only get here for non-ExtensionBlock
fill_value = convert_scalar(self.values, fill_value)
fill_value = _convert_scalar_for_putitemlike(fill_value, self.values.dtype)

values = missing.interpolate_2d(
values,
Expand Down Expand Up @@ -1405,7 +1406,7 @@ def where_func(cond, values, other):
raise TypeError
if lib.is_scalar(other) and isinstance(values, np.ndarray):
# convert datetime to datetime64, timedelta to timedelta64
other = convert_scalar(values, other)
other = _convert_scalar_for_putitemlike(other, values.dtype)

# By the time we get here, we should have all Series/Index
# args extracted to ndarray
Expand Down Expand Up @@ -3204,3 +3205,34 @@ def _putmask_preserve(nv, n):
v = v.astype(dtype)

return _putmask_preserve(v, n)


def _convert_scalar_for_putitemlike(scalar, dtype: np.dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe in pandas.core.dtypes.cast ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. With convert_scalar out of libindex, we can move validate_numeric_casting to core.dtypes.cast as well (and after #31662 we'll be able to get rid of libindex.get_value_at, plus util.get_value_at and util.validate_indexer!)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sgtm.

"""
Convert datetimelike scalar if we are setting into a datetime64
or timedelta64 ndarray.

Parameters
----------
scalar : scalar
dtype : np.dtpye

Returns
-------
scalar
"""
if dtype.kind == "m":
if isinstance(scalar, (timedelta, np.timedelta64)):
# We have to cast after asm8 in case we have NaT
return Timedelta(scalar).asm8.view("timedelta64[ns]")
elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)):
return np.timedelta64("NaT", "ns")
if dtype.kind == "M":
if isinstance(scalar, (date, np.datetime64)):
# Note: we include date, not just datetime
return Timestamp(scalar).to_datetime64()
elif scalar is None or scalar is NaT or (is_float(scalar) and np.isnan(scalar)):
return np.datetime64("NaT", "ns")
else:
validate_numeric_casting(dtype, scalar)
return scalar