Skip to content

Move remaining conversion functions to tslibs.conversion #18358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 19, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cimport util

import numpy as np

from tslib cimport _to_i8
from tslibs.conversion cimport _maybe_datetimelike_to_i8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note to de-private this at some point as well


from hashtable cimport HashTable

Expand Down Expand Up @@ -405,12 +405,12 @@ cdef class DatetimeEngine(Int64Engine):
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
conv = _to_i8(val)
conv = _maybe_datetimelike_to_i8(val)
loc = values.searchsorted(conv, side='left')
return util.get_value_at(values, loc) == conv

self._ensure_mapping_populated()
return _to_i8(val) in self.mapping
return _maybe_datetimelike_to_i8(val) in self.mapping

cdef _get_index_values(self):
return self.vgetter().view('i8')
Expand All @@ -425,12 +425,12 @@ cdef class DatetimeEngine(Int64Engine):
# Welcome to the spaghetti factory
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
val = _to_i8(val)
val = _maybe_datetimelike_to_i8(val)
return self._get_loc_duplicates(val)
values = self._get_index_values()

try:
conv = _to_i8(val)
conv = _maybe_datetimelike_to_i8(val)
loc = values.searchsorted(conv, side='left')
except TypeError:
self._date_check_type(val)
Expand All @@ -442,7 +442,7 @@ cdef class DatetimeEngine(Int64Engine):

self._ensure_mapping_populated()
if not self.unique:
val = _to_i8(val)
val = _maybe_datetimelike_to_i8(val)
return self._get_loc_duplicates(val)

try:
Expand All @@ -453,7 +453,7 @@ cdef class DatetimeEngine(Int64Engine):
pass

try:
val = _to_i8(val)
val = _maybe_datetimelike_to_i8(val)
return self.mapping.get_item(val)
except (TypeError, ValueError):
self._date_check_type(val)
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/tslib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,3 @@ from numpy cimport ndarray, int64_t
from tslibs.conversion cimport convert_to_tsobject

cdef bint _check_all_nulls(obj)

cdef _to_i8(object val)
86 changes: 2 additions & 84 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ from cpython.datetime cimport (PyDelta_Check, PyTZInfo_Check,
# import datetime C API
PyDateTime_IMPORT
# this is our datetime.pxd
from datetime cimport pandas_datetime_to_datetimestruct, _string_to_dts
from datetime cimport _string_to_dts

# stdlib datetime imports
from datetime import time as datetime_time
Expand All @@ -46,10 +46,9 @@ from tslibs.np_datetime cimport (check_dts_bounds,
reverse_ops,
cmp_scalar,
pandas_datetimestruct,
PANDAS_DATETIMEUNIT, PANDAS_FR_ns,
dt64_to_dtstruct, dtstruct_to_dt64,
pydatetime_to_dt64, pydate_to_dt64,
get_datetime64_unit, get_datetime64_value,
get_datetime64_value,
get_timedelta64_value,
days_per_month_table,
dayofweek, is_leapyear)
Expand Down Expand Up @@ -1242,43 +1241,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz):
return dt.replace(tzinfo=tz)


# Convert an object ndarray of datetime.datetime values (and nulls) to a
# datetime64[ns] ndarray, and report the time zone shared by the tz-aware
# entries.
#
# Parameters
#   values : ndarray[object]
#
# Returns
#   (result, inferred_tz)
#   result      : ndarray allocated with dtype 'M8[ns]', same length as values
#   inferred_tz : get_timezone() of the tz-aware entries' tzinfo, or None if
#                 no tz-aware entry was seen
#
# Raises
#   ValueError : a tz-aware entry's time zone differs from the one already
#                inferred, or a tz-naive entry follows a tz-aware one
#   TypeError  : an entry is neither null (per _checknull_with_nat) nor
#                accepted by PyDateTime_Check
#
# NOTE(review): a tz-naive entry that comes *before* the first tz-aware entry
# is not rejected -- the naive branch only tests inferred_tz, which is still
# None at that point. Confirm whether callers rely on this.
def datetime_to_datetime64(ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
object val, inferred_tz = None
ndarray[int64_t] iresult
pandas_datetimestruct dts
_TSObject _ts

result = np.empty(n, dtype='M8[ns]')
# int64 view of the same buffer: writes to iresult fill result
iresult = result.view('i8')
for i in range(n):
val = values[i]
if _checknull_with_nat(val):
# null-like entries are stored as NaT
iresult[i] = NPY_NAT
elif PyDateTime_Check(val):
if val.tzinfo is not None:
# tz-aware: the first one seen fixes inferred_tz, later ones must match
if inferred_tz is not None:
if get_timezone(val.tzinfo) != inferred_tz:
raise ValueError('Array must be all same time zone')
else:
inferred_tz = get_timezone(val.tzinfo)

# NOTE(review): _ts.value is presumably the UTC-based i8 value --
# confirm against convert_datetime_to_tsobject
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
check_dts_bounds(&_ts.dts)
else:
# tz-naive: only rejected once a tz-aware entry has been seen
if inferred_tz is not None:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
# dts is passed by pointer here and then handed to the bounds check
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
else:
raise TypeError('Unrecognized value type: %s' % type(val))

return result, inferred_tz


def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
object format=None, object na_rep=None):
"""
Expand Down Expand Up @@ -1758,50 +1720,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
return oresult


# ----------------------------------------------------------------------
# Conversion routines

# Build a datetime64[ns] ndarray with the same shape as arr by converting
# each element through a pandas_datetimestruct.
#
# Parameters
#   arr : ndarray
#       NOTE(review): assumed to be a datetime64 array of some unit -- the
#       code views it as int64 and reads the unit from its first element;
#       confirm against callers.
#
# Returns
#   result : ndarray allocated with dtype 'M8[ns]' and arr's shape
def cast_to_nanoseconds(ndarray arr):
cdef:
Py_ssize_t i, n = arr.size
ndarray[int64_t] ivalues, iresult
PANDAS_DATETIMEUNIT unit
pandas_datetimestruct dts

shape = (<object> arr).shape

# flat int64 reading of the input values (only read, never written)
ivalues = arr.view(np.int64).ravel()

result = np.empty(shape, dtype='M8[ns]')
# result is freshly allocated, so ravel() is expected to return a view and
# writes through iresult land in result
iresult = result.ravel().view(np.int64)

# empty input: return before arr.flat[0] is read below
if len(iresult) == 0:
return result

# the unit is taken from the first element and used for every element
unit = get_datetime64_unit(arr.flat[0])
for i in range(n):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = dtstruct_to_dt64(&dts)
# NOTE(review): presumably raises when dts is outside the ns-representable
# range -- confirm in tslibs.np_datetime
check_dts_bounds(&dts)
else:
# NaT is carried over unchanged
iresult[i] = NPY_NAT

return result


# Try to reduce a datetime-like scalar to an integer value; fall back to
# returning the input unchanged.
#
# Order of attempts:
#   1. val.value, if the attribute exists
#   2. get_datetime64_value(val), if is_datetime64_object(val)
#   3. Timestamp(val).value, if PyDateTime_Check(val)
#   4. otherwise val itself
cdef inline _to_i8(object val):
# NOTE(review): dts is declared but never used in this function
cdef pandas_datetimestruct dts
try:
return val.value
except AttributeError:
if is_datetime64_object(val):
# NOTE(review): presumably the raw stored integer with no unit
# conversion -- confirm against get_datetime64_value
return get_datetime64_value(val)
elif PyDateTime_Check(val):
return Timestamp(val).value
return val


# ----------------------------------------------------------------------
# Accessors

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
cdef int64_t get_datetime64_nanos(object val) except? -1

cpdef int64_t pydt_to_i8(object pydt) except? -1

cdef _maybe_datetimelike_to_i8(object val)
119 changes: 118 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ from timezones cimport (
from parsing import parse_datetime_string

from nattype import nat_strings, NaT
from nattype cimport NPY_NAT
from nattype cimport NPY_NAT, _checknull_with_nat

# ----------------------------------------------------------------------
# Constants
Expand Down Expand Up @@ -73,6 +73,123 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:

return ival


def cast_to_nanoseconds(ndarray arr):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's rename this maybe ensure_datetime64ns

"""
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'

Parameters
----------
arr : ndarray

Returns
-------
result : ndarray with dtype datetime64[ns]

"""
cdef:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add doc-string

Py_ssize_t i, n = arr.size
ndarray[int64_t] ivalues, iresult
PANDAS_DATETIMEUNIT unit
pandas_datetimestruct dts

shape = (<object> arr).shape

ivalues = arr.view(np.int64).ravel()

result = np.empty(shape, dtype='M8[ns]')
iresult = result.ravel().view(np.int64)

if len(iresult) == 0:
return result

unit = get_datetime64_unit(arr.flat[0])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prob should add some checks around this, IOW this could already be ns (TODO ok)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, this check is done elsewhere, will not be hard to implement.

If it weren't for datetime64[Y] and datetime64[M] we could take the pandas_datetime_to_datetimestruct out of these funcs altogether, avoid converting back and forth.

for i in range(n):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = dtstruct_to_dt64(&dts)
check_dts_bounds(&dts)
else:
iresult[i] = NPY_NAT

return result


def datetime_to_datetime64(ndarray[object] values):
"""
Convert ndarray of datetime-like objects to int64 array representing
nanosecond timestamps.

Parameters
----------
values : ndarray

Returns
-------
result : ndarray with dtype int64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo

inferred_tz : tzinfo or None
"""
cdef:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Py_ssize_t i, n = len(values)
object val, inferred_tz = None
ndarray[int64_t] iresult
pandas_datetimestruct dts
_TSObject _ts

result = np.empty(n, dtype='M8[ns]')
iresult = result.view('i8')
for i in range(n):
val = values[i]
if _checknull_with_nat(val):
iresult[i] = NPY_NAT
elif PyDateTime_Check(val):
if val.tzinfo is not None:
if inferred_tz is not None:
if get_timezone(val.tzinfo) != inferred_tz:
raise ValueError('Array must be all same time zone')
else:
inferred_tz = get_timezone(val.tzinfo)

_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
check_dts_bounds(&_ts.dts)
else:
if inferred_tz is not None:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
else:
raise TypeError('Unrecognized value type: %s' % type(val))

return result, inferred_tz


cdef inline _maybe_datetimelike_to_i8(object val):
"""
Try to convert to a nanosecond timestamp. Fall back to returning the
input value.

Parameters
----------
val : object

Returns
-------
val : int64 timestamp or original input
"""
cdef:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doc-string.

can you rename this to something else. maybe datetime_to_i8

pandas_datetimestruct dts
try:
return val.value
except AttributeError:
if is_datetime64_object(val):
return get_datetime64_value(val)
elif PyDateTime_Check(val):
return convert_datetime_to_tsobject(val, None).value
return val


# ----------------------------------------------------------------------
# _TSObject Conversion

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def __new__(cls, data=None,
verify_integrity = False
else:
if data.dtype != _NS_DTYPE:
subarr = libts.cast_to_nanoseconds(data)
subarr = conversion.cast_to_nanoseconds(data)
else:
subarr = data
else:
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from pandas._libs import lib, tslib
from pandas._libs.tslib import Timedelta
from pandas._libs.lib import BlockPlacement
from pandas._libs.tslibs import conversion

from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_bool_kwarg
Expand Down Expand Up @@ -2462,7 +2463,7 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block):

def __init__(self, values, placement, fastpath=False, **kwargs):
if values.dtype != _NS_DTYPE:
values = tslib.cast_to_nanoseconds(values)
values = conversion.cast_to_nanoseconds(values)

super(DatetimeBlock, self).__init__(values, fastpath=True,
placement=placement, **kwargs)
Expand Down Expand Up @@ -2584,7 +2585,7 @@ def set(self, locs, values, check=False):
"""
if values.dtype != _NS_DTYPE:
# Workaround for numpy 1.6 bug
values = tslib.cast_to_nanoseconds(values)
values = conversion.cast_to_nanoseconds(values)

self.values[locs] = values

Expand Down Expand Up @@ -4674,7 +4675,7 @@ def form_blocks(arrays, names, axes):
complex_items.append((i, k, v))
elif issubclass(v.dtype.type, np.datetime64):
if v.dtype != _NS_DTYPE:
v = tslib.cast_to_nanoseconds(v)
v = conversion.cast_to_nanoseconds(v)

if is_datetimetz(v):
datetime_tz_items.append((i, k, v))
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from pandas._libs import tslib
from pandas._libs.tslibs.strptime import array_strptime
from pandas._libs.tslibs import parsing
from pandas._libs.tslibs import parsing, conversion
from pandas._libs.tslibs.parsing import ( # noqa
parse_time_string,
DateParseError,
Expand Down Expand Up @@ -373,7 +373,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):

except ValueError as e:
try:
values, tz = tslib.datetime_to_datetime64(arg)
values, tz = conversion.datetime_to_datetime64(arg)
return DatetimeIndex._simple_new(values, name=name, tz=tz)
except (ValueError, TypeError):
raise e
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import pandas as pd
from pandas import offsets
import pandas.util.testing as tm
from pandas._libs import tslib, lib
from pandas._libs import lib
from pandas._libs.tslib import OutOfBoundsDatetime
from pandas._libs.tslibs import conversion
from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range,
to_datetime)

Expand Down Expand Up @@ -496,7 +497,7 @@ def test_index_cast_datetime64_other_units(self):
arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]')
idx = Index(arr)

assert (idx.values == tslib.cast_to_nanoseconds(arr)).all()
assert (idx.values == conversion.cast_to_nanoseconds(arr)).all()

def test_constructor_int64_nocopy(self):
# #1624
Expand Down