Skip to content

Commit a172ff9

Browse files
jbrockmendel authored and jreback committed
Move remaining conversion functions to tslibs.conversion (pandas-dev#18358)
1 parent c868423 commit a172ff9

File tree

9 files changed

+139
-102
lines changed

9 files changed

+139
-102
lines changed

pandas/_libs/index.pyx

+7-7
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ cimport util
1313

1414
import numpy as np
1515

16-
from tslib cimport _to_i8
16+
from tslibs.conversion cimport maybe_datetimelike_to_i8
1717

1818
from hashtable cimport HashTable
1919

@@ -405,12 +405,12 @@ cdef class DatetimeEngine(Int64Engine):
405405
if not self.is_unique:
406406
return self._get_loc_duplicates(val)
407407
values = self._get_index_values()
408-
conv = _to_i8(val)
408+
conv = maybe_datetimelike_to_i8(val)
409409
loc = values.searchsorted(conv, side='left')
410410
return util.get_value_at(values, loc) == conv
411411

412412
self._ensure_mapping_populated()
413-
return _to_i8(val) in self.mapping
413+
return maybe_datetimelike_to_i8(val) in self.mapping
414414

415415
cdef _get_index_values(self):
416416
return self.vgetter().view('i8')
@@ -425,12 +425,12 @@ cdef class DatetimeEngine(Int64Engine):
425425
# Welcome to the spaghetti factory
426426
if self.over_size_threshold and self.is_monotonic_increasing:
427427
if not self.is_unique:
428-
val = _to_i8(val)
428+
val = maybe_datetimelike_to_i8(val)
429429
return self._get_loc_duplicates(val)
430430
values = self._get_index_values()
431431

432432
try:
433-
conv = _to_i8(val)
433+
conv = maybe_datetimelike_to_i8(val)
434434
loc = values.searchsorted(conv, side='left')
435435
except TypeError:
436436
self._date_check_type(val)
@@ -442,7 +442,7 @@ cdef class DatetimeEngine(Int64Engine):
442442

443443
self._ensure_mapping_populated()
444444
if not self.unique:
445-
val = _to_i8(val)
445+
val = maybe_datetimelike_to_i8(val)
446446
return self._get_loc_duplicates(val)
447447

448448
try:
@@ -453,7 +453,7 @@ cdef class DatetimeEngine(Int64Engine):
453453
pass
454454

455455
try:
456-
val = _to_i8(val)
456+
val = maybe_datetimelike_to_i8(val)
457457
return self.mapping.get_item(val)
458458
except (TypeError, ValueError):
459459
self._date_check_type(val)

pandas/_libs/tslib.pxd

-2
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,3 @@ from numpy cimport ndarray, int64_t
33
from tslibs.conversion cimport convert_to_tsobject
44

55
cdef bint _check_all_nulls(obj)
6-
7-
cdef _to_i8(object val)

pandas/_libs/tslib.pyx

+2-84
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ from cpython.datetime cimport (PyDelta_Check, PyTZInfo_Check,
3636
# import datetime C API
3737
PyDateTime_IMPORT
3838
# this is our datetime.pxd
39-
from datetime cimport pandas_datetime_to_datetimestruct, _string_to_dts
39+
from datetime cimport _string_to_dts
4040

4141
# stdlib datetime imports
4242
from datetime import time as datetime_time
@@ -46,10 +46,9 @@ from tslibs.np_datetime cimport (check_dts_bounds,
4646
reverse_ops,
4747
cmp_scalar,
4848
pandas_datetimestruct,
49-
PANDAS_DATETIMEUNIT, PANDAS_FR_ns,
5049
dt64_to_dtstruct, dtstruct_to_dt64,
5150
pydatetime_to_dt64, pydate_to_dt64,
52-
get_datetime64_unit, get_datetime64_value,
51+
get_datetime64_value,
5352
get_timedelta64_value,
5453
days_per_month_table,
5554
dayofweek, is_leapyear)
@@ -1244,43 +1243,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz):
12441243
return dt.replace(tzinfo=tz)
12451244

12461245

1247-
def datetime_to_datetime64(ndarray[object] values):
1248-
cdef:
1249-
Py_ssize_t i, n = len(values)
1250-
object val, inferred_tz = None
1251-
ndarray[int64_t] iresult
1252-
pandas_datetimestruct dts
1253-
_TSObject _ts
1254-
1255-
result = np.empty(n, dtype='M8[ns]')
1256-
iresult = result.view('i8')
1257-
for i in range(n):
1258-
val = values[i]
1259-
if _checknull_with_nat(val):
1260-
iresult[i] = NPY_NAT
1261-
elif PyDateTime_Check(val):
1262-
if val.tzinfo is not None:
1263-
if inferred_tz is not None:
1264-
if get_timezone(val.tzinfo) != inferred_tz:
1265-
raise ValueError('Array must be all same time zone')
1266-
else:
1267-
inferred_tz = get_timezone(val.tzinfo)
1268-
1269-
_ts = convert_datetime_to_tsobject(val, None)
1270-
iresult[i] = _ts.value
1271-
check_dts_bounds(&_ts.dts)
1272-
else:
1273-
if inferred_tz is not None:
1274-
raise ValueError('Cannot mix tz-aware with '
1275-
'tz-naive values')
1276-
iresult[i] = pydatetime_to_dt64(val, &dts)
1277-
check_dts_bounds(&dts)
1278-
else:
1279-
raise TypeError('Unrecognized value type: %s' % type(val))
1280-
1281-
return result, inferred_tz
1282-
1283-
12841246
def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
12851247
object format=None, object na_rep=None):
12861248
"""
@@ -1760,50 +1722,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
17601722
return oresult
17611723

17621724

1763-
# ----------------------------------------------------------------------
1764-
# Conversion routines
1765-
1766-
def cast_to_nanoseconds(ndarray arr):
1767-
cdef:
1768-
Py_ssize_t i, n = arr.size
1769-
ndarray[int64_t] ivalues, iresult
1770-
PANDAS_DATETIMEUNIT unit
1771-
pandas_datetimestruct dts
1772-
1773-
shape = (<object> arr).shape
1774-
1775-
ivalues = arr.view(np.int64).ravel()
1776-
1777-
result = np.empty(shape, dtype='M8[ns]')
1778-
iresult = result.ravel().view(np.int64)
1779-
1780-
if len(iresult) == 0:
1781-
return result
1782-
1783-
unit = get_datetime64_unit(arr.flat[0])
1784-
for i in range(n):
1785-
if ivalues[i] != NPY_NAT:
1786-
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
1787-
iresult[i] = dtstruct_to_dt64(&dts)
1788-
check_dts_bounds(&dts)
1789-
else:
1790-
iresult[i] = NPY_NAT
1791-
1792-
return result
1793-
1794-
1795-
cdef inline _to_i8(object val):
1796-
cdef pandas_datetimestruct dts
1797-
try:
1798-
return val.value
1799-
except AttributeError:
1800-
if is_datetime64_object(val):
1801-
return get_datetime64_value(val)
1802-
elif PyDateTime_Check(val):
1803-
return Timestamp(val).value
1804-
return val
1805-
1806-
18071725
# ----------------------------------------------------------------------
18081726
# Accessors
18091727

pandas/_libs/tslibs/conversion.pxd

+2
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,5 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
2828
cdef int64_t get_datetime64_nanos(object val) except? -1
2929

3030
cpdef int64_t pydt_to_i8(object pydt) except? -1
31+
32+
cdef maybe_datetimelike_to_i8(object val)

pandas/_libs/tslibs/conversion.pyx

+118-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ from timezones cimport (
4040
from parsing import parse_datetime_string
4141

4242
from nattype import nat_strings, NaT
43-
from nattype cimport NPY_NAT
43+
from nattype cimport NPY_NAT, _checknull_with_nat
4444

4545
# ----------------------------------------------------------------------
4646
# Constants
@@ -73,6 +73,123 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
7373

7474
return ival
7575

76+
77+
def ensure_datetime64ns(ndarray arr):
78+
"""
79+
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
80+
81+
Parameters
82+
----------
83+
arr : ndarray
84+
85+
Returns
86+
-------
87+
result : ndarray with dtype datetime64[ns]
88+
89+
"""
90+
cdef:
91+
Py_ssize_t i, n = arr.size
92+
ndarray[int64_t] ivalues, iresult
93+
PANDAS_DATETIMEUNIT unit
94+
pandas_datetimestruct dts
95+
96+
shape = (<object> arr).shape
97+
98+
ivalues = arr.view(np.int64).ravel()
99+
100+
result = np.empty(shape, dtype='M8[ns]')
101+
iresult = result.ravel().view(np.int64)
102+
103+
if len(iresult) == 0:
104+
return result
105+
106+
unit = get_datetime64_unit(arr.flat[0])
107+
for i in range(n):
108+
if ivalues[i] != NPY_NAT:
109+
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
110+
iresult[i] = dtstruct_to_dt64(&dts)
111+
check_dts_bounds(&dts)
112+
else:
113+
iresult[i] = NPY_NAT
114+
115+
return result
116+
117+
118+
def datetime_to_datetime64(ndarray[object] values):
119+
"""
120+
Convert ndarray of datetime-like objects to a datetime64[ns] array of
121+
nanosecond timestamps.
122+
123+
Parameters
124+
----------
125+
values : ndarray
126+
127+
Returns
128+
-------
129+
result : ndarray with dtype datetime64[ns]
130+
inferred_tz : tzinfo or None
131+
"""
132+
cdef:
133+
Py_ssize_t i, n = len(values)
134+
object val, inferred_tz = None
135+
ndarray[int64_t] iresult
136+
pandas_datetimestruct dts
137+
_TSObject _ts
138+
139+
result = np.empty(n, dtype='M8[ns]')
140+
iresult = result.view('i8')
141+
for i in range(n):
142+
val = values[i]
143+
if _checknull_with_nat(val):
144+
iresult[i] = NPY_NAT
145+
elif PyDateTime_Check(val):
146+
if val.tzinfo is not None:
147+
if inferred_tz is not None:
148+
if get_timezone(val.tzinfo) != inferred_tz:
149+
raise ValueError('Array must be all same time zone')
150+
else:
151+
inferred_tz = get_timezone(val.tzinfo)
152+
153+
_ts = convert_datetime_to_tsobject(val, None)
154+
iresult[i] = _ts.value
155+
check_dts_bounds(&_ts.dts)
156+
else:
157+
if inferred_tz is not None:
158+
raise ValueError('Cannot mix tz-aware with '
159+
'tz-naive values')
160+
iresult[i] = pydatetime_to_dt64(val, &dts)
161+
check_dts_bounds(&dts)
162+
else:
163+
raise TypeError('Unrecognized value type: %s' % type(val))
164+
165+
return result, inferred_tz
166+
167+
168+
cdef inline maybe_datetimelike_to_i8(object val):
169+
"""
170+
Try to convert to a nanosecond timestamp. Fall back to returning the
171+
input value.
172+
173+
Parameters
174+
----------
175+
val : object
176+
177+
Returns
178+
-------
179+
val : int64 timestamp or original input
180+
"""
181+
cdef:
182+
pandas_datetimestruct dts
183+
try:
184+
return val.value
185+
except AttributeError:
186+
if is_datetime64_object(val):
187+
return get_datetime64_value(val)
188+
elif PyDateTime_Check(val):
189+
return convert_datetime_to_tsobject(val, None).value
190+
return val
191+
192+
76193
# ----------------------------------------------------------------------
77194
# _TSObject Conversion
78195

pandas/core/indexes/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ def __new__(cls, data=None,
409409
verify_integrity = False
410410
else:
411411
if data.dtype != _NS_DTYPE:
412-
subarr = libts.cast_to_nanoseconds(data)
412+
subarr = conversion.ensure_datetime64ns(data)
413413
else:
414414
subarr = data
415415
else:

pandas/core/internals.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
from pandas._libs import lib, tslib
6969
from pandas._libs.tslib import Timedelta
7070
from pandas._libs.lib import BlockPlacement
71+
from pandas._libs.tslibs import conversion
7172

7273
from pandas.util._decorators import cache_readonly
7374
from pandas.util._validators import validate_bool_kwarg
@@ -2471,7 +2472,7 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block):
24712472

24722473
def __init__(self, values, placement, fastpath=False, **kwargs):
24732474
if values.dtype != _NS_DTYPE:
2474-
values = tslib.cast_to_nanoseconds(values)
2475+
values = conversion.ensure_datetime64ns(values)
24752476

24762477
super(DatetimeBlock, self).__init__(values, fastpath=True,
24772478
placement=placement, **kwargs)
@@ -2593,7 +2594,7 @@ def set(self, locs, values, check=False):
25932594
"""
25942595
if values.dtype != _NS_DTYPE:
25952596
# Workaround for numpy 1.6 bug
2596-
values = tslib.cast_to_nanoseconds(values)
2597+
values = conversion.ensure_datetime64ns(values)
25972598

25982599
self.values[locs] = values
25992600

@@ -4686,7 +4687,7 @@ def form_blocks(arrays, names, axes):
46864687
complex_items.append((i, k, v))
46874688
elif issubclass(v.dtype.type, np.datetime64):
46884689
if v.dtype != _NS_DTYPE:
4689-
v = tslib.cast_to_nanoseconds(v)
4690+
v = conversion.ensure_datetime64ns(v)
46904691

46914692
if is_datetimetz(v):
46924693
datetime_tz_items.append((i, k, v))

pandas/core/tools/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from pandas._libs import tslib
66
from pandas._libs.tslibs.strptime import array_strptime
7-
from pandas._libs.tslibs import parsing
7+
from pandas._libs.tslibs import parsing, conversion
88
from pandas._libs.tslibs.parsing import ( # noqa
99
parse_time_string,
1010
DateParseError,
@@ -373,7 +373,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
373373

374374
except ValueError as e:
375375
try:
376-
values, tz = tslib.datetime_to_datetime64(arg)
376+
values, tz = conversion.datetime_to_datetime64(arg)
377377
return DatetimeIndex._simple_new(values, name=name, tz=tz)
378378
except (ValueError, TypeError):
379379
raise e

pandas/tests/indexes/datetimes/test_construction.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import pandas as pd
88
from pandas import offsets
99
import pandas.util.testing as tm
10-
from pandas._libs import tslib, lib
10+
from pandas._libs import lib
1111
from pandas._libs.tslib import OutOfBoundsDatetime
12+
from pandas._libs.tslibs import conversion
1213
from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range,
1314
to_datetime)
1415

@@ -496,7 +497,7 @@ def test_index_cast_datetime64_other_units(self):
496497
arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]')
497498
idx = Index(arr)
498499

499-
assert (idx.values == tslib.cast_to_nanoseconds(arr)).all()
500+
assert (idx.values == conversion.ensure_datetime64ns(arr)).all()
500501

501502
def test_constructor_int64_nocopy(self):
502503
# #1624

0 commit comments

Comments
 (0)