Skip to content

Commit 427d283

Browse files
jbrockmendel authored and jreback committed
Separate _TSObject into conversion (#18060)
1 parent 90fb0e3; commit 427d283

File tree

4 files changed: +324 -237 lines changed

pandas/_libs/tslib.pxd

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from numpy cimport ndarray, int64_t
22

3-
cdef convert_to_tsobject(object, object, object, bint, bint)
3+
from tslibs.conversion cimport convert_to_tsobject
4+
45
cpdef convert_to_timedelta64(object, object)
56
cdef bint _check_all_nulls(obj)
67

pandas/_libs/tslib.pyx

+5 -228
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,10 @@ from tslibs.timezones cimport (
9696
from tslibs.fields import (
9797
get_date_name_field, get_start_end_field, get_date_field,
9898
build_field_sarray)
99-
from tslibs.conversion cimport tz_convert_single, _TSObject, _localize_tso
99+
from tslibs.conversion cimport (tz_convert_single, _TSObject,
100+
convert_to_tsobject,
101+
convert_datetime_to_tsobject,
102+
get_datetime64_nanos)
100103
from tslibs.conversion import (
101104
tz_localize_to_utc, tz_convert,
102105
tz_convert_single)
@@ -1212,215 +1215,6 @@ cdef inline bint is_timestamp(object o):
12121215
return Py_TYPE(o) == ts_type # isinstance(o, Timestamp)
12131216

12141217

1215-
# helper to extract datetime and int64 from several different possibilities
1216-
cdef convert_to_tsobject(object ts, object tz, object unit,
1217-
bint dayfirst, bint yearfirst):
1218-
"""
1219-
Extract datetime and int64 from any of:
1220-
- np.int64 (with unit providing a possible modifier)
1221-
- np.datetime64
1222-
- a float (with unit providing a possible modifier)
1223-
- python int or long object (with unit providing a possible modifier)
1224-
- iso8601 string object
1225-
- python datetime object
1226-
- another timestamp object
1227-
"""
1228-
cdef:
1229-
_TSObject obj
1230-
1231-
if tz is not None:
1232-
tz = maybe_get_tz(tz)
1233-
1234-
obj = _TSObject()
1235-
1236-
if is_string_object(ts):
1237-
return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
1238-
1239-
if ts is None or ts is NaT:
1240-
obj.value = NPY_NAT
1241-
elif is_datetime64_object(ts):
1242-
if ts.view('i8') == NPY_NAT:
1243-
obj.value = NPY_NAT
1244-
else:
1245-
obj.value = _get_datetime64_nanos(ts)
1246-
dt64_to_dtstruct(obj.value, &obj.dts)
1247-
elif is_integer_object(ts):
1248-
if ts == NPY_NAT:
1249-
obj.value = NPY_NAT
1250-
else:
1251-
ts = ts * cast_from_unit(None, unit)
1252-
obj.value = ts
1253-
dt64_to_dtstruct(ts, &obj.dts)
1254-
elif is_float_object(ts):
1255-
if ts != ts or ts == NPY_NAT:
1256-
obj.value = NPY_NAT
1257-
else:
1258-
ts = cast_from_unit(ts, unit)
1259-
obj.value = ts
1260-
dt64_to_dtstruct(ts, &obj.dts)
1261-
elif PyDateTime_Check(ts):
1262-
return convert_datetime_to_tsobject(ts, tz)
1263-
elif PyDate_Check(ts):
1264-
# Keep the converter same as PyDateTime's
1265-
ts = datetime.combine(ts, datetime_time())
1266-
return convert_datetime_to_tsobject(ts, tz)
1267-
elif getattr(ts, '_typ', None) == 'period':
1268-
raise ValueError("Cannot convert Period to Timestamp "
1269-
"unambiguously. Use to_timestamp")
1270-
else:
1271-
raise TypeError('Cannot convert input [{}] of type {} to '
1272-
'Timestamp'.format(ts, type(ts)))
1273-
1274-
if obj.value != NPY_NAT:
1275-
check_dts_bounds(&obj.dts)
1276-
1277-
if tz is not None:
1278-
_localize_tso(obj, tz)
1279-
1280-
return obj
1281-
1282-
1283-
cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
1284-
int32_t nanos=0):
1285-
"""
1286-
Convert a datetime (or Timestamp) input `ts`, along with optional timezone
1287-
object `tz` to a _TSObject.
1288-
1289-
The optional argument `nanos` allows for cases where datetime input
1290-
needs to be supplemented with higher-precision information.
1291-
1292-
Parameters
1293-
----------
1294-
ts : datetime or Timestamp
1295-
Value to be converted to _TSObject
1296-
tz : tzinfo or None
1297-
timezone for the timezone-aware output
1298-
nanos : int32_t, default is 0
1299-
nanoseconds supplement the precision of the datetime input ts
1300-
1301-
Returns
1302-
-------
1303-
obj : _TSObject
1304-
"""
1305-
cdef:
1306-
_TSObject obj = _TSObject()
1307-
1308-
if tz is not None:
1309-
tz = maybe_get_tz(tz)
1310-
1311-
# sort of a temporary hack
1312-
if ts.tzinfo is not None:
1313-
if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'):
1314-
ts = tz.normalize(ts)
1315-
obj.value = pydatetime_to_dt64(ts, &obj.dts)
1316-
obj.tzinfo = ts.tzinfo
1317-
else:
1318-
# tzoffset
1319-
try:
1320-
tz = ts.astimezone(tz).tzinfo
1321-
except:
1322-
pass
1323-
obj.value = pydatetime_to_dt64(ts, &obj.dts)
1324-
ts_offset = get_utcoffset(ts.tzinfo, ts)
1325-
obj.value -= int(ts_offset.total_seconds() * 1e9)
1326-
tz_offset = get_utcoffset(tz, ts)
1327-
obj.value += int(tz_offset.total_seconds() * 1e9)
1328-
dt64_to_dtstruct(obj.value, &obj.dts)
1329-
obj.tzinfo = tz
1330-
elif not is_utc(tz):
1331-
ts = _localize_pydatetime(ts, tz)
1332-
obj.value = pydatetime_to_dt64(ts, &obj.dts)
1333-
obj.tzinfo = ts.tzinfo
1334-
else:
1335-
# UTC
1336-
obj.value = pydatetime_to_dt64(ts, &obj.dts)
1337-
obj.tzinfo = pytz.utc
1338-
else:
1339-
obj.value = pydatetime_to_dt64(ts, &obj.dts)
1340-
obj.tzinfo = ts.tzinfo
1341-
1342-
if obj.tzinfo is not None and not is_utc(obj.tzinfo):
1343-
offset = get_utcoffset(obj.tzinfo, ts)
1344-
obj.value -= int(offset.total_seconds() * 1e9)
1345-
1346-
if is_timestamp(ts):
1347-
obj.value += ts.nanosecond
1348-
obj.dts.ps = ts.nanosecond * 1000
1349-
1350-
if nanos:
1351-
obj.value += nanos
1352-
obj.dts.ps = nanos * 1000
1353-
1354-
check_dts_bounds(&obj.dts)
1355-
return obj
1356-
1357-
1358-
cdef convert_str_to_tsobject(object ts, object tz, object unit,
1359-
bint dayfirst=False, bint yearfirst=False):
1360-
""" ts must be a string """
1361-
1362-
cdef:
1363-
_TSObject obj
1364-
int out_local = 0, out_tzoffset = 0
1365-
datetime dt
1366-
1367-
if tz is not None:
1368-
tz = maybe_get_tz(tz)
1369-
1370-
obj = _TSObject()
1371-
1372-
assert is_string_object(ts)
1373-
1374-
if len(ts) == 0 or ts in nat_strings:
1375-
ts = NaT
1376-
elif ts == 'now':
1377-
# Issue 9000, we short-circuit rather than going
1378-
# into np_datetime_strings which returns utc
1379-
ts = datetime.now(tz)
1380-
elif ts == 'today':
1381-
# Issue 9000, we short-circuit rather than going
1382-
# into np_datetime_strings which returns a normalized datetime
1383-
ts = datetime.now(tz)
1384-
# equiv: datetime.today().replace(tzinfo=tz)
1385-
else:
1386-
try:
1387-
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
1388-
obj.value = dtstruct_to_dt64(&obj.dts)
1389-
check_dts_bounds(&obj.dts)
1390-
if out_local == 1:
1391-
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
1392-
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
1393-
if tz is None:
1394-
check_dts_bounds(&obj.dts)
1395-
return obj
1396-
else:
1397-
# Keep the converter same as PyDateTime's
1398-
obj = convert_to_tsobject(obj.value, obj.tzinfo,
1399-
None, 0, 0)
1400-
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
1401-
obj.dts.hour, obj.dts.min, obj.dts.sec,
1402-
obj.dts.us, obj.tzinfo)
1403-
obj = convert_datetime_to_tsobject(dt, tz,
1404-
nanos=obj.dts.ps / 1000)
1405-
return obj
1406-
1407-
else:
1408-
ts = obj.value
1409-
if tz is not None:
1410-
# shift for _localize_tso
1411-
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
1412-
ambiguous='raise',
1413-
errors='raise')[0]
1414-
except ValueError:
1415-
try:
1416-
ts = parse_datetime_string(ts, dayfirst=dayfirst,
1417-
yearfirst=yearfirst)
1418-
except Exception:
1419-
raise ValueError("could not convert string to Timestamp")
1420-
1421-
return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
1422-
1423-
14241218
def _test_parse_iso8601(object ts):
14251219
"""
14261220
TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used
@@ -1841,7 +1635,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
18411635
iresult[i] = NPY_NAT
18421636
else:
18431637
try:
1844-
iresult[i] = _get_datetime64_nanos(val)
1638+
iresult[i] = get_datetime64_nanos(val)
18451639
seen_datetime = 1
18461640
except ValueError:
18471641
if is_coerce:
@@ -2779,23 +2573,6 @@ cpdef int64_t _delta_to_nanoseconds(delta) except? -1:
27792573
delta.microseconds) * 1000
27802574

27812575

2782-
cdef inline _get_datetime64_nanos(object val):
2783-
cdef:
2784-
pandas_datetimestruct dts
2785-
PANDAS_DATETIMEUNIT unit
2786-
npy_datetime ival
2787-
2788-
unit = get_datetime64_unit(val)
2789-
ival = get_datetime64_value(val)
2790-
2791-
if unit != PANDAS_FR_ns:
2792-
pandas_datetime_to_datetimestruct(ival, unit, &dts)
2793-
check_dts_bounds(&dts)
2794-
return dtstruct_to_dt64(&dts)
2795-
else:
2796-
return ival
2797-
2798-
27992576
def cast_to_nanoseconds(ndarray arr):
28002577
cdef:
28012578
Py_ssize_t i, n = arr.size

pandas/_libs/tslibs/conversion.pxd

+13 -2
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
33

4-
from numpy cimport int64_t
4+
from cpython.datetime cimport datetime
55

6-
from datetime cimport pandas_datetimestruct
6+
from numpy cimport int64_t, int32_t
7+
8+
from np_datetime cimport pandas_datetimestruct
79

810

911
cdef class _TSObject:
@@ -12,6 +14,15 @@ cdef class _TSObject:
1214
int64_t value # numpy dt64
1315
object tzinfo
1416

17+
18+
cdef convert_to_tsobject(object ts, object tz, object unit,
19+
bint dayfirst, bint yearfirst)
20+
21+
cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
22+
int32_t nanos=*)
23+
1524
cdef void _localize_tso(_TSObject obj, object tz)
1625

1726
cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
27+
28+
cdef int64_t get_datetime64_nanos(object val) except? -1

0 commit comments

Comments (0)