Skip to content

Commit b658045

Browse files
jbrockmendel authored and peterpanmj committed
Move timedelta-specific functions to tslibs.timedeltas (pandas-dev#17827)
1 parent 9f2d60a commit b658045

File tree

4 files changed

+310
-276
lines changed

4 files changed

+310
-276
lines changed

pandas/_libs/tslib.pyx

+1-276
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ PyDateTime_IMPORT
8383
cdef int64_t NPY_NAT = util.get_nat()
8484
iNaT = NPY_NAT
8585

86+
from tslibs.timedeltas cimport parse_timedelta_string, cast_from_unit
8687
from tslibs.timezones cimport (
8788
is_utc, is_tzlocal, is_fixed_offset,
8889
treat_tz_as_dateutil, treat_tz_as_pytz,
@@ -3083,239 +3084,6 @@ cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'):
30833084

30843085
return iresult
30853086

3086-
cdef dict timedelta_abbrevs = { 'D': 'd',
3087-
'd': 'd',
3088-
'days': 'd',
3089-
'day': 'd',
3090-
'hours': 'h',
3091-
'hour': 'h',
3092-
'hr': 'h',
3093-
'h': 'h',
3094-
'm': 'm',
3095-
'minute': 'm',
3096-
'min': 'm',
3097-
'minutes': 'm',
3098-
's': 's',
3099-
'seconds': 's',
3100-
'sec': 's',
3101-
'second': 's',
3102-
'ms': 'ms',
3103-
'milliseconds': 'ms',
3104-
'millisecond': 'ms',
3105-
'milli': 'ms',
3106-
'millis': 'ms',
3107-
'us': 'us',
3108-
'microseconds': 'us',
3109-
'microsecond': 'us',
3110-
'micro': 'us',
3111-
'micros': 'us',
3112-
'ns': 'ns',
3113-
'nanoseconds': 'ns',
3114-
'nano': 'ns',
3115-
'nanos': 'ns',
3116-
'nanosecond': 'ns',
3117-
}
3118-
timedelta_abbrevs_map = timedelta_abbrevs
3119-
3120-
cdef inline int64_t timedelta_as_neg(int64_t value, bint neg):
3121-
"""
3122-
3123-
Parameters
3124-
----------
3125-
value : int64_t of the timedelta value
3126-
neg : boolean, True if the value is negative
3127-
"""
3128-
if neg:
3129-
return -value
3130-
return value
3131-
3132-
cdef inline timedelta_from_spec(object number, object frac, object unit):
3133-
"""
3134-
3135-
Parameters
3136-
----------
3137-
number : a list of number digits
3138-
frac : a list of frac digits
3139-
unit : a list of unit characters
3140-
"""
3141-
cdef object n
3142-
3143-
try:
3144-
unit = ''.join(unit)
3145-
unit = timedelta_abbrevs[unit.lower()]
3146-
except KeyError:
3147-
raise ValueError("invalid abbreviation: {0}".format(unit))
3148-
3149-
n = ''.join(number) + '.' + ''.join(frac)
3150-
return cast_from_unit(float(n), unit)
3151-
3152-
cdef inline parse_timedelta_string(object ts):
3153-
"""
3154-
Parse a regular format timedelta string. Return an int64_t (in ns)
3155-
or raise a ValueError on an invalid parse.
3156-
"""
3157-
3158-
cdef:
3159-
unicode c
3160-
bint neg=0, have_dot=0, have_value=0, have_hhmmss=0
3161-
object current_unit=None
3162-
int64_t result=0, m=0, r
3163-
list number=[], frac=[], unit=[]
3164-
3165-
# neg : tracks if we have a leading negative for the value
3166-
# have_dot : tracks if we are processing a dot (either post hhmmss or
3167-
# inside an expression)
3168-
# have_value : track if we have at least 1 leading unit
3169-
# have_hhmmss : tracks if we have a regular format hh:mm:ss
3170-
3171-
if len(ts) == 0 or ts in _nat_strings:
3172-
return NPY_NAT
3173-
3174-
# decode ts if necessary
3175-
if not PyUnicode_Check(ts) and not PY3:
3176-
ts = str(ts).decode('utf-8')
3177-
3178-
for c in ts:
3179-
3180-
# skip whitespace / commas
3181-
if c == ' ' or c == ',':
3182-
pass
3183-
3184-
# positive signs are ignored
3185-
elif c == '+':
3186-
pass
3187-
3188-
# neg
3189-
elif c == '-':
3190-
3191-
if neg or have_value or have_hhmmss:
3192-
raise ValueError("only leading negative signs are allowed")
3193-
3194-
neg = 1
3195-
3196-
# number (ascii codes)
3197-
elif ord(c) >= 48 and ord(c) <= 57:
3198-
3199-
if have_dot:
3200-
3201-
# we found a dot, but now it's just a fraction
3202-
if len(unit):
3203-
number.append(c)
3204-
have_dot = 0
3205-
else:
3206-
frac.append(c)
3207-
3208-
elif not len(unit):
3209-
number.append(c)
3210-
3211-
else:
3212-
r = timedelta_from_spec(number, frac, unit)
3213-
unit, number, frac = [], [c], []
3214-
3215-
result += timedelta_as_neg(r, neg)
3216-
3217-
# hh:mm:ss.
3218-
elif c == ':':
3219-
3220-
# we flip this off if we have a leading value
3221-
if have_value:
3222-
neg = 0
3223-
3224-
# we are in the hh:mm:ss pattern
3225-
if len(number):
3226-
if current_unit is None:
3227-
current_unit = 'h'
3228-
m = 1000000000L * 3600
3229-
elif current_unit == 'h':
3230-
current_unit = 'm'
3231-
m = 1000000000L * 60
3232-
elif current_unit == 'm':
3233-
current_unit = 's'
3234-
m = 1000000000L
3235-
r = <int64_t> int(''.join(number)) * m
3236-
result += timedelta_as_neg(r, neg)
3237-
have_hhmmss = 1
3238-
else:
3239-
raise ValueError("expecting hh:mm:ss format, "
3240-
"received: {0}".format(ts))
3241-
3242-
unit, number = [], []
3243-
3244-
# after the decimal point
3245-
elif c == '.':
3246-
3247-
if len(number) and current_unit is not None:
3248-
3249-
# by definition we had something like hh:mm:ss followed by a dot,
3250-
# so we need to evaluate the final field from a
3251-
# hh:mm:ss (so current_unit is 'm')
3252-
if current_unit != 'm':
3253-
raise ValueError("expected hh:mm:ss format before .")
3254-
m = 1000000000L
3255-
r = <int64_t> int(''.join(number)) * m
3256-
result += timedelta_as_neg(r, neg)
3257-
have_value = 1
3258-
unit, number, frac = [], [], []
3259-
3260-
have_dot = 1
3261-
3262-
# unit
3263-
else:
3264-
unit.append(c)
3265-
have_value = 1
3266-
have_dot = 0
3267-
3268-
# we had a dot, but we have fractional
3269-
# value since we have a unit
3270-
if have_dot and len(unit):
3271-
r = timedelta_from_spec(number, frac, unit)
3272-
result += timedelta_as_neg(r, neg)
3273-
3274-
# we have a dot as part of a regular format
3275-
# e.g. hh:mm:ss.fffffff
3276-
elif have_dot:
3277-
3278-
if ((len(number) or len(frac)) and not len(unit) and
3279-
current_unit is None):
3280-
raise ValueError("no units specified")
3281-
3282-
if len(frac) > 0 and len(frac) <= 3:
3283-
m = 10**(3 -len(frac)) * 1000L * 1000L
3284-
elif len(frac) > 3 and len(frac) <= 6:
3285-
m = 10**(6 -len(frac)) * 1000L
3286-
else:
3287-
m = 10**(9 -len(frac))
3288-
3289-
r = <int64_t> int(''.join(frac)) * m
3290-
result += timedelta_as_neg(r, neg)
3291-
3292-
# we have a regular format
3293-
# we must have seconds at this point (hence the unit is still 'm')
3294-
elif current_unit is not None:
3295-
if current_unit != 'm':
3296-
raise ValueError("expected hh:mm:ss format")
3297-
m = 1000000000L
3298-
r = <int64_t> int(''.join(number)) * m
3299-
result += timedelta_as_neg(r, neg)
3300-
3301-
# we have a last abbreviation
3302-
elif len(unit):
3303-
if len(number):
3304-
r = timedelta_from_spec(number, frac, unit)
3305-
result += timedelta_as_neg(r, neg)
3306-
else:
3307-
raise ValueError("unit abbreviation w/o a number")
3308-
3309-
# treat as nanoseconds
3310-
# but only if we don't have anything else
3311-
else:
3312-
if have_value:
3313-
raise ValueError("have leftover units")
3314-
if len(number):
3315-
r = timedelta_from_spec(number, frac, 'ns')
3316-
result += timedelta_as_neg(r, neg)
3317-
3318-
return result
33193087

33203088
cpdef convert_to_timedelta64(object ts, object unit):
33213089
"""
@@ -3412,49 +3180,6 @@ cdef inline _get_datetime64_nanos(object val):
34123180
else:
34133181
return ival
34143182

3415-
cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
3416-
""" return a casting of the unit represented to nanoseconds
3417-
round the fractional part of a float to our precision, p """
3418-
cdef:
3419-
int64_t m
3420-
int p
3421-
3422-
if unit == 'D' or unit == 'd':
3423-
m = 1000000000L * 86400
3424-
p = 6
3425-
elif unit == 'h':
3426-
m = 1000000000L * 3600
3427-
p = 6
3428-
elif unit == 'm':
3429-
m = 1000000000L * 60
3430-
p = 6
3431-
elif unit == 's':
3432-
m = 1000000000L
3433-
p = 6
3434-
elif unit == 'ms':
3435-
m = 1000000L
3436-
p = 3
3437-
elif unit == 'us':
3438-
m = 1000L
3439-
p = 0
3440-
elif unit == 'ns' or unit is None:
3441-
m = 1L
3442-
p = 0
3443-
else:
3444-
raise ValueError("cannot cast unit {0}".format(unit))
3445-
3446-
# just give me the unit back
3447-
if ts is None:
3448-
return m
3449-
3450-
# cast the unit, multiply base/frac separately
3451-
# to avoid precision issues from float -> int
3452-
base = <int64_t> ts
3453-
frac = ts -base
3454-
if p:
3455-
frac = round(frac, p)
3456-
return <int64_t> (base *m) + <int64_t> (frac *m)
3457-
34583183

34593184
def cast_to_nanoseconds(ndarray arr):
34603185
cdef:

pandas/_libs/tslibs/timedeltas.pxd

+8
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
4+
from numpy cimport int64_t
5+
6+
# Exposed for tslib, not intended for outside use.
7+
cdef parse_timedelta_string(object ts)
8+
cpdef int64_t cast_from_unit(object ts, object unit) except? -1

0 commit comments

Comments
 (0)