Skip to content

Commit 47723c3

Browse files
committed
Convert float freqstrs to ints at finer resolution
Passing `'0.5min'` as a frequency string should generate 30-second intervals, rather than five-minute intervals. By recursively moving to the next finer resolution until one is found for which the frequency is an integer, this commit ensures that this is the case for resolutions from days to microseconds. Fixes #8419
1 parent d7fb5bd commit 47723c3

File tree

5 files changed

+140
-40
lines changed

5 files changed

+140
-40
lines changed

doc/source/whatsnew/v0.19.1.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,5 @@ Bug Fixes
8080

8181

8282
- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
83-
is not scalar and ``values`` is not specified (:issue:`14380`)
83+
is not scalar and ``values`` is not specified (:issue:`14380`)
84+
- Bug in offset aliases with a decimal multiple, which were regarded as ints (e.g. ``0.5s`` as ``5s``) (:issue:`8419`)

pandas/src/period.pyx

+14-14
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,12 @@ cdef bint PY2 = version_info[0] == 2
4545

4646
cdef int64_t NPY_NAT = util.get_nat()
4747

48-
cdef int US_RESO = frequencies.US_RESO
49-
cdef int MS_RESO = frequencies.MS_RESO
50-
cdef int S_RESO = frequencies.S_RESO
51-
cdef int T_RESO = frequencies.T_RESO
52-
cdef int H_RESO = frequencies.H_RESO
53-
cdef int D_RESO = frequencies.D_RESO
48+
cdef int RESO_US = frequencies.RESO_US
49+
cdef int RESO_MS = frequencies.RESO_MS
50+
cdef int RESO_SEC = frequencies.RESO_SEC
51+
cdef int RESO_MIN = frequencies.RESO_MIN
52+
cdef int RESO_HR = frequencies.RESO_HR
53+
cdef int RESO_DAY = frequencies.RESO_DAY
5454

5555
cdef extern from "period_helper.h":
5656
ctypedef struct date_info:
@@ -516,7 +516,7 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):
516516
cdef:
517517
Py_ssize_t i, n = len(stamps)
518518
pandas_datetimestruct dts
519-
int reso = D_RESO, curr_reso
519+
int reso = RESO_DAY, curr_reso
520520

521521
if tz is not None:
522522
tz = maybe_get_tz(tz)
@@ -535,20 +535,20 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):
535535
cdef inline int _reso_stamp(pandas_datetimestruct *dts):
536536
if dts.us != 0:
537537
if dts.us % 1000 == 0:
538-
return MS_RESO
539-
return US_RESO
538+
return RESO_MS
539+
return RESO_US
540540
elif dts.sec != 0:
541-
return S_RESO
541+
return RESO_SEC
542542
elif dts.min != 0:
543-
return T_RESO
543+
return RESO_MIN
544544
elif dts.hour != 0:
545-
return H_RESO
546-
return D_RESO
545+
return RESO_HR
546+
return RESO_DAY
547547

548548
cdef _reso_local(ndarray[int64_t] stamps, object tz):
549549
cdef:
550550
Py_ssize_t n = len(stamps)
551-
int reso = D_RESO, curr_reso
551+
int reso = RESO_DAY, curr_reso
552552
ndarray[int64_t] trans, deltas, pos
553553
pandas_datetimestruct dts
554554

pandas/tseries/frequencies.py

+72-15
Original file line numberDiff line numberDiff line change
@@ -38,32 +38,52 @@ class FreqGroup(object):
3838
FR_NS = 12000
3939

4040

41-
US_RESO = 0
42-
MS_RESO = 1
43-
S_RESO = 2
44-
T_RESO = 3
45-
H_RESO = 4
46-
D_RESO = 5
41+
RESO_NS = 0
42+
RESO_US = 1
43+
RESO_MS = 2
44+
RESO_SEC = 3
45+
RESO_MIN = 4
46+
RESO_HR = 5
47+
RESO_DAY = 6
4748

4849

4950
class Resolution(object):
5051

5152
# defined in period.pyx
5253
# note that these are different from freq codes
53-
RESO_US = US_RESO
54-
RESO_MS = MS_RESO
55-
RESO_SEC = S_RESO
56-
RESO_MIN = T_RESO
57-
RESO_HR = H_RESO
58-
RESO_DAY = D_RESO
54+
RESOS = [RESO_NS, RESO_US, RESO_MS, RESO_SEC, RESO_MIN, RESO_HR, RESO_DAY]
5955

6056
_reso_str_map = {
57+
RESO_NS: 'nanosecond',
6158
RESO_US: 'microsecond',
6259
RESO_MS: 'millisecond',
6360
RESO_SEC: 'second',
6461
RESO_MIN: 'minute',
6562
RESO_HR: 'hour',
66-
RESO_DAY: 'day'}
63+
RESO_DAY: 'day'
64+
}
65+
66+
# factor to multiply a value by to convert it to the next finer grained
67+
# resolution
68+
_reso_mult_map = {
69+
RESO_NS: None,
70+
RESO_US: 1000,
71+
RESO_MS: 1000,
72+
RESO_SEC: 1000,
73+
RESO_MIN: 60,
74+
RESO_HR: 60,
75+
RESO_DAY: 24
76+
}
77+
78+
_reso_str_bump_map = {
79+
'D': 'H',
80+
'H': 'T',
81+
'T': 'S',
82+
'S': 'L',
83+
'L': 'U',
84+
'U': 'N',
85+
'N': None
86+
}
6787

6888
_str_reso_map = dict([(v, k) for k, v in compat.iteritems(_reso_str_map)])
6989

@@ -160,6 +180,36 @@ def get_reso_from_freq(cls, freq):
160180
"""
161181
return cls.get_reso(cls.get_str_from_freq(freq))
162182

183+
@classmethod
184+
def get_stride_from_decimal(cls, value, freq):
185+
"""
186+
Convert freq with decimal stride into a higher freq with integer stride
187+
188+
Example
189+
-------
190+
>>> Resolution.get_stride_from_decimal(1.5, 'T')
191+
(90, 'S')
192+
193+
>>> Resolution.get_stride_from_decimal(1.04, 'H')
194+
(3744, 'S')
195+
196+
>>> Resolution.get_stride_from_decimal(1, 'D')
197+
(1, 'D')
198+
"""
199+
200+
if np.isclose(value % 1, 0):
201+
return int(value), freq
202+
else:
203+
start_reso = cls.get_reso_from_freq(freq)
204+
if start_reso == 0:
205+
raise ValueError(
206+
"Could not convert to integer offset at any resolution"
207+
)
208+
209+
next_value = cls._reso_mult_map[start_reso] * value
210+
next_name = cls._reso_str_bump_map[freq]
211+
return cls.get_stride_from_decimal(next_value, next_name)
212+
163213

164214
def get_to_timestamp_base(base):
165215
"""
@@ -472,12 +522,17 @@ def to_offset(freq):
472522
splitted[2::4]):
473523
if sep != '' and not sep.isspace():
474524
raise ValueError('separator must be spaces')
475-
offset = get_offset(name)
525+
prefix = _lite_rule_alias.get(name) or name
476526
if stride_sign is None:
477527
stride_sign = -1 if stride.startswith('-') else 1
478528
if not stride:
479529
stride = 1
530+
if prefix in Resolution._reso_str_bump_map.keys():
531+
stride, name = Resolution.get_stride_from_decimal(
532+
float(stride), prefix
533+
)
480534
stride = int(stride)
535+
offset = get_offset(name)
481536
offset = offset * int(np.fabs(stride) * stride_sign)
482537
if delta is None:
483538
delta = offset
@@ -493,7 +548,9 @@ def to_offset(freq):
493548

494549

495550
# hack to handle WOM-1MON
496-
opattern = re.compile(r'([\-]?\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)')
551+
opattern = re.compile(
552+
r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
553+
)
497554

498555

499556
def _base_and_stride(freqstr):

pandas/tseries/tests/test_frequencies.py

+46-4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,21 @@ def test_to_offset_multiple(self):
3939
expected = offsets.Hour(3)
4040
assert (result == expected)
4141

42+
freqstr = '2h 20.5min'
43+
result = frequencies.to_offset(freqstr)
44+
expected = offsets.Second(8430)
45+
assert (result == expected)
46+
47+
freqstr = '1.5min'
48+
result = frequencies.to_offset(freqstr)
49+
expected = offsets.Second(90)
50+
assert (result == expected)
51+
52+
freqstr = '0.5S'
53+
result = frequencies.to_offset(freqstr)
54+
expected = offsets.Milli(500)
55+
assert (result == expected)
56+
4257
freqstr = '15l500u'
4358
result = frequencies.to_offset(freqstr)
4459
expected = offsets.Micro(15500)
@@ -49,6 +64,16 @@ def test_to_offset_multiple(self):
4964
expected = offsets.Milli(10075)
5065
assert (result == expected)
5166

67+
freqstr = '1s0.25ms'
68+
result = frequencies.to_offset(freqstr)
69+
expected = offsets.Micro(1000250)
70+
assert (result == expected)
71+
72+
freqstr = '1s0.25L'
73+
result = frequencies.to_offset(freqstr)
74+
expected = offsets.Micro(1000250)
75+
assert (result == expected)
76+
5277
freqstr = '2800N'
5378
result = frequencies.to_offset(freqstr)
5479
expected = offsets.Nano(2800)
@@ -107,10 +132,8 @@ def test_to_offset_invalid(self):
107132
frequencies.to_offset('-2-3U')
108133
with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: -2D:3H'):
109134
frequencies.to_offset('-2D:3H')
110-
111-
# ToDo: Must be fixed in #8419
112-
with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: .5S'):
113-
frequencies.to_offset('.5S')
135+
with tm.assertRaisesRegexp(ValueError, 'Invalid frequency: 1.5.0S'):
136+
frequencies.to_offset('1.5.0S')
114137

115138
# split offsets with spaces are valid
116139
assert frequencies.to_offset('2D 3H') == offsets.Hour(51)
@@ -379,6 +402,25 @@ def test_freq_to_reso(self):
379402
result = Reso.get_freq(Reso.get_str(Reso.get_reso_from_freq(freq)))
380403
self.assertEqual(freq, result)
381404

405+
def test_resolution_bumping(self):
406+
Reso = frequencies.Resolution
407+
408+
self.assertEqual(Reso.get_stride_from_decimal(1.5, 'T'), (90, 'S'))
409+
self.assertEqual(Reso.get_stride_from_decimal(62.4, 'T'), (3744, 'S'))
410+
self.assertEqual(Reso.get_stride_from_decimal(1.04, 'H'), (3744, 'S'))
411+
self.assertEqual(Reso.get_stride_from_decimal(1, 'D'), (1, 'D'))
412+
self.assertEqual(Reso.get_stride_from_decimal(0.342931, 'H'),
413+
(1234551600, 'U'))
414+
self.assertEqual(Reso.get_stride_from_decimal(1.2345, 'D'),
415+
(106660800, 'L'))
416+
417+
with self.assertRaises(ValueError):
418+
Reso.get_stride_from_decimal(0.5, 'N')
419+
420+
# too much precision in the input can prevent conversion to an integer stride at any resolution
421+
with self.assertRaises(ValueError):
422+
Reso.get_stride_from_decimal(0.3429324798798269273987982, 'H')
423+
382424
def test_get_freq_code(self):
383425
# freqstr
384426
self.assertEqual(frequencies.get_freq_code('A'),

pandas/tseries/tests/test_tslib.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.tseries.index import date_range, DatetimeIndex
1515
from pandas.tseries.frequencies import (
1616
get_freq,
17-
US_RESO, MS_RESO, S_RESO, H_RESO, D_RESO, T_RESO
17+
RESO_US, RESO_MS, RESO_SEC, RESO_HR, RESO_DAY, RESO_MIN
1818
)
1919
import pandas.tseries.tools as tools
2020
import pandas.tseries.offsets as offsets
@@ -1527,11 +1527,11 @@ def test_resolution(self):
15271527

15281528
for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
15291529
'S', 'L', 'U'],
1530-
[D_RESO, D_RESO,
1531-
D_RESO, D_RESO,
1532-
H_RESO, T_RESO,
1533-
S_RESO, MS_RESO,
1534-
US_RESO]):
1530+
[RESO_DAY, RESO_DAY,
1531+
RESO_DAY, RESO_DAY,
1532+
RESO_HR, RESO_MIN,
1533+
RESO_SEC, RESO_MS,
1534+
RESO_US]):
15351535
for tz in [None, 'Asia/Tokyo', 'US/Eastern',
15361536
'dateutil/US/Eastern']:
15371537
idx = date_range(start='2013-04-01', periods=30, freq=freq,

0 commit comments

Comments
 (0)