-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Fix Timestamp rounding #21507
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Timestamp rounding #21507
Changes from 4 commits
ee8ba61
50986c6
d1c6e6f
03a42b8
a302074
33335b4
0363a72
7353c2f
82c7db1
559bb50
bfe0d16
4249fd7
2abd8e2
3546c00
342fd40
a1111b3
4423dc9
0550ef7
57c8222
8a51d07
7f2f340
56477a0
ea922c6
a37c3d2
9f9b636
c5a11d6
bcc6b8c
402905d
22d65e1
2908346
0e05de4
159756e
70a3d6d
7d664d5
7463576
f504300
07381bb
e2fb27a
f57e0eb
0921273
d2de507
aa5e1f1
676ae59
8ca172d
28780c2
a231fb2
5f44af0
1427b69
2741967
34b77d1
e788e47
fb16555
84533b1
8cbcafd
c53e001
34d74ce
a10216d
4e6a11f
ca9ce8d
6289c76
b19219d
3814d0c
ea205c0
fd8d6bc
f3a89f3
eb47287
c815e0b
91c9ec3
c019f6d
c03ed38
4668dba
07e161d
15b040c
b21efcc
a7d5bb7
8944d35
8280084
5f956ce
19b78ca
303a279
a4798c3
f901431
8794ef2
6eb0341
289f3ad
a65ea90
13a3d5a
afdcac1
9f6c154
c19017b
5e4882e
b3443da
ebeccfc
db233f3
79d982a
367ce07
8db9303
e8f5ede
58a1a08
c6660f6
7555378
45cfa62
d746bee
476717c
001dc78
59286da
dad1252
b3b047e
242ccbc
8cbfcbf
d07e61b
0829063
da3b903
2d0fa8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,30 +72,50 @@ def round_ns(values, rounder, freq): | |
------- | ||
int or :obj:`ndarray` | ||
""" | ||
def _round_non_int_multiple(value): | ||
if unit < 1000: | ||
# for nano rounding, work with the last 6 digits separately | ||
# due to float precision | ||
buff = 1000000 | ||
r = (buff * (value // buff) + unit * | ||
(rounder((value % buff) * (1 / float(unit)))).astype('i8')) | ||
else: | ||
if unit % 1000 != 0: | ||
msg = 'Precision will be lost using frequency: {}' | ||
warnings.warn(msg.format(freq)) | ||
|
||
# GH19206 | ||
# to deal with round-off when unit is large | ||
if unit >= 1e9: | ||
divisor = 10 ** int(np.log10(unit / 1e7)) | ||
else: | ||
divisor = 10 | ||
|
||
r = (unit * rounder((value * (divisor / float(unit))) / divisor) | ||
.astype('i8')) | ||
|
||
return r | ||
|
||
# GH21262 If the Timestamp is multiple of the freq str | ||
# then we don't apply _round_non_int_multiple | ||
|
||
def _apply_round(value): | ||
if value % unit == 0: | ||
return value | ||
else: | ||
return _round_non_int_multiple(value) | ||
|
||
from pandas.tseries.frequencies import to_offset | ||
unit = to_offset(freq).nanos | ||
if unit < 1000: | ||
# for nano rounding, work with the last 6 digits separately | ||
# due to float precision | ||
buff = 1000000 | ||
r = (buff * (values // buff) + unit * | ||
(rounder((values % buff) * (1 / float(unit)))).astype('i8')) | ||
else: | ||
if unit % 1000 != 0: | ||
msg = 'Precision will be lost using frequency: {}' | ||
warnings.warn(msg.format(freq)) | ||
|
||
# GH19206 | ||
# to deal with round-off when unit is large | ||
if unit >= 1e9: | ||
divisor = 10 ** int(np.log10(unit / 1e7)) | ||
else: | ||
divisor = 10 | ||
|
||
r = (unit * rounder((values * (divisor / float(unit))) / divisor) | ||
.astype('i8')) | ||
if type(values) is int: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this not just another part of the if? |
||
if values % unit == 0: | ||
return values | ||
else: | ||
return _round_non_int_multiple(values) | ||
|
||
return r | ||
else: | ||
return np.fromiter((_apply_round(item) for item in values), np.int64) | ||
|
||
|
||
# This is PITA. Because we inherit from datetime, which has very specific | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -118,6 +118,27 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): | |
expected = Timestamp(expected) | ||
assert result == expected | ||
|
||
@pytest.mark.parametrize('test_input, freq, expected', [ | ||
('2018-01-01 00:02:06', '2s', '2018-01-01 00:02:06'), | ||
('2018-01-01 00:02:00', '2T', '2018-01-01 00:02:00'), | ||
('2018-01-01 00:04:00', '4T', '2018-01-01 00:04:00'), | ||
('2018-01-01 00:15:00', '15T', '2018-01-01 00:15:00'), | ||
('2018-01-01 00:20:00', '20T', '2018-01-01 00:20:00'), | ||
('2018-01-01 03:00:00', '3H', '2018-01-01 03:00:00'), | ||
]) | ||
def test_round_minute_freq(self, test_input, freq, expected): | ||
# Ensure timestamps that shouldnt round dont! | ||
# GH#21262 | ||
dt = Timestamp(test_input) | ||
expected = Timestamp(expected) | ||
|
||
result_ceil = dt.ceil(freq) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you also parametrize over the rounding methods ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure makes sense to me |
||
assert result_ceil == expected | ||
result_floor = dt.floor(freq) | ||
assert result_floor == expected | ||
result_round = dt.round(freq) | ||
assert result_round == expected | ||
|
||
def test_ceil(self): | ||
dt = Timestamp('20130101 09:10:11') | ||
result = dt.ceil('D') | ||
|
@@ -257,7 +278,6 @@ def test_timestamp(self): | |
if PY3: | ||
# datetime.timestamp() converts in the local timezone | ||
with tm.set_timezone('UTC'): | ||
|
||
# should agree with datetime.timestamp method | ||
dt = ts.to_pydatetime() | ||
assert dt.timestamp() == ts.timestamp() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why don't you just have the int check inside here? this is convoluting the logic a lot