Skip to content

Commit 43fa964

Browse files
authored
[ PECO - 1768 ] PySQL: adjust HTTP retry logic to align with Go and Nodejs drivers (#467)
* Added the exponential backoff code * Added the exponential backoff algorithm and refractored the code * Added jitter and added unit tests * Reformatted * Fixed the test_retry_exponential_backoff integration test
1 parent ecdddba commit 43fa964

File tree

4 files changed

+61
-34
lines changed

4 files changed

+61
-34
lines changed

src/databricks/sql/auth/retry.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import random
23
import time
34
import typing
45
from enum import Enum
@@ -285,25 +286,30 @@ def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:
285286
"""
286287
retry_after = self.get_retry_after(response)
287288
if retry_after:
288-
backoff = self.get_backoff_time()
289-
proposed_wait = max(backoff, retry_after)
290-
self.check_proposed_wait(proposed_wait)
291-
time.sleep(proposed_wait)
292-
return True
289+
proposed_wait = retry_after
290+
else:
291+
proposed_wait = self.get_backoff_time()
293292

294-
return False
293+
proposed_wait = min(proposed_wait, self.delay_max)
294+
self.check_proposed_wait(proposed_wait)
295+
time.sleep(proposed_wait)
296+
return True
295297

296298
def get_backoff_time(self) -> float:
297-
"""Calls urllib3's built-in get_backoff_time.
299+
"""
300+
This method implements the exponential backoff algorithm to calculate the delay between retries.
298301
299302
Never returns a value larger than self.delay_max
300303
A MaxRetryDurationError will be raised if the calculated backoff would exceed self.max_attempts_duration
301304
302-
Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present
303-
in the previous unsuccessful request and `self.respect_retry_after_header` is True (which is always true)
305+
:return:
304306
"""
305307

306-
proposed_backoff = super().get_backoff_time()
308+
current_attempt = self.stop_after_attempts_count - self.total
309+
proposed_backoff = (2**current_attempt) * self.delay_min
310+
if self.backoff_jitter != 0.0:
311+
proposed_backoff += random.random() * self.backoff_jitter
312+
307313
proposed_backoff = min(proposed_backoff, self.delay_max)
308314
self.check_proposed_wait(proposed_backoff)
309315

src/databricks/sql/thrift_backend.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@
6464
# - 900s attempts-duration lines up w ODBC/JDBC drivers (for cluster startup > 10 mins)
6565
_retry_policy = { # (type, default, min, max)
6666
"_retry_delay_min": (float, 1, 0.1, 60),
67-
"_retry_delay_max": (float, 60, 5, 3600),
68-
"_retry_stop_after_attempts_count": (int, 30, 1, 60),
67+
"_retry_delay_max": (float, 30, 5, 3600),
68+
"_retry_stop_after_attempts_count": (int, 5, 1, 60),
6969
"_retry_stop_after_attempts_duration": (float, 900, 1, 86400),
7070
"_retry_delay_default": (float, 5, 1, 60),
7171
}

tests/e2e/common/retry_test_mixins.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def test_retry_max_count_not_exceeded(self):
174174
def test_retry_exponential_backoff(self):
175175
"""GIVEN the retry policy is configured for reasonable exponential backoff
176176
WHEN the server sends nothing but 429 responses with retry-afters
177-
THEN the connector will use those retry-afters as a floor
177+
THEN the connector will use those retry-afters values as delay
178178
"""
179179
retry_policy = self._retry_policy.copy()
180180
retry_policy["_retry_delay_min"] = 1
@@ -191,10 +191,10 @@ def test_retry_exponential_backoff(self):
191191
assert isinstance(cm.value.args[1], MaxRetryDurationError)
192192

193193
# With setting delay_min to 1, the expected retry delays should be:
194-
# 3, 3, 4
195-
# The first 2 retries are allowed, the 3rd retry puts the total duration over the limit
194+
# 3, 3, 3, 3
195+
# The first 3 retries are allowed, the 4th retry puts the total duration over the limit
196196
# of 10 seconds
197-
assert mock_obj.return_value.getresponse.call_count == 3
197+
assert mock_obj.return_value.getresponse.call_count == 4
198198
assert duration > 6
199199

200200
# Should be less than 7, but this is a safe margin for CI/CD slowness

tests/unit/test_retry.py

+39-18
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
from os import error
21
import time
3-
from unittest.mock import Mock, patch
2+
from unittest.mock import patch, call
43
import pytest
5-
from requests import Request
64
from urllib3 import HTTPResponse
7-
from databricks.sql.auth.retry import DatabricksRetryPolicy, RequestHistory
8-
5+
from databricks.sql.auth.retry import DatabricksRetryPolicy, RequestHistory, CommandType
6+
from urllib3.exceptions import MaxRetryError
97

108
class TestRetry:
119
@pytest.fixture()
@@ -25,32 +23,55 @@ def error_history(self) -> RequestHistory:
2523
method="POST", url=None, error=None, status=503, redirect_location=None
2624
)
2725

26+
def calculate_backoff_time(self, attempt, delay_min, delay_max):
27+
exponential_backoff_time = (2**attempt) * delay_min
28+
return min(exponential_backoff_time, delay_max)
29+
2830
@patch("time.sleep")
2931
def test_sleep__no_retry_after(self, t_mock, retry_policy, error_history):
3032
retry_policy._retry_start_time = time.time()
3133
retry_policy.history = [error_history, error_history]
3234
retry_policy.sleep(HTTPResponse(status=503))
33-
t_mock.assert_called_with(2)
35+
36+
expected_backoff_time = self.calculate_backoff_time(0, retry_policy.delay_min, retry_policy.delay_max)
37+
t_mock.assert_called_with(expected_backoff_time)
3438

3539
@patch("time.sleep")
36-
def test_sleep__retry_after_is_binding(self, t_mock, retry_policy, error_history):
40+
def test_sleep__no_retry_after_header__multiple_retries(self, t_mock, retry_policy):
41+
num_attempts = retry_policy.stop_after_attempts_count
42+
3743
retry_policy._retry_start_time = time.time()
38-
retry_policy.history = [error_history, error_history]
39-
retry_policy.sleep(HTTPResponse(status=503, headers={"Retry-After": "3"}))
40-
t_mock.assert_called_with(3)
44+
retry_policy.command_type = CommandType.OTHER
45+
46+
for attempt in range(num_attempts):
47+
retry_policy.sleep(HTTPResponse(status=503))
48+
# Internally urllib3 calls the increment function generating a new instance for every retry
49+
retry_policy = retry_policy.increment()
50+
51+
expected_backoff_times = []
52+
for attempt in range(num_attempts):
53+
expected_backoff_times.append(self.calculate_backoff_time(attempt, retry_policy.delay_min, retry_policy.delay_max))
54+
55+
# Asserts if the sleep value was called in the expected order
56+
t_mock.assert_has_calls([call(expected_time) for expected_time in expected_backoff_times])
4157

4258
@patch("time.sleep")
43-
def test_sleep__retry_after_present_but_not_binding(
44-
self, t_mock, retry_policy, error_history
45-
):
59+
def test_excessive_retry_attempts_error(self, t_mock, retry_policy):
60+
# Attempting more than stop_after_attempt_count
61+
num_attempts = retry_policy.stop_after_attempts_count + 1
62+
4663
retry_policy._retry_start_time = time.time()
47-
retry_policy.history = [error_history, error_history]
48-
retry_policy.sleep(HTTPResponse(status=503, headers={"Retry-After": "1"}))
49-
t_mock.assert_called_with(2)
64+
retry_policy.command_type = CommandType.OTHER
65+
66+
with pytest.raises(MaxRetryError):
67+
for attempt in range(num_attempts):
68+
retry_policy.sleep(HTTPResponse(status=503))
69+
# Internally urllib3 calls the increment function generating a new instance for every retry
70+
retry_policy = retry_policy.increment()
5071

5172
@patch("time.sleep")
52-
def test_sleep__retry_after_surpassed(self, t_mock, retry_policy, error_history):
73+
def test_sleep__retry_after_present(self, t_mock, retry_policy, error_history):
5374
retry_policy._retry_start_time = time.time()
5475
retry_policy.history = [error_history, error_history, error_history]
5576
retry_policy.sleep(HTTPResponse(status=503, headers={"Retry-After": "3"}))
56-
t_mock.assert_called_with(4)
77+
t_mock.assert_called_with(3)

0 commit comments

Comments
 (0)