Skip to content

Don't swallow unexpected errors during Elasticsearch verification step #1635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 67 additions & 70 deletions elasticsearch/_async/transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async def _async_init(self):

# Set our 'verified_once' implementation to one that
# works with 'asyncio' instead of 'threading'
self._verified_once = Once()
self._verify_elasticsearch_lock = asyncio.Lock()

# Now that we have a loop we can create all our HTTP connections...
self.set_connections(self.hosts)
Expand Down Expand Up @@ -338,9 +338,7 @@ async def perform_request(self, method, url, headers=None, params=None, body=Non

# Before we make the actual API call we verify the Elasticsearch instance.
if self._verified_elasticsearch is None:
await self._verified_once.call(
self._do_verify_elasticsearch, headers=headers, timeout=timeout
)
await self._do_verify_elasticsearch(headers=headers, timeout=timeout)

# If '_verified_elasticsearch' is False we know we're not connected to Elasticsearch.
if self._verified_elasticsearch is False:
Expand Down Expand Up @@ -431,74 +429,73 @@ async def _do_verify_elasticsearch(self, headers, timeout):
but we're also unable to rule it out due to a permission
error we instead emit an 'ElasticsearchWarning'.
"""
# Product check has already been done, no need to do again.
if self._verified_elasticsearch:
return

headers = {header.lower(): value for header, value in (headers or {}).items()}
# We know we definitely want JSON so request it via 'accept'
headers.setdefault("accept", "application/json")
# Ensure that there's only one async exec within this section
# at a time to not emit unnecessary index API calls.
async with self._verify_elasticsearch_lock:

info_headers = {}
info_response = {}
error = None

for conn in chain(self.connection_pool.connections, self.seed_connections):
try:
_, info_headers, info_response = await conn.perform_request(
"GET", "/", headers=headers, timeout=timeout
)

# Lowercase all the header names for consistency in accessing them.
info_headers = {
header.lower(): value for header, value in info_headers.items()
}

info_response = self.deserializer.loads(
info_response, mimetype="application/json"
)
break

# Previous versions of 7.x Elasticsearch required a specific
# permission so if we receive HTTP 401/403 we should warn
# instead of erroring out.
except (AuthenticationException, AuthorizationException):
warnings.warn(
(
"The client is unable to verify that the server is "
"Elasticsearch due security privileges on the server side"
),
ElasticsearchWarning,
stacklevel=4,
)
self._verified_elasticsearch = True
# Product check has already been completed while we were
# waiting our turn, no need to do again.
if self._verified_elasticsearch is not None:
return

# This connection didn't work, we'll try another.
except (ConnectionError, SerializationError) as err:
if error is None:
error = err

# If we received a connection error and weren't successful
# anywhere then we reraise the more appropriate error.
if error and not info_response:
raise error

# Check the information we got back from the index request.
self._verified_elasticsearch = _verify_elasticsearch(
info_headers, info_response
)


class Once:
"""Simple class which forces an async function to only execute once."""
headers = {
header.lower(): value for header, value in (headers or {}).items()
}
# We know we definitely want JSON so request it via 'accept'
headers.setdefault("accept", "application/json")

info_headers = {}
info_response = {}
error = None

attempted_conns = []
for conn in chain(self.connection_pool.connections, self.seed_connections):
# Only attempt once per connection max.
if conn in attempted_conns:
continue
attempted_conns.append(conn)

try:
_, info_headers, info_response = await conn.perform_request(
"GET", "/", headers=headers, timeout=timeout
)

def __init__(self):
self._lock = asyncio.Lock()
self._called = False
# Lowercase all the header names for consistency in accessing them.
info_headers = {
header.lower(): value for header, value in info_headers.items()
}

async def call(self, func, *args, **kwargs):
async with self._lock:
if not self._called:
self._called = True
await func(*args, **kwargs)
info_response = self.deserializer.loads(
info_response, mimetype="application/json"
)
break

# Previous versions of 7.x Elasticsearch required a specific
# permission so if we receive HTTP 401/403 we should warn
# instead of erroring out.
except (AuthenticationException, AuthorizationException):
warnings.warn(
(
"The client is unable to verify that the server is "
"Elasticsearch due security privileges on the server side"
),
ElasticsearchWarning,
stacklevel=4,
)
self._verified_elasticsearch = True
return

# This connection didn't work, we'll try another.
except (ConnectionError, SerializationError, TransportError) as err:
if error is None:
error = err

# If we received a connection error and weren't successful
# anywhere then we re-raise the more appropriate error.
if error and not info_response:
raise error

# Check the information we got back from the index request.
self._verified_elasticsearch = _verify_elasticsearch(
info_headers, info_response
)
131 changes: 64 additions & 67 deletions elasticsearch/transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def __init__(

# Ensures that the ES verification request only fires once and that
# all requests block until this request returns back.
self._verified_once = Once()
self._verify_elasticsearch_lock = Lock()

def add_connection(self, host):
"""
Expand Down Expand Up @@ -406,9 +406,7 @@ def perform_request(self, method, url, headers=None, params=None, body=None):

# Before we make the actual API call we verify the Elasticsearch instance.
if self._verified_elasticsearch is None:
self._verified_once.call(
self._do_verify_elasticsearch, headers=headers, timeout=timeout
)
self._do_verify_elasticsearch(headers=headers, timeout=timeout)

# If '_verified_elasticsearch' is False we know we're not connected to Elasticsearch.
if self._verified_elasticsearch is False:
Expand Down Expand Up @@ -536,63 +534,76 @@ def _do_verify_elasticsearch(self, headers, timeout):
but we're also unable to rule it out due to a permission
error we instead emit an 'ElasticsearchWarning'.
"""
# Product check has already been done, no need to do again.
if self._verified_elasticsearch is not None:
return
# Ensure that there's only one thread within this section
# at a time to not emit unnecessary index API calls.
with self._verify_elasticsearch_lock:

headers = {header.lower(): value for header, value in (headers or {}).items()}
# We know we definitely want JSON so request it via 'accept'
headers.setdefault("accept", "application/json")
# Product check has already been completed while we were
# waiting our turn, no need to do again.
if self._verified_elasticsearch is not None:
return

info_headers = {}
info_response = {}
error = None
headers = {
header.lower(): value for header, value in (headers or {}).items()
}
# We know we definitely want JSON so request it via 'accept'
headers.setdefault("accept", "application/json")

for conn in chain(self.connection_pool.connections, self.seed_connections):
try:
_, info_headers, info_response = conn.perform_request(
"GET", "/", headers=headers, timeout=timeout
)
info_headers = {}
info_response = {}
error = None

# Lowercase all the header names for consistency in accessing them.
info_headers = {
header.lower(): value for header, value in info_headers.items()
}
attempted_conns = []
for conn in chain(self.connection_pool.connections, self.seed_connections):
# Only attempt once per connection max.
if conn in attempted_conns:
continue
attempted_conns.append(conn)

info_response = self.deserializer.loads(
info_response, mimetype="application/json"
)
break

# Previous versions of 7.x Elasticsearch required a specific
# permission so if we receive HTTP 401/403 we should warn
# instead of erroring out.
except (AuthenticationException, AuthorizationException):
warnings.warn(
(
"The client is unable to verify that the server is "
"Elasticsearch due security privileges on the server side"
),
ElasticsearchWarning,
stacklevel=5,
)
self._verified_elasticsearch = True
return
try:
_, info_headers, info_response = conn.perform_request(
"GET", "/", headers=headers, timeout=timeout
)

# This connection didn't work, we'll try another.
except (ConnectionError, SerializationError) as err:
if error is None:
error = err
# Lowercase all the header names for consistency in accessing them.
info_headers = {
header.lower(): value for header, value in info_headers.items()
}

# If we received a connection error and weren't successful
# anywhere then we reraise the more appropriate error.
if error and not info_response:
raise error
info_response = self.deserializer.loads(
info_response, mimetype="application/json"
)
break

# Check the information we got back from the index request.
self._verified_elasticsearch = _verify_elasticsearch(
info_headers, info_response
)
# Previous versions of 7.x Elasticsearch required a specific
# permission so if we receive HTTP 401/403 we should warn
# instead of erroring out.
except (AuthenticationException, AuthorizationException):
warnings.warn(
(
"The client is unable to verify that the server is "
"Elasticsearch due security privileges on the server side"
),
ElasticsearchWarning,
stacklevel=5,
)
self._verified_elasticsearch = True
return

# This connection didn't work, we'll try another.
except (ConnectionError, SerializationError, TransportError) as err:
if error is None:
error = err

# If we received a connection error and weren't successful
# anywhere then we re-raise the more appropriate error.
if error and not info_response:
raise error

# Check the information we got back from the index request.
self._verified_elasticsearch = _verify_elasticsearch(
info_headers, info_response
)


def _verify_elasticsearch(headers, response):
Expand Down Expand Up @@ -640,17 +651,3 @@ def _verify_elasticsearch(headers, response):
return False

return True


class Once:
"""Simple class which forces a function to only execute once."""

def __init__(self):
self._lock = Lock()
self._called = False

def call(self, func, *args, **kwargs):
with self._lock:
if not self._called:
self._called = True
func(*args, **kwargs)
55 changes: 54 additions & 1 deletion test_elasticsearch/test_async/test_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
ConnectionError,
ElasticsearchWarning,
NotElasticsearchError,
NotFoundError,
TransportError,
)

Expand Down Expand Up @@ -770,7 +771,9 @@ async def request_task():
# The rest of the requests are 'GET /_search' afterwards
assert all(call[0][:2] == ("GET", "/_search") for call in calls[1:])

async def test_multiple_requests_verify_elasticsearch_errors(self, event_loop):
async def test_multiple_requests_verify_elasticsearch_product_error(
self, event_loop
):
t = AsyncTransport(
[
{
Expand Down Expand Up @@ -823,3 +826,53 @@ async def request_task():

# The rest of the requests are 'GET /_search' afterwards
assert all(call[0][:2] == ("GET", "/_search") for call in calls[1:])

@pytest.mark.parametrize("error_cls", [ConnectionError, NotFoundError])
async def test_multiple_requests_verify_elasticsearch_retry_on_errors(
self, event_loop, error_cls
):
t = AsyncTransport(
[
{
"exception": error_cls(),
"delay": 0.1,
}
],
connection_class=DummyConnection,
)

results = []
completed_at = []

async def request_task():
try:
results.append(await t.perform_request("GET", "/_search"))
except Exception as e:
results.append(e)
completed_at.append(event_loop.time())

# Execute a bunch of requests concurrently.
tasks = []
start_time = event_loop.time()
for _ in range(5):
tasks.append(event_loop.create_task(request_task()))
await asyncio.gather(*tasks)
end_time = event_loop.time()

# Exactly 5 results completed
assert len(results) == 5

# All results were errors and not wrapped in 'NotElasticsearchError'
assert all(isinstance(result, error_cls) for result in results)

    # Assert that 5 requests were made in total (5 transport requests × 0.1 s delay per connection request)
duration = end_time - start_time
assert 0.5 <= duration <= 0.6

# Assert that the cluster is still in the unknown/unverified stage.
assert t._verified_elasticsearch is None

# See that the API isn't hit, instead it's the index requests that are failing.
calls = t.connection_pool.connections[0].calls
assert len(calls) == 5
assert all(call[0] == ("GET", "/") for call in calls)
Loading