Skip to content

Downstream Http Calls should use hostname rather than full URL as subsegment name #192

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 6, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions aws_xray_sdk/ext/aiohttp/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.models import http
from aws_xray_sdk.core.utils import stacktrace
from aws_xray_sdk.ext.util import inject_trace_header, strip_url
from aws_xray_sdk.ext.util import inject_trace_header, strip_url, get_hostname

# All aiohttp calls will entail outgoing HTTP requests, only in some ad-hoc
# exceptions the namespace will be flip back to local.
Expand All @@ -22,7 +22,7 @@


async def begin_subsegment(session, trace_config_ctx, params):
name = trace_config_ctx.name if trace_config_ctx.name else strip_url(str(params.url))
name = trace_config_ctx.name if trace_config_ctx.name else get_hostname(str(params.url))
subsegment = xray_recorder.begin_subsegment(name, REMOTE_NAMESPACE)

# No-op if subsegment is `None` due to `LOG_ERROR`.
Expand All @@ -31,7 +31,7 @@ async def begin_subsegment(session, trace_config_ctx, params):
else:
trace_config_ctx.give_up = False
subsegment.put_http_meta(http.METHOD, params.method)
subsegment.put_http_meta(http.URL, params.url.human_repr())
subsegment.put_http_meta(http.URL, strip_url(params.url.human_repr()))
inject_trace_header(params.headers, subsegment)


Expand Down
14 changes: 7 additions & 7 deletions aws_xray_sdk/ext/httplib/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from aws_xray_sdk.core.models import http
from aws_xray_sdk.core.exceptions.exceptions import SegmentNotFoundException
from aws_xray_sdk.core.patcher import _PATCHED_MODULES
from aws_xray_sdk.ext.util import inject_trace_header, strip_url, unwrap
from aws_xray_sdk.ext.util import inject_trace_header, strip_url, unwrap, get_hostname

if sys.version_info >= (3, 0, 0):
PY2 = False
Expand All @@ -33,7 +33,7 @@ def http_response_processor(wrapped, instance, args, kwargs, return_value,
return

subsegment.put_http_meta(http.METHOD, xray_data.method)
subsegment.put_http_meta(http.URL, xray_data.url)
subsegment.put_http_meta(http.URL, strip_url(xray_data.url))

if return_value:
subsegment.put_http_meta(http.STATUS, return_value.status)
Expand All @@ -57,7 +57,7 @@ def _xray_traced_http_getresponse(wrapped, instance, args, kwargs):

return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=strip_url(xray_data.url),
name=get_hostname(xray_data.url),
namespace='remote',
meta_processor=http_response_processor,
)
Expand All @@ -71,7 +71,7 @@ def http_send_request_processor(wrapped, instance, args, kwargs, return_value,

# we don't delete the attr as we can have multiple reads
subsegment.put_http_meta(http.METHOD, xray_data.method)
subsegment.put_http_meta(http.URL, xray_data.url)
subsegment.put_http_meta(http.URL, strip_url(xray_data.url))

if exception:
subsegment.add_exception(exception, stack)
Expand Down Expand Up @@ -111,7 +111,7 @@ def decompose_args(method, url, body, headers, encode_chunked=False):
# we add a segment here in case connect fails
return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=strip_url(xray_data.url),
name=get_hostname(xray_data.url),
namespace='remote',
meta_processor=http_send_request_processor
)
Expand All @@ -127,7 +127,7 @@ def http_read_processor(wrapped, instance, args, kwargs, return_value,

# we don't delete the attr as we can have multiple reads
subsegment.put_http_meta(http.METHOD, xray_data.method)
subsegment.put_http_meta(http.URL, xray_data.url)
subsegment.put_http_meta(http.URL, strip_url(xray_data.url))
subsegment.put_http_meta(http.STATUS, instance.status)

if exception:
Expand All @@ -141,7 +141,7 @@ def _xray_traced_http_client_read(wrapped, instance, args, kwargs):

return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=strip_url(xray_data.url),
name=get_hostname(xray_data.url),
namespace='remote',
meta_processor=http_read_processor
)
Expand Down
6 changes: 3 additions & 3 deletions aws_xray_sdk/ext/requests/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.models import http
from aws_xray_sdk.ext.util import inject_trace_header, strip_url
from aws_xray_sdk.ext.util import inject_trace_header, strip_url, get_hostname


def patch():
Expand All @@ -26,7 +26,7 @@ def _xray_traced_requests(wrapped, instance, args, kwargs):

return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=strip_url(url),
name=get_hostname(url),
namespace='remote',
meta_processor=requests_processor,
)
Expand All @@ -48,7 +48,7 @@ def requests_processor(wrapped, instance, args, kwargs,
url = kwargs.get('url') or args[1]

subsegment.put_http_meta(http.METHOD, method)
subsegment.put_http_meta(http.URL, url)
subsegment.put_http_meta(http.URL, strip_url(url))

if return_value is not None:
subsegment.put_http_meta(http.STATUS, return_value.status_code)
Expand Down
14 changes: 14 additions & 0 deletions aws_xray_sdk/ext/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
from aws_xray_sdk.core.models import http

import wrapt
import sys

if sys.version_info.major >= 3: # Python 3 and above
from urllib.parse import urlparse
else: # Python 2 and below
from urlparse import urlparse


first_cap_re = re.compile('(.)([A-Z][a-z]+)')
Expand Down Expand Up @@ -118,6 +124,14 @@ def strip_url(url):
return url.partition('?')[0] if url else url


def get_hostname(url):
if url is None:
return None
url_parse = urlparse(url)
hostname = url_parse.hostname
return hostname if hostname else url # If hostname is none, we return the regular URL; indication of malformed url
Copy link

@c1tadel c1tadel Dec 5, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this fall-through behavior consistent with other SDKs? If we ask an HTTP client for a URL and get a malformed answer I'm not sure why we'd want to pass it back to the trace -- seems like a repeat of the issue this is intended to solve.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is in the case where the URL passed in might not necessarily be a parseable URL. Since customers can randomly invoke something like: requests.get("INVALID_DOMAIN_NAME"), we'd still want to be able to capture these types of errors and indicate failure in the subsegment. Otherwise, the subsegment won't be generated at all.

The behavior on our SDKs are mixed; some of them throw NP exceptions because the hostname technically doesn't exist, and therefore doesn't produce anything (which seems like a bug to me).

With that being said, I think doing something similar to Node would be the best of both worlds. Using a known "bad hostname" name, and then still capturing the exception in the subsegment.



def unwrap(obj, attr):
"""
Will unwrap a `wrapt` attribute
Expand Down
18 changes: 9 additions & 9 deletions tests/ext/aiohttp/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.async_context import AsyncContext
from aws_xray_sdk.core.exceptions.exceptions import SegmentNotFoundException
from aws_xray_sdk.ext.util import strip_url
from aws_xray_sdk.ext.util import strip_url, get_hostname
from aws_xray_sdk.ext.aiohttp.client import aws_xray_trace_config
from aws_xray_sdk.ext.aiohttp.client import REMOTE_NAMESPACE, LOCAL_NAMESPACE

Expand Down Expand Up @@ -34,11 +34,11 @@ async def test_ok(loop, recorder):
pass

subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == strip_url(url)
assert subsegment.name == get_hostname(url)
assert subsegment.namespace == REMOTE_NAMESPACE

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'] == 'GET'
assert http_meta['response']['status'] == status_code

Expand Down Expand Up @@ -66,11 +66,11 @@ async def test_error(loop, recorder):
pass

subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == url
assert subsegment.name == get_hostname(url)
assert subsegment.error

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'] == 'POST'
assert http_meta['response']['status'] == status_code

Expand All @@ -85,12 +85,12 @@ async def test_throttle(loop, recorder):
pass

subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == url
assert subsegment.name == get_hostname(url)
assert subsegment.error
assert subsegment.throttle

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'] == 'HEAD'
assert http_meta['response']['status'] == status_code

Expand All @@ -105,11 +105,11 @@ async def test_fault(loop, recorder):
pass

subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == url
assert subsegment.name == get_hostname(url)
assert subsegment.fault

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'] == 'PUT'
assert http_meta['response']['status'] == status_code

Expand Down
22 changes: 11 additions & 11 deletions tests/ext/httplib/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from aws_xray_sdk.core import patch
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.context import Context
from aws_xray_sdk.ext.util import strip_url
from aws_xray_sdk.ext.util import strip_url, get_hostname

if sys.version_info >= (3, 0, 0):
import http.client as httplib
Expand Down Expand Up @@ -57,10 +57,10 @@ def test_ok():
url = 'https://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code)
_do_req(url)
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == strip_url(url)
assert subsegment.name == get_hostname(url)

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'].upper() == 'GET'
assert http_meta['response']['status'] == status_code

Expand All @@ -70,11 +70,11 @@ def test_error():
url = 'https://{}/status/{}'.format(BASE_URL, status_code)
_do_req(url, 'POST')
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
assert subsegment.name == get_hostname(url)
assert subsegment.error

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'].upper() == 'POST'
assert http_meta['response']['status'] == status_code

Expand All @@ -84,12 +84,12 @@ def test_throttle():
url = 'https://{}/status/{}'.format(BASE_URL, status_code)
_do_req(url, 'HEAD')
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
assert subsegment.name == get_hostname(url)
assert subsegment.error
assert subsegment.throttle

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'].upper() == 'HEAD'
assert http_meta['response']['status'] == status_code

Expand All @@ -99,11 +99,11 @@ def test_fault():
url = 'https://{}/status/{}'.format(BASE_URL, status_code)
_do_req(url, 'PUT')
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
assert subsegment.name == get_hostname(url)
assert subsegment.fault

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['url'] == strip_url(url)
assert http_meta['request']['method'].upper() == 'PUT'
assert http_meta['response']['status'] == status_code

Expand All @@ -126,7 +126,7 @@ def test_correct_identify_http():
url = 'http://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code)
_do_req(url, use_https=False)
subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == strip_url(url)
assert subsegment.name == get_hostname(url)

http_meta = subsegment.http
assert http_meta['request']['url'].split(":")[0] == 'http'
Expand All @@ -137,7 +137,7 @@ def test_correct_identify_https():
url = 'https://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code)
_do_req(url, use_https=True)
subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == strip_url(url)
assert subsegment.name == get_hostname(url)

https_meta = subsegment.http
assert https_meta['request']['url'].split(":")[0] == 'https'
Loading