diff --git a/aws_xray_sdk/ext/aiohttp/client.py b/aws_xray_sdk/ext/aiohttp/client.py index 951d511e..0015ef95 100644 --- a/aws_xray_sdk/ext/aiohttp/client.py +++ b/aws_xray_sdk/ext/aiohttp/client.py @@ -8,7 +8,7 @@ from aws_xray_sdk.core import xray_recorder from aws_xray_sdk.core.models import http from aws_xray_sdk.core.utils import stacktrace -from aws_xray_sdk.ext.util import inject_trace_header, strip_url +from aws_xray_sdk.ext.util import inject_trace_header, strip_url, get_hostname # All aiohttp calls will entail outgoing HTTP requests, only in some ad-hoc # exceptions the namespace will be flip back to local. @@ -22,7 +22,7 @@ async def begin_subsegment(session, trace_config_ctx, params): - name = trace_config_ctx.name if trace_config_ctx.name else strip_url(str(params.url)) + name = trace_config_ctx.name if trace_config_ctx.name else get_hostname(str(params.url)) subsegment = xray_recorder.begin_subsegment(name, REMOTE_NAMESPACE) # No-op if subsegment is `None` due to `LOG_ERROR`. 
@@ -31,7 +31,7 @@ async def begin_subsegment(session, trace_config_ctx, params): else: trace_config_ctx.give_up = False subsegment.put_http_meta(http.METHOD, params.method) - subsegment.put_http_meta(http.URL, params.url.human_repr()) + subsegment.put_http_meta(http.URL, strip_url(params.url.human_repr())) inject_trace_header(params.headers, subsegment) diff --git a/aws_xray_sdk/ext/httplib/patch.py b/aws_xray_sdk/ext/httplib/patch.py index a2db41d0..6a1ed24d 100644 --- a/aws_xray_sdk/ext/httplib/patch.py +++ b/aws_xray_sdk/ext/httplib/patch.py @@ -8,7 +8,7 @@ from aws_xray_sdk.core.models import http from aws_xray_sdk.core.exceptions.exceptions import SegmentNotFoundException from aws_xray_sdk.core.patcher import _PATCHED_MODULES -from aws_xray_sdk.ext.util import inject_trace_header, strip_url, unwrap +from aws_xray_sdk.ext.util import inject_trace_header, strip_url, unwrap, get_hostname if sys.version_info >= (3, 0, 0): PY2 = False @@ -33,7 +33,7 @@ def http_response_processor(wrapped, instance, args, kwargs, return_value, return subsegment.put_http_meta(http.METHOD, xray_data.method) - subsegment.put_http_meta(http.URL, xray_data.url) + subsegment.put_http_meta(http.URL, strip_url(xray_data.url)) if return_value: subsegment.put_http_meta(http.STATUS, return_value.status) @@ -57,7 +57,7 @@ def _xray_traced_http_getresponse(wrapped, instance, args, kwargs): return xray_recorder.record_subsegment( wrapped, instance, args, kwargs, - name=strip_url(xray_data.url), + name=get_hostname(xray_data.url), namespace='remote', meta_processor=http_response_processor, ) @@ -71,7 +71,7 @@ def http_send_request_processor(wrapped, instance, args, kwargs, return_value, # we don't delete the attr as we can have multiple reads subsegment.put_http_meta(http.METHOD, xray_data.method) - subsegment.put_http_meta(http.URL, xray_data.url) + subsegment.put_http_meta(http.URL, strip_url(xray_data.url)) if exception: subsegment.add_exception(exception, stack) @@ -111,7 +111,7 @@ def 
decompose_args(method, url, body, headers, encode_chunked=False): # we add a segment here in case connect fails return xray_recorder.record_subsegment( wrapped, instance, args, kwargs, - name=strip_url(xray_data.url), + name=get_hostname(xray_data.url), namespace='remote', meta_processor=http_send_request_processor ) @@ -127,7 +127,7 @@ def http_read_processor(wrapped, instance, args, kwargs, return_value, # we don't delete the attr as we can have multiple reads subsegment.put_http_meta(http.METHOD, xray_data.method) - subsegment.put_http_meta(http.URL, xray_data.url) + subsegment.put_http_meta(http.URL, strip_url(xray_data.url)) subsegment.put_http_meta(http.STATUS, instance.status) if exception: @@ -141,7 +141,7 @@ def _xray_traced_http_client_read(wrapped, instance, args, kwargs): return xray_recorder.record_subsegment( wrapped, instance, args, kwargs, - name=strip_url(xray_data.url), + name=get_hostname(xray_data.url), namespace='remote', meta_processor=http_read_processor ) diff --git a/aws_xray_sdk/ext/requests/patch.py b/aws_xray_sdk/ext/requests/patch.py index 5220ac46..66e7ef81 100644 --- a/aws_xray_sdk/ext/requests/patch.py +++ b/aws_xray_sdk/ext/requests/patch.py @@ -2,7 +2,7 @@ from aws_xray_sdk.core import xray_recorder from aws_xray_sdk.core.models import http -from aws_xray_sdk.ext.util import inject_trace_header, strip_url +from aws_xray_sdk.ext.util import inject_trace_header, strip_url, get_hostname def patch(): @@ -26,7 +26,7 @@ def _xray_traced_requests(wrapped, instance, args, kwargs): return xray_recorder.record_subsegment( wrapped, instance, args, kwargs, - name=strip_url(url), + name=get_hostname(url), namespace='remote', meta_processor=requests_processor, ) @@ -48,7 +48,7 @@ def requests_processor(wrapped, instance, args, kwargs, url = kwargs.get('url') or args[1] subsegment.put_http_meta(http.METHOD, method) - subsegment.put_http_meta(http.URL, url) + subsegment.put_http_meta(http.URL, strip_url(url)) if return_value is not None: 
subsegment.put_http_meta(http.STATUS, return_value.status_code) diff --git a/aws_xray_sdk/ext/util.py b/aws_xray_sdk/ext/util.py index 305fa3cc..8390f9ee 100644 --- a/aws_xray_sdk/ext/util.py +++ b/aws_xray_sdk/ext/util.py @@ -4,10 +4,17 @@ from aws_xray_sdk.core.models import http import wrapt +import sys + +if sys.version_info.major >= 3: # Python 3 and above + from urllib.parse import urlparse +else: # Python 2 and below + from urlparse import urlparse first_cap_re = re.compile('(.)([A-Z][a-z]+)') all_cap_re = re.compile('([a-z0-9])([A-Z])') +UNKNOWN_HOSTNAME = "UNKNOWN HOST" def inject_trace_header(headers, entity): @@ -118,6 +125,16 @@ def strip_url(url): return url.partition('?')[0] if url else url +def get_hostname(url): + if url is None: + return UNKNOWN_HOSTNAME + url_parse = urlparse(url) + hostname = url_parse.hostname + if hostname is None: + return UNKNOWN_HOSTNAME + return hostname if hostname else url # None was handled above, so this falls back to the raw URL only when the hostname is empty + + def unwrap(obj, attr): """ Will unwrap a `wrapt` attribute diff --git a/tests/ext/aiohttp/test_client.py b/tests/ext/aiohttp/test_client.py index 36ffa138..78448819 100644 --- a/tests/ext/aiohttp/test_client.py +++ b/tests/ext/aiohttp/test_client.py @@ -4,7 +4,7 @@ from aws_xray_sdk.core import xray_recorder from aws_xray_sdk.core.async_context import AsyncContext from aws_xray_sdk.core.exceptions.exceptions import SegmentNotFoundException -from aws_xray_sdk.ext.util import strip_url +from aws_xray_sdk.ext.util import strip_url, get_hostname from aws_xray_sdk.ext.aiohttp.client import aws_xray_trace_config from aws_xray_sdk.ext.aiohttp.client import REMOTE_NAMESPACE, LOCAL_NAMESPACE @@ -34,11 +34,11 @@ async def test_ok(loop, recorder): pass subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == strip_url(url) + assert subsegment.name == get_hostname(url) assert subsegment.namespace == REMOTE_NAMESPACE http_meta = subsegment.http - assert
http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'] == 'GET' assert http_meta['response']['status'] == status_code @@ -66,11 +66,11 @@ async def test_error(loop, recorder): pass subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.error http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'] == 'POST' assert http_meta['response']['status'] == status_code @@ -85,12 +85,12 @@ async def test_throttle(loop, recorder): pass subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.error assert subsegment.throttle http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'] == 'HEAD' assert http_meta['response']['status'] == status_code @@ -105,11 +105,11 @@ async def test_fault(loop, recorder): pass subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.fault http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'] == 'PUT' assert http_meta['response']['status'] == status_code diff --git a/tests/ext/httplib/test_httplib.py b/tests/ext/httplib/test_httplib.py index ec528648..56061a4c 100644 --- a/tests/ext/httplib/test_httplib.py +++ b/tests/ext/httplib/test_httplib.py @@ -4,7 +4,7 @@ from aws_xray_sdk.core import patch from aws_xray_sdk.core import xray_recorder from aws_xray_sdk.core.context import Context -from aws_xray_sdk.ext.util import strip_url +from aws_xray_sdk.ext.util 
import strip_url, get_hostname if sys.version_info >= (3, 0, 0): import http.client as httplib @@ -57,10 +57,10 @@ def test_ok(): url = 'https://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code) _do_req(url) subsegment = xray_recorder.current_segment().subsegments[1] - assert subsegment.name == strip_url(url) + assert subsegment.name == get_hostname(url) http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'GET' assert http_meta['response']['status'] == status_code @@ -70,11 +70,11 @@ def test_error(): url = 'https://{}/status/{}'.format(BASE_URL, status_code) _do_req(url, 'POST') subsegment = xray_recorder.current_segment().subsegments[1] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.error http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'POST' assert http_meta['response']['status'] == status_code @@ -84,12 +84,12 @@ def test_throttle(): url = 'https://{}/status/{}'.format(BASE_URL, status_code) _do_req(url, 'HEAD') subsegment = xray_recorder.current_segment().subsegments[1] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.error assert subsegment.throttle http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'HEAD' assert http_meta['response']['status'] == status_code @@ -99,11 +99,11 @@ def test_fault(): url = 'https://{}/status/{}'.format(BASE_URL, status_code) _do_req(url, 'PUT') subsegment = xray_recorder.current_segment().subsegments[1] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.fault http_meta = subsegment.http - assert 
http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'PUT' assert http_meta['response']['status'] == status_code @@ -126,7 +126,7 @@ def test_correct_identify_http(): url = 'http://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code) _do_req(url, use_https=False) subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == strip_url(url) + assert subsegment.name == get_hostname(url) http_meta = subsegment.http assert http_meta['request']['url'].split(":")[0] == 'http' @@ -137,7 +137,7 @@ def test_correct_identify_https(): url = 'https://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code) _do_req(url, use_https=True) subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == strip_url(url) + assert subsegment.name == get_hostname(url) https_meta = subsegment.http assert https_meta['request']['url'].split(":")[0] == 'https' diff --git a/tests/ext/requests/test_requests.py b/tests/ext/requests/test_requests.py index 73817648..a31e108e 100644 --- a/tests/ext/requests/test_requests.py +++ b/tests/ext/requests/test_requests.py @@ -4,7 +4,7 @@ from aws_xray_sdk.core import patch from aws_xray_sdk.core import xray_recorder from aws_xray_sdk.core.context import Context -from aws_xray_sdk.ext.util import strip_url +from aws_xray_sdk.ext.util import strip_url, get_hostname patch(('requests',)) @@ -32,10 +32,11 @@ def test_ok(): url = 'http://{}/status/{}?foo=bar'.format(BASE_URL, status_code) requests.get(url) subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == strip_url(url) + assert get_hostname(url) == BASE_URL + assert subsegment.name == get_hostname(url) http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'GET' assert http_meta['response']['status'] == 
status_code @@ -45,11 +46,11 @@ def test_error(): url = 'http://{}/status/{}'.format(BASE_URL, status_code) requests.post(url) subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.error http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'POST' assert http_meta['response']['status'] == status_code @@ -59,12 +60,12 @@ def test_throttle(): url = 'http://{}/status/{}'.format(BASE_URL, status_code) requests.head(url) subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.error assert subsegment.throttle http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'HEAD' assert http_meta['response']['status'] == status_code @@ -74,16 +75,16 @@ def test_fault(): url = 'http://{}/status/{}'.format(BASE_URL, status_code) requests.put(url) subsegment = xray_recorder.current_segment().subsegments[0] - assert subsegment.name == url + assert subsegment.name == get_hostname(url) assert subsegment.fault http_meta = subsegment.http - assert http_meta['request']['url'] == url + assert http_meta['request']['url'] == strip_url(url) assert http_meta['request']['method'].upper() == 'PUT' assert http_meta['response']['status'] == status_code -def test_invalid_url(): +def test_nonexistent_domain(): try: requests.get('http://doesnt.exist') except Exception: @@ -94,3 +95,65 @@ def test_invalid_url(): exception = subsegment.cause['exceptions'][0] assert exception.type == 'ConnectionError' + + +def test_invalid_url(): + url = 'KLSDFJKLSDFJKLSDJF' + try: + requests.get(url) + except Exception: + # prevent uncaught exception from breaking test run + pass +
subsegment = xray_recorder.current_segment().subsegments[0] + assert subsegment.name == get_hostname(url) + assert subsegment.fault + + http_meta = subsegment.http + assert http_meta['request']['url'] == strip_url(url) + + exception = subsegment.cause['exceptions'][0] + assert exception.type == 'MissingSchema' + + +def test_name_uses_hostname(): + url1 = 'http://{}/fakepath/stuff/koo/lai/ahh'.format(BASE_URL) + requests.get(url1) + subsegment = xray_recorder.current_segment().subsegments[-1] + assert subsegment.name == BASE_URL + http_meta1 = subsegment.http + assert http_meta1['request']['url'] == strip_url(url1) + assert http_meta1['request']['method'].upper() == 'GET' + + url2 = 'http://{}/'.format(BASE_URL) + requests.get(url2, params={"some": "payload", "not": "toBeIncluded"}) + subsegment = xray_recorder.current_segment().subsegments[-1] + assert subsegment.name == BASE_URL + http_meta2 = subsegment.http + assert http_meta2['request']['url'] == strip_url(url2) + assert http_meta2['request']['method'].upper() == 'GET' + + url3 = 'http://subdomain.{}/fakepath/stuff/koo/lai/ahh'.format(BASE_URL) + try: + requests.get(url3) + except Exception: + # This is an invalid URL, so we don't want to break the test + pass + subsegment = xray_recorder.current_segment().subsegments[-1] + assert subsegment.name == "subdomain."
+ BASE_URL + http_meta3 = subsegment.http + assert http_meta3['request']['url'] == strip_url(url3) + assert http_meta3['request']['method'].upper() == 'GET' + + +def test_strip_http_url(): + status_code = 200 + url = 'http://{}/get?foo=bar'.format(BASE_URL) + requests.get(url) + subsegment = xray_recorder.current_segment().subsegments[0] + assert subsegment.name == get_hostname(url) + + http_meta = subsegment.http + assert http_meta['request']['url'] == strip_url(url) + assert http_meta['request']['method'].upper() == 'GET' + assert http_meta['response']['status'] == status_code + diff --git a/tests/test_utils.py b/tests/test_utils.py index bd85421d..939fde42 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,6 @@ -from aws_xray_sdk.ext.util import to_snake_case +from aws_xray_sdk.ext.util import to_snake_case, get_hostname, strip_url + +UNKNOWN_HOST = "UNKNOWN HOST" def test_to_snake_case(): @@ -13,3 +15,40 @@ def test_to_snake_case(): s4 = to_snake_case('getHTTPResponse') assert s4 == 'get_http_response' + + +def test_get_hostname(): + s1 = get_hostname("https://amazon.com/") + assert s1 == "amazon.com" + + s2 = get_hostname("https://amazon.com/avery_long/path/and/stuff") + assert s2 == "amazon.com" + + s3 = get_hostname("http://aws.amazon.com/should_get/sub/domains") + assert s3 == "aws.amazon.com" + + s4 = get_hostname("https://amazon.com/somestuff?get=request&data=chiem") + assert s4 == "amazon.com" + + s5 = get_hostname("INVALID_URL") + assert s5 == UNKNOWN_HOST + + s6 = get_hostname("") + assert s6 == UNKNOWN_HOST + + s7 = get_hostname(None) + assert s7 == UNKNOWN_HOST + + +def test_strip_url(): + s1 = strip_url("https://amazon.com/page?getdata=response&stuff=morestuff") + assert s1 == "https://amazon.com/page" + + s2 = strip_url("aws.google.com/index.html?field=data&suchcool=data") + assert s2 == "aws.google.com/index.html" + + s3 = strip_url("INVALID_URL") + assert s3 == "INVALID_URL" + + assert strip_url("") == "" + assert not 
strip_url(None)