From e5593f0279cc8fb17bd5dae8ab909e0a9e657935 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Wed, 16 Dec 2020 21:31:05 -0400 Subject: [PATCH 1/7] Expand ability to ignore some httplib calls. --- aws_xray_sdk/ext/httplib/__init__.py | 4 +-- aws_xray_sdk/ext/httplib/patch.py | 41 ++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/aws_xray_sdk/ext/httplib/__init__.py b/aws_xray_sdk/ext/httplib/__init__.py index bad026d2..ca643471 100644 --- a/aws_xray_sdk/ext/httplib/__init__.py +++ b/aws_xray_sdk/ext/httplib/__init__.py @@ -1,3 +1,3 @@ -from .patch import patch, unpatch +from .patch import patch, unpatch, add_ignored, reset_ignored -__all__ = ['patch', 'unpatch'] +__all__ = ['patch', 'unpatch', 'add_ignored', 'reset_ignored'] diff --git a/aws_xray_sdk/ext/httplib/patch.py b/aws_xray_sdk/ext/httplib/patch.py index 6a1ed24d..a05bbc68 100644 --- a/aws_xray_sdk/ext/httplib/patch.py +++ b/aws_xray_sdk/ext/httplib/patch.py @@ -22,8 +22,33 @@ _XRAY_PROP = '_xray_prop' _XRay_Data = namedtuple('xray_data', ['method', 'host', 'url']) +_XRay_Ignore = namedtuple('xray_ignore', ['subclass', 'hostname', 'urls']) # A flag indicates whether this module is X-Ray patched or not PATCH_FLAG = '__xray_patched' +# Calls that should be ignored +_XRAY_IGNORE = set() + + +def add_ignored(subclass=None, hostname=None, urls=None): + global _XRAY_IGNORE + if subclass is not None or hostname is not None or urls is not None: + urls = urls if urls is None else tuple(urls) + _XRAY_IGNORE.add(_XRay_Ignore(subclass=subclass, hostname=hostname, urls=urls)) + + +def reset_ignored(): + global _XRAY_IGNORE + _XRAY_IGNORE.clear() + _ignored_add_default() + + +def _ignored_add_default(): + # skip httplib tracing for SDK built-in centralized sampling pollers + add_ignored(subclass='botocore.awsrequest.AWSHTTPConnection', urls=['/GetSamplingRules', '/SamplingTargets']) + + +# make sure we have the default rules +_ignored_add_default() def 
http_response_processor(wrapped, instance, args, kwargs, return_value, @@ -77,11 +102,21 @@ def http_send_request_processor(wrapped, instance, args, kwargs, return_value, subsegment.add_exception(exception, stack) +def _ignore_request(subclass, hostname, url): + global _XRAY_IGNORE + for rule in _XRAY_IGNORE: + subclass_match = subclass == rule.subclass if rule.subclass is not None else True + host_match = hostname == rule.hostname if rule.hostname is not None else True + url_match = url in rule.urls if rule.urls is not None else True + if url_match and host_match and subclass_match: + return True + return False + + def _send_request(wrapped, instance, args, kwargs): def decompose_args(method, url, body, headers, encode_chunked=False): - # skip httplib tracing for SDK built-in centralized sampling pollers - if (('/GetSamplingRules' in args or '/SamplingTargets' in args) and - type(instance).__name__ == 'botocore.awsrequest.AWSHTTPConnection'): + # skip any ignored requests + if _ignore_request(type(instance).__name__, instance.host, url): return wrapped(*args, **kwargs) # Only injects headers when the subsegment for the outgoing From 57898ce63832aee39420d03283b24221c4a319d9 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Wed, 16 Dec 2020 21:31:24 -0400 Subject: [PATCH 2/7] Add tests. 
--- tests/ext/httplib/test_httplib.py | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/ext/httplib/test_httplib.py b/tests/ext/httplib/test_httplib.py index 56061a4c..ea626684 100644 --- a/tests/ext/httplib/test_httplib.py +++ b/tests/ext/httplib/test_httplib.py @@ -141,3 +141,55 @@ def test_correct_identify_https(): https_meta = subsegment.http assert https_meta['request']['url'].split(":")[0] == 'https' + + +def test_ignore_url(): + from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + path = '/status/200' + url = 'https://{}{}'.format(BASE_URL, path) + add_ignored(urls=[path]) + _do_req(url, use_https=True) + assert len(xray_recorder.current_segment().subsegments) == 0 + reset_ignored() + + +def test_ignore_hostname(): + from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + path = '/status/200' + url = 'https://{}{}'.format(BASE_URL, path) + add_ignored(hostname=BASE_URL) + _do_req(url, use_https=True) + assert len(xray_recorder.current_segment().subsegments) == 0 + reset_ignored() + + +def test_ignore_subclass(): + class TestClass(httplib.HTTPSConnection): + pass + from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + path = '/status/200' + add_ignored(subclass='TestClass') + conn = TestClass(BASE_URL) + conn.request('GET', path) + conn.getresponse() + assert len(xray_recorder.current_segment().subsegments) == 0 + reset_ignored() + + +def test_ignore_multiple(): + class TestClass(httplib.HTTPSConnection): + pass + from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + path = '/status/200' + add_ignored(subclass='TestClass', hostname=BASE_URL) + conn = TestClass(BASE_URL) + conn.request('GET', path) + conn.getresponse() + assert len(xray_recorder.current_segment().subsegments) == 0 + reset_ignored() + add_ignored(subclass='TestClass', hostname='fake.host') + conn = TestClass(BASE_URL) + conn.request('GET', path) + conn.getresponse() + assert 
len(xray_recorder.current_segment().subsegments) > 0 + reset_ignored() From cf9f93df6447b126421761f54649d8c6b1df0e2e Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Wed, 16 Dec 2020 21:47:30 -0400 Subject: [PATCH 3/7] Add glob match to httplib ignore hostname. --- aws_xray_sdk/ext/httplib/patch.py | 4 ++-- tests/ext/httplib/test_httplib.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/aws_xray_sdk/ext/httplib/patch.py b/aws_xray_sdk/ext/httplib/patch.py index a05bbc68..36658a09 100644 --- a/aws_xray_sdk/ext/httplib/patch.py +++ b/aws_xray_sdk/ext/httplib/patch.py @@ -1,7 +1,7 @@ from collections import namedtuple import sys import wrapt - +import fnmatch import urllib3.connection from aws_xray_sdk.core import xray_recorder @@ -106,7 +106,7 @@ def _ignore_request(subclass, hostname, url): global _XRAY_IGNORE for rule in _XRAY_IGNORE: subclass_match = subclass == rule.subclass if rule.subclass is not None else True - host_match = hostname == rule.hostname if rule.hostname is not None else True + host_match = fnmatch.fnmatch(hostname, rule.hostname) if rule.hostname is not None else True url_match = url in rule.urls if rule.urls is not None else True if url_match and host_match and subclass_match: return True diff --git a/tests/ext/httplib/test_httplib.py b/tests/ext/httplib/test_httplib.py index ea626684..cf27a4d3 100644 --- a/tests/ext/httplib/test_httplib.py +++ b/tests/ext/httplib/test_httplib.py @@ -163,6 +163,16 @@ def test_ignore_hostname(): reset_ignored() +def test_ignore_hostname_glob(): + from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + path = '/status/200' + url = 'https://{}{}'.format(BASE_URL, path) + add_ignored(hostname='http*.org') + _do_req(url, use_https=True) + assert len(xray_recorder.current_segment().subsegments) == 0 + reset_ignored() + + def test_ignore_subclass(): class TestClass(httplib.HTTPSConnection): pass From eceaae2bf9c4f8633c1d0a7f2543991dfff653bb Mon Sep 17 00:00:00 2001 From: 
Jonathan Green Date: Wed, 16 Dec 2020 22:02:32 -0400 Subject: [PATCH 4/7] Clean up httplib tests. --- tests/ext/httplib/test_httplib.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/ext/httplib/test_httplib.py b/tests/ext/httplib/test_httplib.py index cf27a4d3..b0c2fe0d 100644 --- a/tests/ext/httplib/test_httplib.py +++ b/tests/ext/httplib/test_httplib.py @@ -25,7 +25,7 @@ def construct_ctx(): so that later subsegment can be attached. After each test run it cleans up context storage again. """ - from aws_xray_sdk.ext.httplib import unpatch + from aws_xray_sdk.ext.httplib import unpatch, reset_ignored patch(('httplib',)) xray_recorder.configure(service='test', sampling=False, context=Context()) @@ -35,6 +35,7 @@ def construct_ctx(): yield xray_recorder.clear_trace_entities() unpatch() + reset_ignored() def _do_req(url, method='GET', use_https=True): @@ -144,62 +145,61 @@ def test_correct_identify_https(): def test_ignore_url(): - from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' url = 'https://{}{}'.format(BASE_URL, path) add_ignored(urls=[path]) _do_req(url, use_https=True) assert len(xray_recorder.current_segment().subsegments) == 0 - reset_ignored() def test_ignore_hostname(): - from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' url = 'https://{}{}'.format(BASE_URL, path) add_ignored(hostname=BASE_URL) _do_req(url, use_https=True) assert len(xray_recorder.current_segment().subsegments) == 0 - reset_ignored() def test_ignore_hostname_glob(): - from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' url = 'https://{}{}'.format(BASE_URL, path) add_ignored(hostname='http*.org') _do_req(url, use_https=True) assert len(xray_recorder.current_segment().subsegments) 
== 0 - reset_ignored() + + +class TestClass(httplib.HTTPSConnection): + pass def test_ignore_subclass(): - class TestClass(httplib.HTTPSConnection): - pass - from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored + from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' add_ignored(subclass='TestClass') conn = TestClass(BASE_URL) conn.request('GET', path) conn.getresponse() assert len(xray_recorder.current_segment().subsegments) == 0 - reset_ignored() -def test_ignore_multiple(): - class TestClass(httplib.HTTPSConnection): - pass - from aws_xray_sdk.ext.httplib import add_ignored, reset_ignored +def test_ignore_multiple_match(): + from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' add_ignored(subclass='TestClass', hostname=BASE_URL) conn = TestClass(BASE_URL) conn.request('GET', path) conn.getresponse() assert len(xray_recorder.current_segment().subsegments) == 0 - reset_ignored() + + +def test_ignore_multiple_no_match(): + from aws_xray_sdk.ext.httplib import add_ignored + path = '/status/200' add_ignored(subclass='TestClass', hostname='fake.host') conn = TestClass(BASE_URL) conn.request('GET', path) conn.getresponse() assert len(xray_recorder.current_segment().subsegments) > 0 - reset_ignored() From f1b2989b224a223bd512c570300700e90f807c0e Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Wed, 16 Dec 2020 23:39:46 -0400 Subject: [PATCH 5/7] Use full module path for subclass. 
--- aws_xray_sdk/ext/httplib/patch.py | 9 +++++++-- tests/ext/httplib/test_httplib.py | 17 ++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/aws_xray_sdk/ext/httplib/patch.py b/aws_xray_sdk/ext/httplib/patch.py index 36658a09..bfb4fce2 100644 --- a/aws_xray_sdk/ext/httplib/patch.py +++ b/aws_xray_sdk/ext/httplib/patch.py @@ -102,8 +102,13 @@ def http_send_request_processor(wrapped, instance, args, kwargs, return_value, subsegment.add_exception(exception, stack) -def _ignore_request(subclass, hostname, url): +def _ignore_request(instance, hostname, url): global _XRAY_IGNORE + module = instance.__class__.__module__ + if module is None or module == str.__class__.__module__: + subclass = instance.__class__.__name__ + else: + subclass = module + '.' + instance.__class__.__name__ for rule in _XRAY_IGNORE: subclass_match = subclass == rule.subclass if rule.subclass is not None else True host_match = fnmatch.fnmatch(hostname, rule.hostname) if rule.hostname is not None else True @@ -116,7 +121,7 @@ def _ignore_request(subclass, hostname, url): def _send_request(wrapped, instance, args, kwargs): def decompose_args(method, url, body, headers, encode_chunked=False): # skip any ignored requests - if _ignore_request(type(instance).__name__, instance.host, url): + if _ignore_request(instance, instance.host, url): return wrapped(*args, **kwargs) # Only injects headers when the subsegment for the outgoing diff --git a/tests/ext/httplib/test_httplib.py b/tests/ext/httplib/test_httplib.py index b0c2fe0d..5ddf9573 100644 --- a/tests/ext/httplib/test_httplib.py +++ b/tests/ext/httplib/test_httplib.py @@ -171,15 +171,16 @@ def test_ignore_hostname_glob(): assert len(xray_recorder.current_segment().subsegments) == 0 -class TestClass(httplib.HTTPSConnection): +class CustomHttpsConnection(httplib.HTTPSConnection): pass def test_ignore_subclass(): from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' - add_ignored(subclass='TestClass') - conn 
= TestClass(BASE_URL) + subclass = 'tests.ext.httplib.test_httplib.CustomHttpsConnection' + add_ignored(subclass=subclass) + conn = CustomHttpsConnection(BASE_URL) conn.request('GET', path) conn.getresponse() assert len(xray_recorder.current_segment().subsegments) == 0 @@ -188,8 +189,9 @@ def test_ignore_subclass(): def test_ignore_multiple_match(): from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' - add_ignored(subclass='TestClass', hostname=BASE_URL) - conn = TestClass(BASE_URL) + subclass = 'tests.ext.httplib.test_httplib.CustomHttpsConnection' + add_ignored(subclass=subclass, hostname=BASE_URL) + conn = CustomHttpsConnection(BASE_URL) conn.request('GET', path) conn.getresponse() assert len(xray_recorder.current_segment().subsegments) == 0 @@ -198,8 +200,9 @@ def test_ignore_multiple_match(): def test_ignore_multiple_no_match(): from aws_xray_sdk.ext.httplib import add_ignored path = '/status/200' - add_ignored(subclass='TestClass', hostname='fake.host') - conn = TestClass(BASE_URL) + subclass = 'tests.ext.httplib.test_httplib.CustomHttpsConnection' + add_ignored(subclass=subclass, hostname='fake.host') + conn = CustomHttpsConnection(BASE_URL) conn.request('GET', path) conn.getresponse() assert len(xray_recorder.current_segment().subsegments) > 0 From 7a93c1a68c6366b4fa3509f67414e5b513ab4c42 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Mon, 4 Jan 2021 13:20:58 -0400 Subject: [PATCH 6/7] Add documentation for ignoring httplib requests --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 4fa33d71..30e979c8 100644 --- a/README.md +++ b/README.md @@ -473,6 +473,27 @@ XRayMiddleware(app, xray_recorder) db = XRayFlaskSqlAlchemy(app) ``` + +### Ignoring httplib requests + +If you want to ignore certain httplib requests you can do so based on the hostname or URL that is being requsted. 
+ +```python +from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored + +# ignore requests to test.myapp.com +xray_add_ignored(hostname='test.myapp.com') + +# ignore requests to a subdomain of myapp.com with a glob pattern +xray_add_ignored(hostname='*.myapp.com') + +# ignore requests to /test-path and /other-test-path +xray_add_ignored(urls=['/test-path', '/other-test-path']) + +# ignore requests to myapp.com for /test-url +xray_add_ignored(hostname='myapp.com', urls=['/test-url']) +``` + ## License The AWS X-Ray SDK for Python is licensed under the Apache 2.0 License. See LICENSE and NOTICE.txt for more information. From ff94da31dc3aabc22c081457245a6386c76eb4f2 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Mon, 4 Jan 2021 15:52:00 -0400 Subject: [PATCH 7/7] Code review feedback --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 30e979c8..48591b34 100644 --- a/README.md +++ b/README.md @@ -476,7 +476,7 @@ db = XRayFlaskSqlAlchemy(app) ### Ignoring httplib requests -If you want to ignore certain httplib requests you can do so based on the hostname or URL that is being requsted. +If you want to ignore certain httplib requests you can do so based on the hostname or URL that is being requested. The hostname is matched using the Python [fnmatch library](https://docs.python.org/3/library/fnmatch.html), which does Unix glob-style matching. ```python from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored @@ -494,6 +494,15 @@ xray_add_ignored(urls=['/test-path', '/other-test-path']) xray_add_ignored(hostname='myapp.com', urls=['/test-url']) ``` +If you use a subclass of httplib to make your requests, you can also filter on the class name that initiates the request. This must use the complete package name to do the match. 
+ +```python +from aws_xray_sdk.ext.httplib import add_ignored as xray_add_ignored + +# ignore all requests made by botocore +xray_add_ignored(subclass='botocore.awsrequest.AWSHTTPConnection') +``` + ## License The AWS X-Ray SDK for Python is licensed under the Apache 2.0 License. See LICENSE and NOTICE.txt for more information.