Skip to content

httplib support #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 23, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions aws_xray_sdk/core/patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
'requests',
'sqlite3',
'mysql',
'httplib', # TODO: perhaps requests should map to this below
Copy link
Contributor

@haotianw465 haotianw465 Feb 16, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would keep the requests patch for now. If a user's intention is to only patch requests but has other libraries depend on httplib, then patching at a lower level may result in unexpected behavior. IMO the SDK should only patch httplib when users explicit do so.

A small paragraph on https://github.com/aws/aws-xray-sdk-python/blob/master/docs/thirdparty.rst explaining httplib/httplib2/requests dependency and which to patch could be very helpful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed comment, and added some docs

)

_PATCHED_MODULES = set()
Expand Down
3 changes: 3 additions & 0 deletions aws_xray_sdk/ext/httplib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .patch import patch

__all__ = ['patch']
126 changes: 126 additions & 0 deletions aws_xray_sdk/ext/httplib/patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import wrapt
from collections import namedtuple

from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.models import http
from aws_xray_sdk.ext.util import inject_trace_header

import ssl


_XRAY_PROP = '_xray_prop'
_XRay_Data = namedtuple('xray_data', ['method', 'host', 'url'])


# ? is not a valid entity, and we don't want things after the ? for the segment name
def _strip_url(url: str):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you move this helper method to aws_xray_sdk.ext.util? You use it on requests patch as well. It can be further used to strip any url string on any library patcher.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW as a helper method a None check could be useful in case the caller forgets to do the check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved, didn't put the None check as that's a fairly low-level API and you'll get an exception of operating on a 'None' already...thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest make it no-op if input is None. This becomes a general helper under a utility module so it could be used on logging, other patching code etc by other developers. It is bad if the SDK throws a None type error from this part.

Also a nitpick could you remove the prefix _ since its usage is not private?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return url.partition('?')[0]


def http_response_processor(wrapped, instance, args, kwargs, return_value,
exception, subsegment, stack):
xray_data = getattr(instance, _XRAY_PROP)

subsegment.put_http_meta(http.METHOD, xray_data.method)
subsegment.put_http_meta(http.URL, xray_data.url)

if return_value:
subsegment.put_http_meta(http.STATUS, return_value.code)

# propagate to response object
xray_data = _XRay_Data('READ', xray_data.host, xray_data.url)
setattr(return_value, _XRAY_PROP, xray_data)

if exception:
subsegment.add_exception(exception, stack)


def _xray_traced_http_client(wrapped, instance, args, kwargs):
if kwargs.get('buffering', False):
return wrapped(*args, **kwargs) # ignore py2 calls that fail as 'buffering` only exists in py2.

xray_data = getattr(instance, _XRAY_PROP)

return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=_strip_url(xray_data.url),
namespace='remote',
meta_processor=http_response_processor,
)


def http_request_processor(wrapped, instance, args, kwargs, return_value,
exception, subsegment, stack):
xray_data = getattr(instance, _XRAY_PROP)

# we don't delete the attr as we can have multiple reads
subsegment.put_http_meta(http.METHOD, xray_data.method)
subsegment.put_http_meta(http.URL, xray_data.url)

if exception:
subsegment.add_exception(exception, stack)


def _prep_request(wrapped, instance, args, kwargs):
def decompose_args(method, url, body, headers, encode_chunked):
inject_trace_header(headers, xray_recorder.current_subsegment())

# we have to check against sock because urllib3's HTTPSConnection inherit's from http.client.HTTPConnection
scheme = 'https' if isinstance(instance.sock, ssl.SSLSocket) else 'http'
xray_url = '{}://{}{}'.format(scheme, instance.host, url)
xray_data = _XRay_Data(method, instance.host, xray_url)
setattr(instance, _XRAY_PROP, xray_data)

return xray_recorder.record_subsegment(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason to record this local computation as a subsegment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC if the connect fails, getresponse will not be called.

wrapped, instance, args, kwargs,
name=_strip_url(xray_data.url),
namespace='remote',
meta_processor=http_request_processor
)

return decompose_args(*args, **kwargs)


def http_read_processor(wrapped, instance, args, kwargs, return_value,
exception, subsegment, stack):
xray_data = getattr(instance, _XRAY_PROP)

# we don't delete the attr as we can have multiple reads
subsegment.put_http_meta(http.METHOD, xray_data.method)
subsegment.put_http_meta(http.URL, xray_data.url)
subsegment.put_http_meta(http.STATUS, instance.status)
subsegment.apply_status_code(instance.status)
Copy link
Contributor

@haotianw465 haotianw465 Feb 16, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't have to manually call apply_status_code. put_http_meta has logic to adjust fault/error/throttle flags if a status code is added.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh, probably copied from an old version of the AWS SDK, removed


if exception:
subsegment.add_exception(exception, stack)


def _xray_traced_http_client_read(wrapped, instance, args, kwargs):
xray_data = getattr(instance, _XRAY_PROP)

return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=_strip_url(xray_data.url),
namespace='remote',
meta_processor=http_read_processor
)


def patch():
wrapt.wrap_function_wrapper(
'http.client',
'HTTPConnection._send_request',
_prep_request
)

wrapt.wrap_function_wrapper(
'http.client',
'HTTPConnection.getresponse',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are patching both HTTPConnection.getresponse and HTTPResponse.read. Is the latter some duplicate work since the former already set url, method and status?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure I follow, on the read you're reading the body, which you may not do on 204s, and you want all the information from the requests to be marked on the subsegment of the read in case the response segment is sampled out.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood.

_xray_traced_http_client
)

wrapt.wrap_function_wrapper(
'http.client',
'HTTPResponse.read',
_xray_traced_http_client_read
)
3 changes: 2 additions & 1 deletion aws_xray_sdk/ext/requests/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.models import http
from aws_xray_sdk.ext.util import inject_trace_header
from aws_xray_sdk.ext.httplib.patch import _strip_url


def patch():
Expand All @@ -26,7 +27,7 @@ def _xray_traced_requests(wrapped, instance, args, kwargs):

return xray_recorder.record_subsegment(
wrapped, instance, args, kwargs,
name=url,
name=_strip_url(url),
namespace='remote',
meta_processor=requests_processor,
)
Expand Down
Empty file added tests/ext/httplib/__init__.py
Empty file.
95 changes: 95 additions & 0 deletions tests/ext/httplib/test_httplib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import pytest
import requests

from aws_xray_sdk.core import patch
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core.context import Context


patch(('httplib',))

# httpbin.org is created by the same author of requests to make testing http easy.
BASE_URL = 'httpbin.org'


@pytest.fixture(autouse=True)
def construct_ctx():
"""
Clean up context storage on each test run and begin a segment
so that later subsegment can be attached. After each test run
it cleans up context storage again.
"""
xray_recorder.configure(service='test', sampling=False, context=Context())
xray_recorder.clear_trace_entities()
xray_recorder.begin_segment('name')
yield
xray_recorder.clear_trace_entities()


def test_ok():
Copy link
Contributor

@haotianw465 haotianw465 Feb 16, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure your intention on using requests syntax to test httplib patch. This unit test is valid given the knowledge of requests depends on httplib. But I feel like this is a little bit anti-pattern since the scope is beyond httplib and involves requests implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rewrote to use httplib

status_code = 200
url = 'http://{}/status/{}?foo=bar&baz=foo'.format(BASE_URL, status_code)
requests.get(url)
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this assertion pass when query string is stripped?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, bug

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on your patching code you use the string before "?" as the subsegment name.
So to verify the http subsegment is named correctly, should the test asserts name == http://{}/status/{} and pass?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed


http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['method'].upper() == 'GET'
assert http_meta['response']['status'] == status_code


def test_error():
status_code = 400
url = 'http://{}/status/{}'.format(BASE_URL, status_code)
requests.post(url)
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
assert subsegment.error

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['method'].upper() == 'POST'
assert http_meta['response']['status'] == status_code


def test_throttle():
status_code = 429
url = 'http://{}/status/{}'.format(BASE_URL, status_code)
requests.head(url)
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
assert subsegment.error
assert subsegment.throttle

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['method'].upper() == 'HEAD'
assert http_meta['response']['status'] == status_code


def test_fault():
status_code = 500
url = 'http://{}/status/{}'.format(BASE_URL, status_code)
requests.put(url)
subsegment = xray_recorder.current_segment().subsegments[1]
assert subsegment.name == url
assert subsegment.fault

http_meta = subsegment.http
assert http_meta['request']['url'] == url
assert http_meta['request']['method'].upper() == 'PUT'
assert http_meta['response']['status'] == status_code


def test_invalid_url():
try:
requests.get('http://doesnt.exist')
except Exception:
# prevent uncatch exception from breaking test run
pass
subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.fault

exception = subsegment.cause['exceptions'][0]
assert exception.type == 'NewConnectionError'
2 changes: 1 addition & 1 deletion tests/ext/requests/test_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def construct_ctx():

def test_ok():
status_code = 200
url = 'http://{}/status/{}'.format(BASE_URL, status_code)
url = 'http://{}/status/{}?foo=bar'.format(BASE_URL, status_code)
requests.get(url)
subsegment = xray_recorder.current_segment().subsegments[0]
assert subsegment.name == url
Copy link
Contributor

@haotianw465 haotianw465 Feb 16, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question about the segment name assertion

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Expand Down