From 690563ff26e84045be4d998b555c3685afbdb2b6 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 18:45:42 +0100 Subject: [PATCH 01/12] Use uritools instead of urlparse, and support proper file scheme. Don't default to urlopen when unknown scheme encountered (`raise ValueError(scheme)``) --- jsonschema/_utils.py | 5 +-- jsonschema/compat.py | 23 ++---------- jsonschema/validators.py | 75 ++++++++++++++++++++++++---------------- setup.py | 1 + 4 files changed, 51 insertions(+), 53 deletions(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index 5b245a34a..c6840142b 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -3,7 +3,8 @@ import pkgutil import re -from jsonschema.compat import str_types, MutableMapping, urlsplit +from jsonschema.compat import str_types, MutableMapping +from uritools import urisplit class URIDict(MutableMapping): @@ -13,7 +14,7 @@ class URIDict(MutableMapping): """ def normalize(self, uri): - return urlsplit(uri).geturl() + return urisplit(uri).getpath() def __init__(self, *args, **kwargs): self.store = dict() diff --git a/jsonschema/compat.py b/jsonschema/compat.py index ff91fe620..6bf6b7d90 100644 --- a/jsonschema/compat.py +++ b/jsonschema/compat.py @@ -14,7 +14,7 @@ from functools import lru_cache from io import StringIO from urllib.parse import ( - unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit + unquote, urljoin, urlunsplit, SplitResult ) from urllib.request import urlopen str_types = str, @@ -24,7 +24,7 @@ from itertools import izip as zip # noqa from StringIO import StringIO from urlparse import ( - urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa + urljoin, urlunsplit, SplitResult # noqa ) from urllib import unquote # noqa from urllib2 import urlopen # noqa @@ -34,23 +34,4 @@ from functools32 import lru_cache - -# On python < 3.3 fragments are not handled properly with unknown schemes -def urlsplit(url): - scheme, netloc, path, query, fragment = _urlsplit(url) - if "#" in path: - path, fragment = path.split("#", 1) - return SplitResult(scheme, netloc, path, query, fragment) - - -def urldefrag(url): - if "#" in url: - s, n, p, q, frag = urlsplit(url) - defrag = urlunsplit((s, n, p, q, '')) - else: - defrag = url - frag = '' - return defrag, frag - - # flake8: noqa diff --git a/jsonschema/validators.py b/jsonschema/validators.py index a47c3aefa..083481f33 100644 --- a/jsonschema/validators.py +++ b/jsonschema/validators.py @@ -5,11 +5,12 @@ import json import numbers +from uritools import urisplit, uridefrag, urijoin from six import add_metaclass from jsonschema import _utils, _validators, _types from jsonschema.compat import ( - Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen, + Sequence, unquote, urlopen, str_types, int_types, iteritems, lru_cache, ) from jsonschema.exceptions import ( @@ -522,7 +523,7 @@ class RefResolver(object): A mapping from URI schemes to functions that should be used to retrieve them - urljoin_cache (functools.lru_cache): + urijoin_cache (functools.lru_cache): A cache that will be used for caching the results of joining the resolution scope to subscopes. @@ -547,17 +548,19 @@ def __init__( store=(), cache_remote=True, handlers=(), - urljoin_cache=None, + urijoin_cache=None, remote_cache=None, ): - if urljoin_cache is None: - urljoin_cache = lru_cache(1024)(urljoin) + if urijoin_cache is None: + urijoin_cache = lru_cache(1024)(urijoin) if remote_cache is None: remote_cache = lru_cache(1024)(self.resolve_from_url) self.referrer = referrer self.cache_remote = cache_remote - self.handlers = dict(handlers) + self.handlers = {'http': self.http_handler, 'https': self.http_handler, + 'file': self.http_handler} + self.handlers.update(handlers) self._scopes_stack = [base_uri] self.store = _utils.URIDict( @@ -567,7 +570,7 @@ def __init__( self.store.update(store) self.store[base_uri] = referrer - self._urljoin_cache = urljoin_cache + self._urijoin_cache = urijoin_cache self._remote_cache = remote_cache @classmethod @@ -595,9 +598,38 @@ def from_schema( return cls(base_uri=id_of(schema), referrer=schema, *args, **kwargs) + def http_handler(self, uri): + try: + import requests + except ImportError: + pass + else: + if hasattr(requests.Response, "json"): + session = requests.Session() + + requests_supports_scheme = True + if urisplit(uri).scheme == "file": + try: + import requests_file + except ImportError: + requests_supports_scheme = False + else: + session.mount("file://", requests_file.FileAdapter()) + + if requests_supports_scheme: + # Requests has support for detecting the correct encoding of + # json over http + if callable(requests.Response.json): + return session.get(uri).json() + else: + return session.get(uri).json + + # Otherwise, pass off to urllib and assume utf-8 + return json.loads(urlopen(uri).read().decode("utf-8")) + def push_scope(self, scope): self._scopes_stack.append( - self._urljoin_cache(self.resolution_scope, scope), + self._urijoin_cache(self.resolution_scope, scope), ) def pop_scope(self): @@ -616,7 +648,7 @@ def resolution_scope(self): @property def base_uri(self): - uri, _ = urldefrag(self.resolution_scope) + uri, _ = uridefrag(self.resolution_scope) return uri @contextlib.contextmanager @@ -649,11 +681,11 @@ def resolving(self, ref): self.pop_scope() def resolve(self, ref): - url = self._urljoin_cache(self.resolution_scope, ref) + url = self._urijoin_cache(self.resolution_scope, ref) return url, self._remote_cache(url) def resolve_from_url(self, url): - url, fragment = urldefrag(url) + url, fragment = uridefrag(url) try: document = self.store[url] except KeyError: @@ -731,29 +763,12 @@ def resolve_remote(self, uri): .. _requests: http://pypi.python.org/pypi/requests/ """ - try: - import requests - except ImportError: - requests = None - - scheme = urlsplit(uri).scheme + scheme = urisplit(uri).scheme if scheme in self.handlers: result = self.handlers[scheme](uri) - elif ( - scheme in [u"http", u"https"] and - requests and - getattr(requests.Response, "json", None) is not None - ): - # Requests has support for detecting the correct encoding of - # json over http - if callable(requests.Response.json): - result = requests.get(uri).json() - else: - result = requests.get(uri).json else: - # Otherwise, pass off to urllib and assume utf-8 - result = json.loads(urlopen(uri).read().decode("utf-8")) + raise ValueError(scheme) if self.cache_remote: self.store[uri] = result diff --git a/setup.py b/setup.py index 37cadea62..975e3fb17 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ "pyrsistent>=0.14.0", "six>=1.11.0", "functools32;python_version<'3'", + "uritools>=2.2.0" ], extras_require={ "format": [ From 76f18a9e5d5ee8a37f5215dc6055fb13adf35e5d Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 18:55:39 +0100 Subject: [PATCH 02/12] Bugfix for API difference --- jsonschema/validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonschema/validators.py b/jsonschema/validators.py index 083481f33..a65001d66 100644 --- a/jsonschema/validators.py +++ b/jsonschema/validators.py @@ -694,7 +694,7 @@ def resolve_from_url(self, url): except Exception as exc: raise RefResolutionError(exc) - return self.resolve_fragment(document, fragment) + return self.resolve_fragment(document, fragment or '') def resolve_fragment(self, document, fragment): """ From 03f644c3c182a4dd161027151c5e2d76b2792416 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 19:16:56 +0100 Subject: [PATCH 03/12] Bugfix for uri normalize --- jsonschema/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index c6840142b..e7808a3b1 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -14,7 +14,7 @@ class URIDict(MutableMapping): """ def normalize(self, uri): - return urisplit(uri).getpath() + return urisplit(uri).geturi() def __init__(self, *args, **kwargs): self.store = dict() From a56ac8e6a1d450cdb3b89e036abe1819c332580e Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 19:50:38 +0100 Subject: [PATCH 04/12] Bugfix for uri normalize --- jsonschema/_utils.py | 5 +++-- jsonschema/tests/test_validators.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index e7808a3b1..3a4939247 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -4,7 +4,7 @@ import re from jsonschema.compat import str_types, MutableMapping -from uritools import urisplit +from uritools import urisplit, uriunsplit class URIDict(MutableMapping): @@ -14,7 +14,8 @@ class URIDict(MutableMapping): """ def normalize(self, uri): - return urisplit(uri).geturi() + result = urisplit(uri) + return uriunsplit((result.scheme, result.authority, result.path, None, None)) def __init__(self, *args, **kwargs): self.store = dict() diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index f2f5a1f8b..428fd067f 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1178,10 +1178,10 @@ def test_it_retrieves_unstored_refs_via_requests(self): schema = {"baz": 12} with MockImport("requests", mock.Mock()) as requests: - requests.get.return_value.json.return_value = schema + requests.Session.get.return_value.json.return_value = schema with self.resolver.resolving(ref) as resolved: self.assertEqual(resolved, 12) - requests.get.assert_called_once_with("http://bar") + requests.Session().get.assert_called_once_with("http://bar") def test_it_retrieves_unstored_refs_via_urlopen(self): ref = "http://bar#baz" From cb2e361bff4b51c7640ae07c8ecd2493f0740185 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 20:12:05 +0100 Subject: [PATCH 05/12] Add tests for file scheme --- jsonschema/tests/test_validators.py | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 428fd067f..534317d8d 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1195,6 +1195,41 @@ def test_it_retrieves_unstored_refs_via_urlopen(self): self.assertEqual(resolved, 12) urlopen.assert_called_once_with("http://bar") + def test_it_retrieves_unstored_file_refs_via_urlopen(self): + ref = "file://bar.json#baz" + schema = {"baz": 12} + + with MockImport("requests", None): + with mock.patch("jsonschema.validators.urlopen") as urlopen: + urlopen.return_value.read.return_value = (json.dumps(schema).encode("utf8")) + with self.resolver.resolving(ref) as resolved: + self.assertEqual(resolved, 12) + urlopen.assert_called_once_with("file://bar.json") + + def test_it_retrieves_unstored_file_refs_via_requests_and_requests_file(self): + ref = "file://bar.json#baz" + schema = {"baz": 12} + + with MockImport("requests", mock.Mock()) as requests: + with MockImport("requests_file", mock.Mock()) as requests_file: + requests.Session.get.return_value.json.return_value = schema + with self.resolver.resolving(ref) as resolved: + self.assertEqual(resolved, 12) + requests.Session().get.assert_called_once_with("file://bar.json") + + def test_it_retrieves_unstored_refs_via_urlopen_when_requests_file_missing(self): + ref = "file://bar.json#baz" + schema = {"baz": 12} + + with MockImport("requests", mock.Mock()) as requests: + with mock.patch("jsonschema.validators.urlopen") as urlopen: + urlopen.return_value.read.return_value = ( + json.dumps(schema).encode("utf8")) + with self.resolver.resolving(ref) as resolved: + self.assertEqual(resolved, 12) + requests.Session().get.assert_not_called() + urlopen.assert_called_once_with("file://bar.json") + def test_it_can_construct_a_base_uri_from_a_schema(self): schema = {"id": "foo"} resolver = validators.RefResolver.from_schema( From 2391ce8ca20beed37b44a6086498fd577e20f618 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 20:15:13 +0100 Subject: [PATCH 06/12] Satisfy flake8 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 975e3fb17..3127d3409 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ "pyrsistent>=0.14.0", "six>=1.11.0", "functools32;python_version<'3'", - "uritools>=2.2.0" + "uritools>=2.2.0", ], extras_require={ "format": [ From 699ae89ccb0841b91350f8043873b7bddd331359 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 20:27:58 +0100 Subject: [PATCH 07/12] Satisfy flake8 (again) --- jsonschema/tests/test_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 534317d8d..ed20d9f90 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1217,7 +1217,7 @@ def test_it_retrieves_unstored_file_refs_via_requests_and_requests_file(self): self.assertEqual(resolved, 12) requests.Session().get.assert_called_once_with("file://bar.json") - def test_it_retrieves_unstored_refs_via_urlopen_when_requests_file_missing(self): + def test_it_retrieves_unstored_refs_via_urlopen_without_requests_file(self): ref = "file://bar.json#baz" schema = {"baz": 12} From 782a337bdb2bffce0cac745f33d092ddd2386e85 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 20:52:49 +0100 Subject: [PATCH 08/12] Remove request-file --- jsonschema/tests/test_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index ed20d9f90..ccc17c80a 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1217,7 +1217,7 @@ def test_it_retrieves_unstored_file_refs_via_requests_and_requests_file(self): self.assertEqual(resolved, 12) requests.Session().get.assert_called_once_with("file://bar.json") - def test_it_retrieves_unstored_refs_via_urlopen_without_requests_file(self): + def test_it_retrieves_unstored_refs_via_urlopen_no_requests_file(self): ref = "file://bar.json#baz" schema = {"baz": 12} From 925ff186e22bd2743efa17d0f66e9a44a192bd87 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 21:04:12 +0100 Subject: [PATCH 09/12] style: local variable never used --- jsonschema/tests/test_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index ccc17c80a..3b7aa3308 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1211,7 +1211,7 @@ def test_it_retrieves_unstored_file_refs_via_requests_and_requests_file(self): schema = {"baz": 12} with MockImport("requests", mock.Mock()) as requests: - with MockImport("requests_file", mock.Mock()) as requests_file: + with MockImport("requests_file", mock.Mock()): requests.Session.get.return_value.json.return_value = schema with self.resolver.resolving(ref) as resolved: self.assertEqual(resolved, 12) From 43c106dd22b1d0e916828562114360bd0f99d1d2 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 5 Jul 2018 21:08:05 +0100 Subject: [PATCH 10/12] Flake8 cleanups --- jsonschema/_utils.py | 3 ++- jsonschema/tests/test_validators.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/jsonschema/_utils.py b/jsonschema/_utils.py index 3a4939247..4c114c99e 100644 --- a/jsonschema/_utils.py +++ b/jsonschema/_utils.py @@ -15,7 +15,8 @@ class URIDict(MutableMapping): def normalize(self, uri): result = urisplit(uri) - return uriunsplit((result.scheme, result.authority, result.path, None, None)) + return uriunsplit((result.scheme, result.authority, + result.path, None, None)) def __init__(self, *args, **kwargs): self.store = dict() diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 3b7aa3308..19df5c23d 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -1201,12 +1201,13 @@ def test_it_retrieves_unstored_file_refs_via_urlopen(self): with MockImport("requests", None): with mock.patch("jsonschema.validators.urlopen") as urlopen: - urlopen.return_value.read.return_value = (json.dumps(schema).encode("utf8")) + urlopen.return_value.read.return_value = ( + json.dumps(schema).encode("utf8")) with self.resolver.resolving(ref) as resolved: self.assertEqual(resolved, 12) urlopen.assert_called_once_with("file://bar.json") - def test_it_retrieves_unstored_file_refs_via_requests_and_requests_file(self): + def test_it_retrieves_unstored_file_refs_via_requests_file(self): ref = "file://bar.json#baz" schema = {"baz": 12} From cf346368b3ab3fd14bbe630bef40fa3cb57fdfc9 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 6 Jul 2018 12:23:27 +0100 Subject: [PATCH 11/12] Rename URL methods to URI --- jsonschema/validators.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/jsonschema/validators.py b/jsonschema/validators.py index a65001d66..9347ac5b5 100644 --- a/jsonschema/validators.py +++ b/jsonschema/validators.py @@ -531,7 +531,7 @@ class RefResolver(object): remote_cache (functools.lru_cache): A cache that will be used for caching the results of - resolved remote URLs. + resolved remote URIs. Attributes: @@ -554,7 +554,7 @@ def __init__( if urijoin_cache is None: urijoin_cache = lru_cache(1024)(urijoin) if remote_cache is None: - remote_cache = lru_cache(1024)(self.resolve_from_url) + remote_cache = lru_cache(1024)(self.resolve_from_uri) self.referrer = referrer self.cache_remote = cache_remote @@ -673,24 +673,24 @@ def resolving(self, ref): """ - url, resolved = self.resolve(ref) - self.push_scope(url) + uri, resolved = self.resolve(ref) + self.push_scope(uri) try: yield resolved finally: self.pop_scope() def resolve(self, ref): - url = self._urijoin_cache(self.resolution_scope, ref) - return url, self._remote_cache(url) + uri = self._urijoin_cache(self.resolution_scope, ref) + return uri, self._remote_cache(uri) - def resolve_from_url(self, url): - url, fragment = uridefrag(url) + def resolve_from_uri(self, uri): + uri, fragment = uridefrag(uri) try: - document = self.store[url] + document = self.store[uri] except KeyError: try: - document = self.resolve_remote(url) + document = self.resolve_remote(uri) except Exception as exc: raise RefResolutionError(exc) From 7bf6275cf314f8c3dfd192b48ae2a8fcbfc87f14 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Fri, 6 Jul 2018 12:40:05 +0100 Subject: [PATCH 12/12] Use contextmanager instead of `push_scope` and `pop_scope`. Facilitate this by modifying `in_scope` to only push scopes which boolean evaluate to True. Update `LegacyRefResolver` in test suite to support this (no-op `in_scope` context manager). --- jsonschema/_validators.py | 17 +++-------------- jsonschema/tests/test_validators.py | 11 +++++++---- jsonschema/validators.py | 18 ++++++------------ 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/jsonschema/_validators.py b/jsonschema/_validators.py index 0d5b2b009..e222da69a 100644 --- a/jsonschema/_validators.py +++ b/jsonschema/_validators.py @@ -301,20 +301,9 @@ def enum(validator, enums, instance, schema): def ref(validator, ref, instance, schema): - resolve = getattr(validator.resolver, "resolve", None) - if resolve is None: - with validator.resolver.resolving(ref) as resolved: - for error in validator.descend(instance, resolved): - yield error - else: - scope, resolved = validator.resolver.resolve(ref) - validator.resolver.push_scope(scope) - - try: - for error in validator.descend(instance, resolved): - yield error - finally: - validator.resolver.pop_scope() + with validator.resolver.resolving(ref) as resolved: + for error in validator.descend(instance, resolved): + yield error def type_draft3(validator, types, instance, schema): diff --git a/jsonschema/tests/test_validators.py b/jsonschema/tests/test_validators.py index 19df5c23d..a1e3a7bcf 100644 --- a/jsonschema/tests/test_validators.py +++ b/jsonschema/tests/test_validators.py @@ -870,12 +870,11 @@ def test_it_delegates_to_a_ref_resolver(self): resolver = validators.RefResolver("", {}) schema = {"$ref": mock.Mock()} - with mock.patch.object(resolver, "resolve") as resolve: - resolve.return_value = "url", {"type": "integer"} + with mock.patch.object(resolver, "resolving") as resolving: + resolving.return_value.__enter__.return_value = {"type": "integer"} with self.assertRaises(ValidationError): self.validator_class(schema, resolver=resolver).validate(None) - - resolve.assert_called_once_with(schema["$ref"]) + resolving.assert_called_once_with(schema["$ref"]) def test_it_delegates_to_a_legacy_ref_resolver(self): """ @@ -885,6 +884,10 @@ def test_it_delegates_to_a_legacy_ref_resolver(self): """ class LegacyRefResolver(object): + @contextmanager + def in_scope(self, scope): + yield + @contextmanager def resolving(this, ref): self.assertEqual(ref, "the ref") diff --git a/jsonschema/validators.py b/jsonschema/validators.py index 9347ac5b5..57b73271c 100644 --- a/jsonschema/validators.py +++ b/jsonschema/validators.py @@ -255,9 +255,7 @@ def iter_errors(self, instance, _schema=None): return scope = id_of(_schema) - if scope: - self.resolver.push_scope(scope) - try: + with self.resolver.in_scope(scope): ref = _schema.get(u"$ref") if ref is not None: validators = [(u"$ref", ref)] @@ -281,9 +279,6 @@ def iter_errors(self, instance, _schema=None): if k != u"$ref": error.schema_path.appendleft(k) yield error - finally: - if scope: - self.resolver.pop_scope() def descend(self, instance, schema, path=None, schema_path=None): for error in self.iter_errors(instance, schema): @@ -653,11 +648,13 @@ def base_uri(self): @contextlib.contextmanager def in_scope(self, scope): - self.push_scope(scope) + if scope: + self.push_scope(scope) try: yield finally: - self.pop_scope() + if scope: + self.pop_scope() @contextlib.contextmanager def resolving(self, ref): @@ -674,11 +671,8 @@ def resolving(self, ref): """ uri, resolved = self.resolve(ref) - self.push_scope(uri) - try: + with self.in_scope(uri): yield resolved - finally: - self.pop_scope() def resolve(self, ref): uri = self._urijoin_cache(self.resolution_scope, ref)