Skip to content

Commit 7569e99

Browse files
committed
Use RFC3986 instead of manual string parsing
1 parent 8023267 commit 7569e99

File tree

5 files changed

+86
-60
lines changed

5 files changed

+86
-60
lines changed

jsonschema/_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pkgutil
44
import re
55

6-
from jsonschema.compat import str_types, MutableMapping, urlsplit
6+
from jsonschema.compat import str_types, MutableMapping
77

88

99
class URIDict(MutableMapping):
@@ -13,7 +13,9 @@ class URIDict(MutableMapping):
1313
"""
1414

1515
def normalize(self, uri):
16-
return urlsplit(uri).geturl()
16+
normalized = uri.normalize()
17+
assert not normalized.fragment, "URI had unexpected non-empty fragment"
18+
return normalized.copy_with(fragment=None)
1719

1820
def __init__(self, *args, **kwargs):
1921
self.store = dict()

jsonschema/compat.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,13 @@
1313
zip = zip
1414
from functools import lru_cache
1515
from io import StringIO
16-
from urllib.parse import (
17-
unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit
18-
)
1916
from urllib.request import urlopen
2017
str_types = str,
2118
int_types = int,
2219
iteritems = operator.methodcaller("items")
2320
else:
2421
from itertools import izip as zip # noqa
2522
from StringIO import StringIO
26-
from urlparse import (
27-
urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa
28-
)
2923
from urllib import unquote # noqa
3024
from urllib2 import urlopen # noqa
3125
str_types = basestring
@@ -35,22 +29,4 @@
3529
from functools32 import lru_cache
3630

3731

38-
# On python < 3.3 fragments are not handled properly with unknown schemes
39-
def urlsplit(url):
    """Split *url* into a ``SplitResult``, always separating the fragment.

    Wraps the standard-library splitter and, when a ``#`` is still left
    inside the path component, moves everything after the first ``#``
    into the fragment.
    """
    parts = _urlsplit(url)
    path, fragment = parts.path, parts.fragment
    if "#" in path:
        # Only the first "#" separates path from fragment.
        path, _, fragment = path.partition("#")
    return SplitResult(parts.scheme, parts.netloc, path, parts.query, fragment)
44-
45-
46-
def urldefrag(url):
    """Return ``(url_without_fragment, fragment)`` for *url*.

    A URL containing no ``#`` is returned untouched together with an
    empty fragment; otherwise it is split and reassembled with the
    fragment component blanked out.
    """
    if "#" not in url:
        return url, ''

    scheme, netloc, path, query, fragment = urlsplit(url)
    stripped = urlunsplit((scheme, netloc, path, query, ''))
    return stripped, fragment
54-
55-
5632
# flake8: noqa

jsonschema/tests/test_validators.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66
import unittest
77

8+
from rfc3986 import uri_reference
89
from twisted.trial.unittest import SynchronousTestCase
910

1011
from jsonschema import (
@@ -77,16 +78,16 @@ def test_if_a_version_is_not_provided_it_is_not_registered(self):
7778
self.assertFalse(validates.called)
7879

7980
def test_validates_registers_meta_schema_id(self):
80-
meta_schema_key = "meta schema id"
81-
my_meta_schema = {u"id": meta_schema_key}
81+
my_meta_schema = {u"id": "meta schema id"}
82+
id_of = lambda s: uri_reference(s.get("id", ""))
8283

8384
validators.create(
8485
meta_schema=my_meta_schema,
8586
version="my version",
86-
id_of=lambda s: s.get("id", ""),
87+
id_of=id_of,
8788
)
8889

89-
self.assertIn(meta_schema_key, validators.meta_schemas)
90+
self.assertIn(id_of(my_meta_schema), validators.meta_schemas)
9091

9192
def test_validates_registers_meta_schema_draft6_id(self):
9293
meta_schema_key = "meta schema $id"
@@ -97,7 +98,7 @@ def test_validates_registers_meta_schema_draft6_id(self):
9798
version="my version",
9899
)
99100

100-
self.assertIn(meta_schema_key, validators.meta_schemas)
101+
self.assertIn(uri_reference(meta_schema_key), validators.meta_schemas)
101102

102103
def test_extend(self):
103104
original_validators = dict(self.Validator.VALIDATORS)
@@ -1053,7 +1054,7 @@ def test_custom_validator(self):
10531054
Validator = validators.create(
10541055
meta_schema={"id": "meta schema id"},
10551056
version="12",
1056-
id_of=lambda s: s.get("id", ""),
1057+
id_of=lambda s: uri_reference(s.get("id", "")),
10571058
)
10581059
schema = {"$schema": "meta schema id"}
10591060
self.assertIs(
@@ -1204,7 +1205,9 @@ def test_it_retrieves_stored_refs(self):
12041205
with self.resolver.resolving(self.stored_uri) as resolved:
12051206
self.assertIs(resolved, self.stored_schema)
12061207

1207-
self.resolver.store["cached_ref"] = {"foo": 12}
1208+
cached_uri = uri_reference("cached_ref").resolve_with(
1209+
self.resolver.base_uri)
1210+
self.resolver.store[cached_uri] = {"foo": 12}
12081211
with self.resolver.resolving("cached_ref#/foo") as resolved:
12091212
self.assertEqual(resolved, 12)
12101213

@@ -1231,27 +1234,29 @@ def test_it_retrieves_unstored_refs_via_urlopen(self):
12311234
urlopen.assert_called_once_with("http://bar")
12321235

12331236
def test_it_can_construct_a_base_uri_from_a_schema(self):
1234-
schema = {"id": "foo"}
1237+
schema = {"id": "http://foo.json#"}
12351238
resolver = validators.RefResolver.from_schema(
12361239
schema,
12371240
id_of=lambda schema: schema.get(u"id", u""),
12381241
)
1239-
self.assertEqual(resolver.base_uri, "foo")
1240-
self.assertEqual(resolver.resolution_scope, "foo")
1242+
self.assertEqual(resolver.base_uri, "http://foo.json")
1243+
self.assertEqual(resolver.resolution_scope, "http://foo.json")
12411244
with resolver.resolving("") as resolved:
12421245
self.assertEqual(resolved, schema)
12431246
with resolver.resolving("#") as resolved:
12441247
self.assertEqual(resolved, schema)
1245-
with resolver.resolving("foo") as resolved:
1248+
with resolver.resolving("http://foo.json") as resolved:
12461249
self.assertEqual(resolved, schema)
1247-
with resolver.resolving("foo#") as resolved:
1250+
with resolver.resolving("http://foo.json#") as resolved:
12481251
self.assertEqual(resolved, schema)
12491252

12501253
def test_it_can_construct_a_base_uri_from_a_schema_without_id(self):
12511254
schema = {}
12521255
resolver = validators.RefResolver.from_schema(schema)
1253-
self.assertEqual(resolver.base_uri, "")
1254-
self.assertEqual(resolver.resolution_scope, "")
1256+
self.assertEqual(resolver.base_uri,
1257+
validators.RefResolver.DEFAULT_BASE_URI)
1258+
self.assertEqual(resolver.resolution_scope,
1259+
validators.RefResolver.DEFAULT_BASE_URI)
12551260
with resolver.resolving("") as resolved:
12561261
self.assertEqual(resolved, schema)
12571262
with resolver.resolving("#") as resolved:

jsonschema/validators.py

Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
import numbers
77

88
from six import add_metaclass
9+
from rfc3986 import uri_reference
910

1011
from jsonschema import _utils, _validators, _types
1112
from jsonschema.compat import (
12-
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
13+
Sequence, unquote, urlopen,
1314
str_types, int_types, iteritems, lru_cache,
1415
)
1516
from jsonschema.exceptions import (
@@ -108,10 +109,31 @@ def DEFAULT_TYPES(self):
108109
return self._DEFAULT_TYPES
109110

110111

112+
def _as_uri(uri_or_str):
    """Return URIReference parse result of input string,
    or pass through URIReference argument

    Arguments:

        uri_or_str:

            Either a string, which is parsed with ``uri_reference``,
            or any other object, which is returned unchanged.
    """
    # ``basestring`` does not exist on Python 3; ``str_types`` (imported
    # from jsonschema.compat at the top of this module) is the portable
    # spelling and works directly with ``isinstance`` on both versions.
    if isinstance(uri_or_str, str_types):
        return uri_reference(uri_or_str)
    return uri_or_str
119+
120+
121+
def _join_uri(base, ref):
    """Resolve the URI reference *ref* against the base URI *base*.

    *ref* may be a string or an already-parsed reference; it is passed
    through ``_as_uri`` first. Resolution is done in strict mode.
    """
    reference = _as_uri(ref)
    return reference.resolve_with(base, strict=True)
124+
125+
126+
def _load_uri_from_schema(schema, key):
    """Parse the URI stored under *key* in *schema*.

    When *key* is absent the empty string is parsed instead, so the
    result is ``uri_reference("")``.
    """
    raw_uri = schema.get(key, "")
    return uri_reference(raw_uri)
131+
132+
111133
def _id_of(schema):
112134
if schema is True or schema is False:
113135
return u""
114-
return schema.get(u"$id", u"")
136+
return _load_uri_from_schema(schema, "$id")
115137

116138

117139
def create(
@@ -256,7 +278,7 @@ def iter_errors(self, instance, _schema=None):
256278
return
257279

258280
scope = id_of(_schema)
259-
if scope:
281+
if scope.unsplit():
260282
self.resolver.push_scope(scope)
261283
try:
262284
ref = _schema.get(u"$ref")
@@ -283,7 +305,7 @@ def iter_errors(self, instance, _schema=None):
283305
error.schema_path.appendleft(k)
284306
yield error
285307
finally:
286-
if scope:
308+
if scope.unsplit():
287309
self.resolver.pop_scope()
288310

289311
def descend(self, instance, schema, path=None, schema_path=None):
@@ -416,7 +438,7 @@ def extend(validator, validators=(), version=None, type_checker=None):
416438
},
417439
type_checker=_types.draft3_type_checker,
418440
version="draft3",
419-
id_of=lambda schema: schema.get(u"id", ""),
441+
id_of=lambda schema: _load_uri_from_schema(schema, u"id"),
420442
)
421443

422444
Draft4Validator = create(
@@ -451,7 +473,7 @@ def extend(validator, validators=(), version=None, type_checker=None):
451473
},
452474
type_checker=_types.draft4_type_checker,
453475
version="draft4",
454-
id_of=lambda schema: schema.get(u"id", ""),
476+
id_of=lambda schema: _load_uri_from_schema(schema, u"id"),
455477
)
456478

457479

@@ -542,6 +564,10 @@ class RefResolver(object):
542564
543565
"""
544566

567+
DEFAULT_BASE_URI = uri_reference(
568+
"urn:uuid:00000000-0000-0000-0000-000000000000"
569+
)
570+
545571
def __init__(
546572
self,
547573
base_uri,
@@ -553,10 +579,21 @@ def __init__(
553579
remote_cache=None,
554580
):
555581
if urljoin_cache is None:
556-
urljoin_cache = lru_cache(1024)(urljoin)
582+
urljoin_cache = lru_cache(1024)(_join_uri)
557583
if remote_cache is None:
558584
remote_cache = lru_cache(1024)(self.resolve_from_url)
559585

586+
if isinstance(base_uri, basestring):
587+
base_uri = uri_reference(base_uri)
588+
589+
if not base_uri.unsplit():
590+
base_uri = self.DEFAULT_BASE_URI
591+
592+
if not base_uri.is_absolute():
593+
if base_uri.fragment:
594+
raise ValueError("Base URI must not have non-empty fragment")
595+
base_uri = base_uri.copy_with(fragment=None)
596+
560597
self.referrer = referrer
561598
self.cache_remote = cache_remote
562599
self.handlers = dict(handlers)
@@ -566,7 +603,8 @@ def __init__(
566603
(id, validator.META_SCHEMA)
567604
for id, validator in iteritems(meta_schemas)
568605
)
569-
self.store.update(store)
606+
607+
self.store.update({_as_uri(k): v for k, v in dict(store).items()})
570608
self.store[base_uri] = referrer
571609

572610
self._urljoin_cache = urljoin_cache
@@ -599,7 +637,7 @@ def from_schema(
599637

600638
def push_scope(self, scope):
601639
self._scopes_stack.append(
602-
self._urljoin_cache(self.resolution_scope, scope),
640+
self._urljoin_cache(self.base_uri, scope)
603641
)
604642

605643
def pop_scope(self):
@@ -618,8 +656,7 @@ def resolution_scope(self):
618656

619657
@property
620658
def base_uri(self):
621-
uri, _ = urldefrag(self.resolution_scope)
622-
return uri
659+
return self.resolution_scope.copy_with(fragment=None)
623660

624661
@contextlib.contextmanager
625662
def in_scope(self, scope):
@@ -651,11 +688,17 @@ def resolving(self, ref):
651688
self.pop_scope()
652689

653690
def resolve(self, ref):
654-
url = self._urljoin_cache(self.resolution_scope, ref)
691+
assert self.base_uri
692+
url = self._urljoin_cache(self.base_uri, ref)
655693
return url, self._remote_cache(url)
656694

657695
def resolve_from_url(self, url):
658-
url, fragment = urldefrag(url)
696+
if url.fragment:
697+
fragment = url.fragment
698+
url = url.copy_with(fragment=None)
699+
else:
700+
fragment = ''
701+
659702
try:
660703
document = self.store[url]
661704
except KeyError:
@@ -722,7 +765,7 @@ def resolve_remote(self, uri):
722765
723766
Arguments:
724767
725-
uri (str):
768+
uri (URIReference):
726769
727770
The URI to resolve
728771
@@ -738,8 +781,7 @@ def resolve_remote(self, uri):
738781
except ImportError:
739782
requests = None
740783

741-
scheme = urlsplit(uri).scheme
742-
784+
scheme = uri.scheme
743785
if scheme in self.handlers:
744786
result = self.handlers[scheme](uri)
745787
elif (
@@ -750,12 +792,12 @@ def resolve_remote(self, uri):
750792
# Requests has support for detecting the correct encoding of
751793
# json over http
752794
if callable(requests.Response.json):
753-
result = requests.get(uri).json()
795+
result = requests.get(uri.unsplit()).json()
754796
else:
755-
result = requests.get(uri).json
797+
result = requests.get(uri.unsplit()).json
756798
else:
757799
# Otherwise, pass off to urllib and assume utf-8
758-
result = json.loads(urlopen(uri).read().decode("utf-8"))
800+
result = json.loads(urlopen(uri.unsplit()).read().decode("utf-8"))
759801

760802
if self.cache_remote:
761803
self.store[uri] = result
@@ -845,4 +887,4 @@ def validator_for(schema, default=_LATEST_VERSION):
845887
"""
846888
if schema is True or schema is False:
847889
return default
848-
return meta_schemas.get(schema.get(u"$schema", u""), default)
890+
return meta_schemas.get(_load_uri_from_schema(schema, u"$schema"), default)

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"pyrsistent>=0.14.0",
4242
"six>=1.11.0",
4343
"functools32;python_version<'3'",
44+
"rfc3986>=1.1.0",
4445
],
4546
extras_require={
4647
"format": [

0 commit comments

Comments
 (0)