Skip to content

Commit cd88c19

Browse files
committed
issue python-jsonschema#158: TRY to speed-up scope & $ref url-handling by keeping
fragments separated from URL (and avoid redundant frag/defrag).
1 parent 468b016 commit cd88c19

File tree

3 files changed

+39
-16
lines changed

3 files changed

+39
-16
lines changed

jsonschema/compat.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import unicode_literals
2-
import sys
2+
33
import operator
4+
import sys
5+
46

57
try:
68
from collections import MutableMapping, Sequence # noqa
@@ -13,7 +15,8 @@
1315
zip = zip
1416
from io import StringIO
1517
from urllib.parse import (
16-
unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit
18+
unquote, urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit,
19+
DefragResult
1720
)
1821
from urllib.request import urlopen
1922
str_types = str,
@@ -23,7 +26,8 @@
2326
from itertools import izip as zip # noqa
2427
from StringIO import StringIO
2528
from urlparse import (
26-
urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit # noqa
29+
urljoin, urlunsplit, SplitResult, urlsplit as _urlsplit, # noqa
30+
DefragResult
2731
)
2832
from urllib import unquote # noqa
2933
from urllib2 import urlopen # noqa
@@ -47,7 +51,7 @@ def urldefrag(url):
4751
else:
4852
defrag = url
4953
frag = ''
50-
return defrag, frag
54+
return DefragResult(defrag, frag)
5155

5256

5357
# flake8: noqa

jsonschema/tests/test_benchmarks.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ def test_V3_meta_schema(self):
5050
5151
348 runs in 2.00 sec
5252
ms/run: mean(5.73), std(0.38), MIN(5.50), MAX(8.00)
53+
splitted_fragments: Time @ Xeon 3.2GHz (x 1.05 faster)::
54+
55+
290 runs in 2.00 sec
56+
ms/run: mean(6.88), std(0.66), MIN(6.00), MAX(12.00)
5357
"""
5458

5559
stats = []
@@ -75,6 +79,10 @@ def test_V4_meta_schema(self):
7579
7680
201 runs in 2.01 sec
7781
ms/run: mean(9.95), std(0.57), MIN(9.50), MAX(12.50)
82+
splitted_fragments: Time @ Xeon 3.2GHz (x 1.16 faster)::
83+
84+
191 runs in 2.01 sec
85+
ms/run: mean(10.47), std(0.81), MIN(9.50), MAX(14.00)
7886
"""
7987

8088
stats = []
@@ -100,6 +108,10 @@ def test_both_meta_schemas(self):
100108
101109
125 runs in 2.02 sec
102110
ms/run: mean(16.00), std(0.88), MIN(15.00), MAX(19.00)
111+
splitted_fragments: Time @ Xeon 3.2GHz (x 1.11 faster)::
112+
113+
115 runs in 2.00 sec
114+
ms/run: mean(17.24), std(1.24), MIN(16.00), MAX(24.51)
103115
"""
104116

105117
v_classes = [Draft3Validator, Draft4Validator]
@@ -127,6 +139,10 @@ def test_ref_model(self):
127139
128140
30 runs in 2.02 sec
129141
ms/run: mean(65.05), std(2.11), MIN(62.01), MAX(71.51)
142+
splitted_fragments: Time @ Xeon 3.2GHz (x 1.12 faster)::
143+
144+
19 runs in 2.10 sec
145+
ms/run: mean(105.07), std(2.45), MIN(102.02), MAX(109.02)
130146
"""
131147

132148
stats = []

jsonschema/validators.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111

1212
from jsonschema import _utils, _validators
1313
from jsonschema.compat import (
14-
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen,
14+
Sequence, urljoin, urlsplit, urldefrag, unquote, urlopen, DefragResult,
15+
1516
str_types, int_types, iteritems,
1617
)
1718
from jsonschema.exceptions import ErrorTree # Backwards compatibility # noqa
@@ -230,7 +231,7 @@ class RefResolver(object):
230231
231232
:argument str base_uri: URI of the referring document
232233
:argument referrer: the actual referring document
233-
:argument dict store: a mapping from URIs to documents to cache
234+
:argument dict store: a mapping from URIs (without fragments!) to documents to cache
234235
:argument bool cache_remote: whether remote refs should be cached after
235236
first resolution
236237
:argument dict handlers: a mapping from URI schemes to functions that
@@ -241,6 +242,7 @@ class RefResolver(object):
241242
def __init__(
242243
self, base_uri, referrer, store=(), cache_remote=True, handlers=(),
243244
):
245+
base_uri = urldefrag(base_uri)
244246
self.base_uri = base_uri
245247
self.resolution_scope = base_uri
246248
# This attribute is not used, it is for backwards compatibility
@@ -251,11 +253,11 @@ def __init__(
251253

252254
self.old_scopes = []
253255
self.store = _utils.URIDict(
254-
(id, validator.META_SCHEMA)
256+
(id, validator.META_SCHEMA) ## IDs assumed pure urls (no fragments).
255257
for id, validator in iteritems(meta_schemas)
256258
)
257259
self.store.update(store)
258-
self.store[base_uri] = referrer
260+
self.store[base_uri.url] = referrer
259261

260262
@classmethod
261263
def from_schema(cls, schema, *args, **kwargs):
@@ -287,19 +289,20 @@ def resolving(self, ref):
287289
288290
"""
289291

290-
full_uri = urljoin(self.resolution_scope, ref)
291-
uri, fragment = urldefrag(full_uri)
292-
if not uri:
293-
uri = self.base_uri
292+
ref = urldefrag(ref)
294293

295-
if uri in self.store:
296-
document = self.store[uri]
297-
else:
294+
url = urljoin(self.resolution_scope.url, ref.url, allow_fragments=False) \
295+
if ref.url else self.resolution_scope.url
296+
297+
try:
298+
document = self.store[url]
299+
except KeyError:
298300
try:
299-
document = self.resolve_remote(uri)
301+
document = self.resolve_remote(url)
300302
except Exception as exc:
301303
raise RefResolutionError(exc)
302304

305+
uri = DefragResult(url, ref.fragment)
303306
old_base_uri, self.base_uri = self.base_uri, uri
304307
try:
305308
self.push_scope(uri)

0 commit comments

Comments
 (0)