Skip to content

Commit 1a24e3c

Browse files
committed
Auto-create metaschemas referenced in "$schema"
This change allows the tests added in PR json-schema-org/JSON-Schema-Test-Suite#646 to pass, in accordance with §9.3.1 of the spec, "Detecting a Meta-Schema". * Catalog.get_metaschema() is added, and is analogous in behavior to Catalog.get_schema(), relying on Catalog.create_metaschema() * It is called from the now-cached JSONSchema.metaschema property, to keep the parallel with references which are only resolved when used * Catalog.create_metaschema() now returns the created metaschema to avoid having to immediately look it up again, as does Catalog.create_vocabulary() * The core_vocabulary parameters have become default_core_vocabulary parameters to allow not knowing the core in advance * The Metaschema constructor now looks in "$vocabulary" for a vocabulary URI matching r'^https://json-schema\.org/draft/[^/]*/vocab/core$' and if it finds a unique match, uses that instead of the default * Lack of a recognizable core vocabulary still results in a JSONSchemaError exception if no default is provided
1 parent 6bdd8ce commit 1a24e3c

10 files changed

+270
-19
lines changed

CHANGELOG.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Features:
1111
* JSON ``null``, ``true``, ``false`` literals
1212
* Relative JSON Pointer ``+``/``-`` array index adjustments
1313
* Unknown keywords are collected as annotations
14+
* Automatically create metaschemas as referenced by ``"$schema"``
15+
* Automatically detect the core vocabulary in metaschemas,
16+
but allow specifying a default to use when none is detectable
1417

1518
Experimental:
1619

@@ -23,6 +26,11 @@ Breaking changes:
2326
* ``Catalog.add_format_validators()`` superseded by ``@format_validator`` / ``Catalog.enable_formats()``
2427
* Rename ``Catalog.session()`` context manager to ``Catalog.cache()``
2528
* Rename ``session`` parameter to ``cacheid`` in many places
29+
* Added ``Catalog.get_metaschema()``, analogous to ``Catalog.get_schema()``
30+
* ``Catalog.create_metaschema()`` and ``Catalog.create_vocabulary()`` return the created instance
31+
* Rename ``core_vocabulary`` and ``core_vocabulary_uri`` parameters for
32+
``Metaschema.__init__()`` and ``Catalog.create_metaschema()`` respectively to
33+
``default_core_vocabulary`` and ``default_core_vocabulary_uri``
2634
* Rename public functions in the ``jsonpatch`` module
2735

2836
Bug Fixes:

jschon/catalog/__init__.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,16 +137,19 @@ def load_json(self, uri: URI) -> JSONCompatible:
137137

138138
raise CatalogError(f'A source is not available for "{uri}"')
139139

140-
def create_vocabulary(self, uri: URI, *kwclasses: KeywordClass) -> None:
140+
def create_vocabulary(self, uri: URI, *kwclasses: KeywordClass) -> Vocabulary:
141141
"""Create a :class:`~jschon.vocabulary.Vocabulary` object, which
142142
may be used by a :class:`~jschon.vocabulary.Metaschema` to provide
143143
keyword classes used in schema construction.
144144
145145
:param uri: the URI identifying the vocabulary
146146
:param kwclasses: the :class:`~jschon.vocabulary.Keyword` classes
147147
constituting the vocabulary
148+
149+
:returns: the newly created :class:`Vocabulary` instance
148150
"""
149151
self._vocabularies[uri] = Vocabulary(uri, *kwclasses)
152+
return self._vocabularies[uri]
150153

151154
def get_vocabulary(self, uri: URI) -> Vocabulary:
152155
"""Get a :class:`~jschon.vocabulary.Vocabulary` by its `uri`.
@@ -162,37 +165,72 @@ def get_vocabulary(self, uri: URI) -> Vocabulary:
162165
def create_metaschema(
163166
self,
164167
uri: URI,
165-
core_vocabulary_uri: URI,
168+
default_core_vocabulary_uri: Optional[URI] = None,
166169
*default_vocabulary_uris: URI,
167170
**kwargs: Any,
168-
) -> None:
171+
) -> Metaschema:
169172
"""Create, cache and validate a :class:`~jschon.vocabulary.Metaschema`.
170173
171174
:param uri: the URI identifying the metaschema
172-
:param core_vocabulary_uri: the URI identifying the metaschema's
173-
core :class:`~jschon.vocabulary.Vocabulary`
175+
:param default_core_vocabulary_uri: the URI identifying the metaschema's
176+
core :class:`~jschon.vocabulary.Vocabulary`, used in the absence
177+
of a ``"$vocabulary"`` keyword in the metaschema JSON file, or
178+
if a known core vocabulary is not present under ``"$vocabulary"``
174179
:param default_vocabulary_uris: default :class:`~jschon.vocabulary.Vocabulary`
175180
URIs, used in the absence of a ``"$vocabulary"`` keyword in the
176181
metaschema JSON file
177182
:param kwargs: additional keyword arguments to pass through to the
178183
:class:`~jschon.jsonschema.JSONSchema` constructor
184+
185+
:returns: the newly created :class:`Metaschema` instance
186+
187+
:raise CatalogError: if the metaschema is not valid
179188
"""
180189
metaschema_doc = self.load_json(uri)
181-
core_vocabulary = self.get_vocabulary(core_vocabulary_uri)
190+
default_core_vocabulary = (
191+
self.get_vocabulary(default_core_vocabulary_uri)
192+
if default_core_vocabulary_uri
193+
else None
194+
)
182195
default_vocabularies = [
183196
self.get_vocabulary(vocab_uri)
184197
for vocab_uri in default_vocabulary_uris
185198
]
186199
metaschema = Metaschema(
187200
self,
188201
metaschema_doc,
189-
core_vocabulary,
202+
default_core_vocabulary,
190203
*default_vocabularies,
191204
**kwargs,
192205
uri=uri,
193206
)
194207
if not metaschema.validate().valid:
195-
raise CatalogError("The metaschema is invalid against itself")
208+
raise CatalogError(
209+
"The metaschema is invalid against its own metaschema "
210+
f'"{metaschema_doc["$schema"]}"'
211+
)
212+
return metaschema
213+
214+
def get_metaschema(self, uri: URI) -> Metaschema:
215+
"""Get a metaschema identified by `uri` from a cache, or
216+
load it from configured sources if not already cached.
217+
218+
Note that metaschemas that do not declare a known core vocabulary
219+
in ``$vocabulary`` must first be created using :meth:`create_metaschema`.
220+
221+
:param uri: the URI identifying the metaschema
222+
223+
:raise CatalogError: if the object referenced by `uri` is not
224+
a :class:`~jschon.vocabulary.Metaschema`, or if it is not valid
225+
:raise JSONSchemaError: if the metaschema is loaded from sources
226+
but no known core vocabulary is present in ``$vocabulary``
227+
"""
228+
metaschema = self._schema_cache['__meta__'].get(uri)
229+
if not metaschema:
230+
metaschema = self.create_metaschema(uri)
231+
if not isinstance(metaschema, Metaschema):
232+
raise CatalogError(f"The schema referenced by {uri} is not a metaschema")
233+
return metaschema
196234

197235
def enable_formats(self, *format_attr: str) -> None:
198236
"""Enable validation of the specified format attributes.

jschon/jsonschema.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -218,21 +218,15 @@ def parentschema(self) -> Optional[JSONSchema]:
218218
return parent
219219
parent = parent.parent
220220

221-
@property
221+
@cached_property
222222
def metaschema(self) -> Metaschema:
223223
"""The schema's :class:`~jschon.vocabulary.Metaschema`."""
224224
from jschon.vocabulary import Metaschema
225225

226226
if (uri := self.metaschema_uri) is None:
227227
raise JSONSchemaError("The schema's metaschema URI has not been set")
228228

229-
if not isinstance(
230-
metaschema := self.catalog.get_schema(uri, cacheid='__meta__'),
231-
Metaschema,
232-
):
233-
raise JSONSchemaError(f"The schema referenced by {uri} is not a metachema")
234-
235-
return metaschema
229+
return self.catalog.get_metaschema(uri)
236230

237231
@property
238232
def metaschema_uri(self) -> Optional[URI]:

jschon/vocabulary/__init__.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from __future__ import annotations
22

3+
import re
34
import inspect
45
from typing import Any, Dict, Mapping, Optional, Sequence, TYPE_CHECKING, Tuple, Type
56

67
from jschon.json import JSON, JSONCompatible
78
from jschon.jsonschema import JSONSchema, Result
9+
from jschon.exceptions import JSONSchemaError
810
from jschon.uri import URI
911

1012
if TYPE_CHECKING:
@@ -30,17 +32,54 @@ class Metaschema(JSONSchema):
3032
:class:`Metaschema` is itself a subclass of :class:`~jschon.jsonschema.JSONSchema`,
3133
and may be used to validate any referencing schema.
3234
"""
35+
_CORE_VOCAB_RE = r'https://json-schema\.org/draft/[^/]*/vocab/core$'
3336

3437
def __init__(
3538
self,
3639
catalog: Catalog,
3740
value: Mapping[str, JSONCompatible],
38-
core_vocabulary: Vocabulary,
41+
default_core_vocabulary: Optional[Vocabulary] = None,
3942
*default_vocabularies: Vocabulary,
4043
**kwargs: Any,
4144
):
42-
self.core_vocabulary: Vocabulary = core_vocabulary
45+
"""Initialize a :class:`Metaschema` instance from the given
46+
schema-compatible `value`.
47+
48+
:param catalog: catalog instance or catalog name
49+
:param value: a schema-compatible Python object
50+
:param default_core_vocabulary: the metaschema's
51+
core :class:`~jschon.vocabulary.Vocabulary`, used in the absence
52+
of a ``"$vocabulary"`` keyword in the metaschema JSON file, or
53+
if a known core vocabulary is not present under ``"$vocabulary"``
54+
:param default_vocabularies: default :class:`~jschon.vocabulary.Vocabulary`
55+
instances, used in the absence of a ``"$vocabulary"`` keyword in the
56+
metaschema JSON file
57+
:param kwargs: additional keyword arguments to pass through to the
58+
:class:`~jschon.jsonschema.JSONSchema` constructor
59+
60+
:raise JSONSchemaError: if no core vocabulary can be determined
61+
:raise CatalogError: if the created metaschema is not valid
62+
"""
4363
self.default_vocabularies: Tuple[Vocabulary, ...] = default_vocabularies
64+
self.core_vocabulary: Vocabulary = default_core_vocabulary
65+
66+
if vocabularies := value.get("$vocabulary"):
67+
possible_cores = list(filter(
68+
lambda v: re.match(self._CORE_VOCAB_RE, v),
69+
vocabularies,
70+
))
71+
if len(possible_cores) == 1:
72+
self.core_vocabulary = catalog.get_vocabulary(URI(possible_cores[0]))
73+
else:
74+
raise JSONSchemaError(
75+
'Cannot determine unique known core vocabulary from '
76+
f'candidates "{vocabularies.keys()}"'
77+
)
78+
if self.core_vocabulary is None:
79+
raise JSONSchemaError(
80+
f'No core vocabulary in "$vocabulary": {value}, and no default provided'
81+
)
82+
4483
self.kwclasses: Dict[str, KeywordClass] = {}
4584
super().__init__(value, catalog=catalog, cacheid='__meta__', **kwargs)
4685

tests/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
metaschema_uri_2020_12 = URI("https://json-schema.org/draft/2020-12/schema")
55
metaschema_uri_next = URI("https://json-schema.org/draft/next/schema")
66

7+
core_vocab_uri_2019_09 = URI("https://json-schema.org/draft/2019-09/vocab/core")
8+
core_vocab_uri_2020_12 = URI("https://json-schema.org/draft/2020-12/vocab/core")
9+
core_vocab_uri_next = URI("https://json-schema.org/draft/next/vocab/core")
10+
711
example_schema = {
812
"$schema": "https://json-schema.org/draft/2020-12/schema",
913
"$id": "dynamicRef8_main.json",

tests/data/meta_invalid.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://example.com/meta_invalid",
4+
"$vocabulary": {
5+
"https://json-schema.org/draft/2020-12/vocab/core": true
6+
},
7+
"type": {"lol": "cats"}
8+
}

tests/data/meta_no_vocabs.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://example.com/meta_no_vocabs"
4+
}

tests/data/meta_with_core.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"$id": "https://example.com/meta_with_core",
4+
"$vocabulary": {
5+
"https://json-schema.org/draft/2020-12/vocab/core": true
6+
}
7+
}

tests/test_catalog.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pathlib
33
import tempfile
44
import uuid
5+
import itertools
56

67
import pytest
78

@@ -16,7 +17,8 @@
1617
LocalSource,
1718
RemoteSource,
1819
)
19-
from tests import example_schema, metaschema_uri_2020_12
20+
from jschon.vocabulary import Metaschema, Keyword
21+
from tests import example_schema, metaschema_uri_2020_12, core_vocab_uri_2020_12
2022

2123
json_example = {"foo": "bar"}
2224

@@ -142,6 +144,17 @@ def test_get_vocabulary(uri, is_known, catalog):
142144
catalog.get_vocabulary(URI(uri))
143145

144146

147+
def test_create_vocabulary(catalog):
148+
class CustomKeyword(Keyword):
149+
key = 'custom'
150+
151+
custom_uri = URI('https://example.com/custom')
152+
custom_vocab = catalog.create_vocabulary(custom_uri, CustomKeyword)
153+
assert custom_vocab.uri is custom_uri
154+
assert custom_vocab.kwclasses == {CustomKeyword.key: CustomKeyword}
155+
assert catalog.get_vocabulary(custom_uri) is custom_vocab
156+
157+
145158
@pytest.fixture
146159
def example_schema_uri():
147160
schema = JSONSchema(example_schema)
@@ -199,3 +212,67 @@ def test_metaschema_isolation():
199212
assert okay_schema.evaluate(JSON(True)).valid is True
200213
okay_schema = cached_schema(uri, {"$ref": str(metaschema_uri_2020_12)}, None)
201214
assert okay_schema.evaluate(JSON(True)).valid is True
215+
216+
217+
def test_get_metaschema_detect_core(local_catalog):
218+
uri = URI('https://example.com/meta_with_core')
219+
core_vocab = local_catalog.get_vocabulary(core_vocab_uri_2020_12)
220+
221+
m = local_catalog.get_metaschema(uri)
222+
assert isinstance(m, Metaschema)
223+
assert m['$id'].data == str(uri)
224+
assert m.core_vocabulary.uri == core_vocab.uri
225+
assert m.kwclasses == core_vocab.kwclasses
226+
227+
s = local_catalog.get_schema(uri)
228+
assert isinstance(s, JSONSchema)
229+
assert s is not m
230+
assert s == m
231+
232+
233+
def test_get_metaschema_wrong_type(local_catalog):
234+
uri = URI('https://example.com/meta_with_core')
235+
non_meta = local_catalog.get_schema(uri)
236+
local_catalog._schema_cache['__meta__'][uri] = non_meta
237+
with pytest.raises(CatalogError, match='not a metaschema'):
238+
local_catalog.get_metaschema(uri)
239+
240+
241+
def test_get_metaschema_invalid(local_catalog):
242+
uri = URI('https://example.com/meta_invalid')
243+
with pytest.raises(CatalogError, match='metaschema is invalid'):
244+
local_catalog.create_metaschema(uri)
245+
246+
247+
def test_create_metaschema_no_vocabs(local_catalog):
248+
class ExtraKeyword(Keyword):
249+
key='extra'
250+
251+
uri = URI('https://example.com/meta_no_vocabs')
252+
core_vocab = local_catalog.get_vocabulary(core_vocab_uri_2020_12)
253+
applicator_vocab = local_catalog.get_vocabulary(
254+
URI('https://json-schema.org/draft/2020-12/vocab/applicator')
255+
)
256+
257+
extra_vocab = local_catalog.create_vocabulary(
258+
URI('https://example.com/vocab/whatever'),
259+
ExtraKeyword,
260+
)
261+
262+
m = local_catalog.create_metaschema(
263+
uri,
264+
core_vocab.uri,
265+
applicator_vocab.uri,
266+
extra_vocab.uri,
267+
)
268+
assert isinstance(m, Metaschema)
269+
assert m['$id'].data == str(uri)
270+
assert m.core_vocabulary is core_vocab
271+
assert m.kwclasses.keys() == frozenset(
272+
itertools.chain.from_iterable([
273+
v.kwclasses.keys() for v in
274+
[core_vocab, applicator_vocab, extra_vocab]
275+
])
276+
)
277+
m1 = local_catalog.get_metaschema(uri)
278+
assert m1 is m

0 commit comments

Comments
 (0)