Skip to content

Commit c509412

Browse files
feat(validation): support JSON Schema referencing in validation utils (#4508)
* feat: add support for fastjsonschema's handlers param in validation utilities * docs: document new handlers param in validation utilities * Add provider_options validation arg, fix test JSON schema * Update provider_options default arg value * Remove unused import * Lint files * Small changes + example refactor * Small changes + example refactor * Addressing Heitor's feedback * Addressing Heitor's feedback --------- Co-authored-by: Leandro Damascena <[email protected]>
1 parent d1bedfd commit c509412

File tree

10 files changed

+192
-13
lines changed

10 files changed

+192
-13
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ complexity-baseline:
8686
$(info Maintenability index)
8787
poetry run radon mi aws_lambda_powertools
8888
$(info Cyclomatic complexity index)
89-
poetry run xenon --max-absolute C --max-modules A --max-average A aws_lambda_powertools --exclude aws_lambda_powertools/shared/json_encoder.py
89+
poetry run xenon --max-absolute C --max-modules A --max-average A aws_lambda_powertools --exclude aws_lambda_powertools/shared/json_encoder.py,aws_lambda_powertools/utilities/validation/base.py
9090

9191
#
9292
# Use `poetry version <major>/<minor>/<patch>` for version bump

aws_lambda_powertools/utilities/validation/base.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,18 @@
33

44
import fastjsonschema # type: ignore
55

6-
from .exceptions import InvalidSchemaFormatError, SchemaValidationError
6+
from aws_lambda_powertools.utilities.validation.exceptions import InvalidSchemaFormatError, SchemaValidationError
77

88
logger = logging.getLogger(__name__)
99

1010

11-
def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats: Optional[Dict] = None):
11+
def validate_data_against_schema(
12+
data: Union[Dict, str],
13+
schema: Dict,
14+
formats: Optional[Dict] = None,
15+
handlers: Optional[Dict] = None,
16+
provider_options: Optional[Dict] = None,
17+
):
1218
"""Validate dict data against given JSON Schema
1319
1420
Parameters
@@ -19,6 +25,11 @@ def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats:
1925
JSON Schema to validate against
2026
formats: Dict
2127
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
28+
handlers: Dict
29+
Custom methods to retrieve remote schemas, keyed off of URI scheme
30+
provider_options: Dict
31+
Arguments that will be passed directly to the underlying validation call, in this case fastjsonschema.validate.
32+
For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate
2233
2334
Raises
2435
------
@@ -29,7 +40,9 @@ def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats:
2940
"""
3041
try:
3142
formats = formats or {}
32-
fastjsonschema.validate(definition=schema, data=data, formats=formats)
43+
handlers = handlers or {}
44+
provider_options = provider_options or {}
45+
fastjsonschema.validate(definition=schema, data=data, formats=formats, handlers=handlers, **provider_options)
3346
except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e:
3447
raise InvalidSchemaFormatError(f"Schema received: {schema}, Formats: {formats}. Error: {e}")
3548
except fastjsonschema.JsonSchemaValueException as e:

aws_lambda_powertools/utilities/validation/exceptions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Any, List, Optional
22

3-
from ...exceptions import InvalidEnvelopeExpressionError
3+
from aws_lambda_powertools.exceptions import InvalidEnvelopeExpressionError
44

55

66
class SchemaValidationError(Exception):

aws_lambda_powertools/utilities/validation/validator.py

+44-6
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import logging
22
from typing import Any, Callable, Dict, Optional, Union
33

4+
from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
45
from aws_lambda_powertools.utilities import jmespath_utils
5-
6-
from ...middleware_factory import lambda_handler_decorator
7-
from .base import validate_data_against_schema
6+
from aws_lambda_powertools.utilities.validation.base import validate_data_against_schema
87

98
logger = logging.getLogger(__name__)
109

@@ -16,8 +15,12 @@ def validator(
1615
context: Any,
1716
inbound_schema: Optional[Dict] = None,
1817
inbound_formats: Optional[Dict] = None,
18+
inbound_handlers: Optional[Dict] = None,
19+
inbound_provider_options: Optional[Dict] = None,
1920
outbound_schema: Optional[Dict] = None,
2021
outbound_formats: Optional[Dict] = None,
22+
outbound_handlers: Optional[Dict] = None,
23+
outbound_provider_options: Optional[Dict] = None,
2124
envelope: str = "",
2225
jmespath_options: Optional[Dict] = None,
2326
**kwargs: Any,
@@ -44,6 +47,17 @@ def validator(
4447
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
4548
outbound_formats: Dict
4649
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
50+
inbound_handlers: Dict
51+
Custom methods to retrieve remote schemas, keyed off of URI scheme
52+
outbound_handlers: Dict
53+
Custom methods to retrieve remote schemas, keyed off of URI scheme
54+
inbound_provider_options: Dict
55+
Arguments that will be passed directly to the underlying validation call, in this case fastjsonschema.validate.
56+
For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate
57+
outbound_provider_options: Dict
58+
Arguments that will be passed directly to the underlying validation call, in this case fastjsonschema.validate.
59+
For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate
60+
4761
4862
Example
4963
-------
@@ -127,13 +141,25 @@ def handler(event, context):
127141

128142
if inbound_schema:
129143
logger.debug("Validating inbound event")
130-
validate_data_against_schema(data=event, schema=inbound_schema, formats=inbound_formats)
144+
validate_data_against_schema(
145+
data=event,
146+
schema=inbound_schema,
147+
formats=inbound_formats,
148+
handlers=inbound_handlers,
149+
provider_options=inbound_provider_options,
150+
)
131151

132152
response = handler(event, context, **kwargs)
133153

134154
if outbound_schema:
135155
logger.debug("Validating outbound event")
136-
validate_data_against_schema(data=response, schema=outbound_schema, formats=outbound_formats)
156+
validate_data_against_schema(
157+
data=response,
158+
schema=outbound_schema,
159+
formats=outbound_formats,
160+
handlers=outbound_handlers,
161+
provider_options=outbound_provider_options,
162+
)
137163

138164
return response
139165

@@ -142,6 +168,8 @@ def validate(
142168
event: Any,
143169
schema: Dict,
144170
formats: Optional[Dict] = None,
171+
handlers: Optional[Dict] = None,
172+
provider_options: Optional[Dict] = None,
145173
envelope: Optional[str] = None,
146174
jmespath_options: Optional[Dict] = None,
147175
):
@@ -161,6 +189,10 @@ def validate(
161189
Alternative JMESPath options to be included when filtering expr
162190
formats: Dict
163191
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
192+
handlers: Dict
193+
Custom methods to retrieve remote schemas, keyed off of URI scheme
194+
provider_options: Dict
195+
Arguments that will be passed directly to the underlying validate call
164196
165197
Example
166198
-------
@@ -229,4 +261,10 @@ def handler(event, context):
229261
jmespath_options=jmespath_options,
230262
)
231263

232-
validate_data_against_schema(data=event, schema=schema, formats=formats)
264+
validate_data_against_schema(
265+
data=event,
266+
schema=schema,
267+
formats=formats,
268+
handlers=handlers,
269+
provider_options=provider_options,
270+
)

docs/utilities/validation.md

+32-2
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,10 @@ Here is a handy table with built-in envelopes along with their JMESPath expressi
147147
| **`API_GATEWAY_HTTP`** | `powertools_json(body)` |
148148
| **`API_GATEWAY_REST`** | `powertools_json(body)` |
149149
| **`CLOUDWATCH_EVENTS_SCHEDULED`** | `detail` |
150-
| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]` |
150+
| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data)` or `powertools_json(@).logEvents[*]` |
151151
| **`EVENTBRIDGE`** | `detail` |
152152
| **`KINESIS_DATA_STREAM`** | `Records[*].kinesis.powertools_json(powertools_base64(data))` |
153-
| **`SNS`** | `Records[0].Sns.Message | powertools_json(@)` |
153+
| **`SNS`** | `Records[0].Sns.Message` or `powertools_json(@)` |
154154
| **`SQS`** | `Records[*].powertools_json(body)` |
155155

156156
## Advanced
@@ -199,3 +199,33 @@ You can use our built-in [JMESPath functions](./jmespath_functions.md){target="_
199199

200200
???+ info
201201
We use these for [built-in envelopes](#built-in-envelopes) to easily decode and unwrap events from sources like Kinesis, CloudWatch Logs, etc.
202+
203+
### Validating with external references
204+
205+
JSON Schema [allows schemas to reference other schemas](https://json-schema.org/understanding-json-schema/structuring#dollarref) using the `$ref` keyword with a URI value. By default, `fastjsonschema` will make an HTTP request to resolve this URI.
206+
207+
You can use the `handlers` parameter to have full control over how referenced schemas are fetched. This is useful when you want to optimize caching, reduce HTTP calls, or fetch schemas from non-HTTP endpoints.
208+
209+
=== "custom_handlers.py"
210+
211+
```python hl_lines="1 7 8 11"
212+
--8<-- "examples/validation/src/custom_handlers.py"
213+
```
214+
215+
=== "custom_handlers_parent_schema"
216+
217+
```python hl_lines="1 7"
218+
--8<-- "examples/validation/src/custom_handlers_schema.py"
219+
```
220+
221+
=== "custom_handlers_child_schema"
222+
223+
```python hl_lines="12"
224+
--8<-- "examples/validation/src/custom_handlers_schema.py"
225+
```
226+
227+
=== "custom_handlers_payload.json"
228+
229+
```json hl_lines="2"
230+
--8<-- "examples/validation/src/custom_handlers_payload.json"
231+
```
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from custom_handlers_schema import CHILD_SCHEMA, PARENT_SCHEMA
2+
3+
from aws_lambda_powertools.utilities.typing import LambdaContext
4+
from aws_lambda_powertools.utilities.validation import validator
5+
6+
7+
# Function to return the child schema
8+
def get_child_schema(uri: str):
9+
return CHILD_SCHEMA
10+
11+
12+
@validator(inbound_schema=PARENT_SCHEMA, inbound_handlers={"https": get_child_schema})
13+
def lambda_handler(event, context: LambdaContext) -> dict:
14+
return event
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"ParentSchema":
3+
{
4+
"project": "powertools"
5+
}
6+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
PARENT_SCHEMA = {
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "https://example.com/schemas/parent.json",
4+
"type": "object",
5+
"properties": {
6+
"ParentSchema": {
7+
"$ref": "https://SCHEMA",
8+
},
9+
},
10+
}
11+
12+
CHILD_SCHEMA = {
13+
"$schema": "http://json-schema.org/draft-07/schema#",
14+
"$id": "https://example.com/schemas/child.json",
15+
"type": "object",
16+
"properties": {
17+
"project": {
18+
"type": "string",
19+
},
20+
},
21+
"required": ["project"],
22+
}

tests/functional/validator/conftest.py

+52
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,53 @@ def schema_response():
8585
}
8686

8787

88+
@pytest.fixture
89+
def schema_refs():
90+
return {
91+
"ParentSchema": {
92+
"$schema": "http://json-schema.org/draft-07/schema",
93+
"$id": "testschema://ParentSchema",
94+
"type": "object",
95+
"title": "Sample schema",
96+
"description": "Sample JSON Schema that references another schema",
97+
"examples": [{"parent_object": {"child_string": "hello world"}}],
98+
"required": ["parent_object"],
99+
"properties": {
100+
"parent_object": {
101+
"$id": "#/properties/parent_object",
102+
"$ref": "testschema://ChildSchema",
103+
},
104+
},
105+
},
106+
"ChildSchema": {
107+
"$schema": "http://json-schema.org/draft-07/schema",
108+
"$id": "testschema://ChildSchema",
109+
"type": "object",
110+
"title": "Sample schema",
111+
"description": "Sample JSON Schema that is referenced by another schema",
112+
"examples": [{"child_string": "hello world"}],
113+
"required": ["child_string"],
114+
"properties": {
115+
"child_string": {
116+
"$id": "#/properties/child_string",
117+
"type": "string",
118+
"title": "The child string",
119+
"examples": ["hello world"],
120+
},
121+
},
122+
},
123+
}
124+
125+
126+
@pytest.fixture
127+
def schema_ref_handlers(schema_refs):
128+
def handle_test_schema(uri):
129+
schema_key = uri.split("://")[1]
130+
return schema_refs[schema_key]
131+
132+
return {"testschema": handle_test_schema}
133+
134+
88135
@pytest.fixture
89136
def raw_event():
90137
return {"message": "hello hello", "username": "blah blah"}
@@ -105,6 +152,11 @@ def wrapped_event_base64_json_string():
105152
return {"data": "eyJtZXNzYWdlIjogImhlbGxvIGhlbGxvIiwgInVzZXJuYW1lIjogImJsYWggYmxhaCJ9="}
106153

107154

155+
@pytest.fixture
156+
def parent_ref_event():
157+
return {"parent_object": {"child_string": "hello world"}}
158+
159+
108160
@pytest.fixture
109161
def raw_response():
110162
return {"statusCode": 200, "body": "response"}

tests/functional/validator/test_validator.py

+4
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ def test_validate_invalid_custom_format(
8383
)
8484

8585

86+
def test_validate_custom_handlers(schema_refs, schema_ref_handlers, parent_ref_event):
87+
validate(event=parent_ref_event, schema=schema_refs["ParentSchema"], handlers=schema_ref_handlers)
88+
89+
8690
def test_validate_invalid_envelope_expression(schema, wrapped_event):
8791
with pytest.raises(exceptions.InvalidEnvelopeExpressionError):
8892
validate(event=wrapped_event, schema=schema, envelope=True)

0 commit comments

Comments
 (0)