Skip to content

feat(validation): support JSON Schema referencing in validation utils #4508

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ complexity-baseline:
$(info Maintenability index)
poetry run radon mi aws_lambda_powertools
$(info Cyclomatic complexity index)
poetry run xenon --max-absolute C --max-modules A --max-average A aws_lambda_powertools --exclude aws_lambda_powertools/shared/json_encoder.py
poetry run xenon --max-absolute C --max-modules A --max-average A aws_lambda_powertools --exclude aws_lambda_powertools/shared/json_encoder.py,aws_lambda_powertools/utilities/validation/base.py

#
# Use `poetry version <major>/<minor>/<patch>` for version bump
Expand Down
19 changes: 16 additions & 3 deletions aws_lambda_powertools/utilities/validation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,18 @@

import fastjsonschema # type: ignore

from .exceptions import InvalidSchemaFormatError, SchemaValidationError
from aws_lambda_powertools.utilities.validation.exceptions import InvalidSchemaFormatError, SchemaValidationError

logger = logging.getLogger(__name__)


def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats: Optional[Dict] = None):
def validate_data_against_schema(
data: Union[Dict, str],
schema: Dict,
formats: Optional[Dict] = None,
handlers: Optional[Dict] = None,
provider_options: Optional[Dict] = None,
):
"""Validate dict data against given JSON Schema

Parameters
Expand All @@ -19,6 +25,11 @@ def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats:
JSON Schema to validate against
formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
handlers: Dict
Custom methods to retrieve remote schemas, keyed off of URI scheme
provider_options: Dict
Arguments that will be passed directly to the underlying validation call, in this case fastjsonschema.validate.
For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate

Raises
------
Expand All @@ -29,7 +40,9 @@ def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats:
"""
try:
formats = formats or {}
fastjsonschema.validate(definition=schema, data=data, formats=formats)
handlers = handlers or {}
provider_options = provider_options or {}
fastjsonschema.validate(definition=schema, data=data, formats=formats, handlers=handlers, **provider_options)
except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e:
raise InvalidSchemaFormatError(f"Schema received: {schema}, Formats: {formats}. Error: {e}")
except fastjsonschema.JsonSchemaValueException as e:
Expand Down
2 changes: 1 addition & 1 deletion aws_lambda_powertools/utilities/validation/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, List, Optional

from ...exceptions import InvalidEnvelopeExpressionError
from aws_lambda_powertools.exceptions import InvalidEnvelopeExpressionError


class SchemaValidationError(Exception):
Expand Down
50 changes: 44 additions & 6 deletions aws_lambda_powertools/utilities/validation/validator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging
from typing import Any, Callable, Dict, Optional, Union

from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
from aws_lambda_powertools.utilities import jmespath_utils

from ...middleware_factory import lambda_handler_decorator
from .base import validate_data_against_schema
from aws_lambda_powertools.utilities.validation.base import validate_data_against_schema

logger = logging.getLogger(__name__)

Expand All @@ -16,8 +15,12 @@ def validator(
context: Any,
inbound_schema: Optional[Dict] = None,
inbound_formats: Optional[Dict] = None,
inbound_handlers: Optional[Dict] = None,
inbound_provider_options: Optional[Dict] = None,
outbound_schema: Optional[Dict] = None,
outbound_formats: Optional[Dict] = None,
outbound_handlers: Optional[Dict] = None,
outbound_provider_options: Optional[Dict] = None,
envelope: str = "",
jmespath_options: Optional[Dict] = None,
**kwargs: Any,
Expand All @@ -44,6 +47,17 @@ def validator(
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
outbound_formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
inbound_handlers: Dict
Custom methods to retrieve remote schemas, keyed off of URI scheme
outbound_handlers: Dict
Custom methods to retrieve remote schemas, keyed off of URI scheme
inbound_provider_options: Dict
Arguments that will be passed directly to the underlying validation call, in this case fastjsonschema.validate.
For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate
outbound_provider_options: Dict
Arguments that will be passed directly to the underlying validation call, in this case fastjsonschema.validate.
For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate


Example
-------
Expand Down Expand Up @@ -127,13 +141,25 @@ def handler(event, context):

if inbound_schema:
logger.debug("Validating inbound event")
validate_data_against_schema(data=event, schema=inbound_schema, formats=inbound_formats)
validate_data_against_schema(
data=event,
schema=inbound_schema,
formats=inbound_formats,
handlers=inbound_handlers,
provider_options=inbound_provider_options,
)

response = handler(event, context, **kwargs)

if outbound_schema:
logger.debug("Validating outbound event")
validate_data_against_schema(data=response, schema=outbound_schema, formats=outbound_formats)
validate_data_against_schema(
data=response,
schema=outbound_schema,
formats=outbound_formats,
handlers=outbound_handlers,
provider_options=outbound_provider_options,
)

return response

Expand All @@ -142,6 +168,8 @@ def validate(
event: Any,
schema: Dict,
formats: Optional[Dict] = None,
handlers: Optional[Dict] = None,
provider_options: Optional[Dict] = None,
envelope: Optional[str] = None,
jmespath_options: Optional[Dict] = None,
):
Expand All @@ -161,6 +189,10 @@ def validate(
Alternative JMESPath options to be included when filtering expr
formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
handlers: Dict
Custom methods to retrieve remote schemas, keyed off of URI scheme
provider_options: Dict
Arguments that will be passed directly to the underlying validate call

Example
-------
Expand Down Expand Up @@ -229,4 +261,10 @@ def handler(event, context):
jmespath_options=jmespath_options,
)

validate_data_against_schema(data=event, schema=schema, formats=formats)
validate_data_against_schema(
data=event,
schema=schema,
formats=formats,
handlers=handlers,
provider_options=provider_options,
)
34 changes: 32 additions & 2 deletions docs/utilities/validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ Here is a handy table with built-in envelopes along with their JMESPath expressi
| **`API_GATEWAY_HTTP`** | `powertools_json(body)` |
| **`API_GATEWAY_REST`** | `powertools_json(body)` |
| **`CLOUDWATCH_EVENTS_SCHEDULED`** | `detail` |
| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]` |
| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data)` or `powertools_json(@).logEvents[*]` |
| **`EVENTBRIDGE`** | `detail` |
| **`KINESIS_DATA_STREAM`** | `Records[*].kinesis.powertools_json(powertools_base64(data))` |
| **`SNS`** | `Records[0].Sns.Message | powertools_json(@)` |
| **`SNS`** | `Records[0].Sns.Message` or `powertools_json(@)` |
| **`SQS`** | `Records[*].powertools_json(body)` |

## Advanced
Expand Down Expand Up @@ -199,3 +199,33 @@ You can use our built-in [JMESPath functions](./jmespath_functions.md){target="_

???+ info
We use these for [built-in envelopes](#built-in-envelopes) to easily decode and unwrap events from sources like Kinesis, CloudWatch Logs, etc.

### Validating with external references

JSON Schema [allows schemas to reference other schemas](https://json-schema.org/understanding-json-schema/structuring#dollarref) using the `$ref` keyword with a URI value. By default, `fastjsonschema` will make an HTTP request to resolve this URI.

You can use the `handlers` parameter to have full control over how referenced schemas are fetched. This is useful when you want to optimize caching, reduce HTTP calls, or fetch schemas from non-HTTP endpoints.

=== "custom_handlers.py"

```python hl_lines="1 7 8 11"
--8<-- "examples/validation/src/custom_handlers.py"
```

=== "custom_handlers_parent_schema"

```python hl_lines="1 7"
--8<-- "examples/validation/src/custom_handlers_schema.py"
```

=== "custom_handlers_child_schema"

```python hl_lines="12"
--8<-- "examples/validation/src/custom_handlers_schema.py"
```

=== "custom_handlers_payload.json"

```json hl_lines="2"
--8<-- "examples/validation/src/custom_handlers_payload.json"
```
13 changes: 13 additions & 0 deletions examples/validation/src/custom_handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from custom_handlers_schema import CHILD_SCHEMA, PARENT_SCHEMA

from aws_lambda_powertools.utilities.typing import LambdaContext
from aws_lambda_powertools.utilities.validation import validator


def get_child_schema(uri: str):  # custom reference handler: `uri` is accepted but not inspected
return CHILD_SCHEMA  # the same child schema is returned for every URI this handler receives


@validator(inbound_schema=PARENT_SCHEMA, inbound_handlers={"https": get_child_schema})  # handlers are keyed by URI scheme
def lambda_handler(event, context: LambdaContext) -> dict:  # decorator validates the inbound event against PARENT_SCHEMA first
return event  # hand the event back unchanged
6 changes: 6 additions & 0 deletions examples/validation/src/custom_handlers_payload.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"ParentSchema":
{
"project": "powertools"
}
}
22 changes: 22 additions & 0 deletions examples/validation/src/custom_handlers_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
PARENT_SCHEMA = {  # draft-07 schema whose single property is defined by an external reference
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://example.com/schemas/parent.json",
"type": "object",
"properties": {
"ParentSchema": {
"$ref": "https://SCHEMA",  # external reference; the "https" URI scheme selects the handler used to resolve it
},
},
}

CHILD_SCHEMA = {  # draft-07 schema describing an object with a string "project" property
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://example.com/schemas/child.json",
"type": "object",
"properties": {
"project": {
"type": "string",
},
},
"required": ["project"],  # "project" is the only mandatory key
}
52 changes: 52 additions & 0 deletions tests/functional/validator/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,53 @@ def schema_response():
}


@pytest.fixture
def schema_refs():
    """Provide two draft-07 schemas, keyed by name, where the parent references the child.

    ``ParentSchema`` requires a ``parent_object`` property that is defined by a
    ``$ref`` to ``testschema://ChildSchema``. ``ChildSchema`` requires a
    ``child_string`` property of type string.
    """
    child_schema = {
        "$schema": "http://json-schema.org/draft-07/schema",
        "$id": "testschema://ChildSchema",
        "type": "object",
        "title": "Sample schema",
        "description": "Sample JSON Schema that is referenced by another schema",
        "examples": [{"child_string": "hello world"}],
        "required": ["child_string"],
        "properties": {
            "child_string": {
                "$id": "#/properties/child_string",
                "type": "string",
                "title": "The child string",
                "examples": ["hello world"],
            },
        },
    }

    parent_schema = {
        "$schema": "http://json-schema.org/draft-07/schema",
        "$id": "testschema://ParentSchema",
        "type": "object",
        "title": "Sample schema",
        "description": "Sample JSON Schema that references another schema",
        "examples": [{"parent_object": {"child_string": "hello world"}}],
        "required": ["parent_object"],
        "properties": {
            "parent_object": {
                "$id": "#/properties/parent_object",
                # Points at the child schema through the custom "testschema" URI scheme.
                "$ref": "testschema://ChildSchema",
            },
        },
    }

    return {"ParentSchema": parent_schema, "ChildSchema": child_schema}


@pytest.fixture
def schema_ref_handlers(schema_refs):
    """Map the ``testschema`` URI scheme to a resolver backed by the ``schema_refs`` fixture."""

    def handle_test_schema(uri):
        # The segment after the "://" separator is the key into schema_refs,
        # e.g. "testschema://ChildSchema" -> "ChildSchema".
        uri_parts = uri.split("://")
        return schema_refs[uri_parts[1]]

    handlers = {"testschema": handle_test_schema}
    return handlers


@pytest.fixture
def raw_event():
return {"message": "hello hello", "username": "blah blah"}
Expand All @@ -105,6 +152,11 @@ def wrapped_event_base64_json_string():
return {"data": "eyJtZXNzYWdlIjogImhlbGxvIGhlbGxvIiwgInVzZXJuYW1lIjogImJsYWggYmxhaCJ9="}


@pytest.fixture
def parent_ref_event():
    """Provide an event with a ``parent_object`` that wraps a ``child_string`` value."""
    child_payload = {"child_string": "hello world"}
    return {"parent_object": child_payload}


@pytest.fixture
def raw_response():
return {"statusCode": 200, "body": "response"}
Expand Down
4 changes: 4 additions & 0 deletions tests/functional/validator/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ def test_validate_invalid_custom_format(
)


def test_validate_custom_handlers(schema_refs, schema_ref_handlers, parent_ref_event):
    """Validate an event against a schema whose ``$ref`` is resolved by a custom handler.

    The test passes as long as ``validate`` does not raise.
    """
    parent_schema = schema_refs["ParentSchema"]
    validate(
        event=parent_ref_event,
        schema=parent_schema,
        handlers=schema_ref_handlers,
    )


def test_validate_invalid_envelope_expression(schema, wrapped_event):
with pytest.raises(exceptions.InvalidEnvelopeExpressionError):
validate(event=wrapped_event, schema=schema, envelope=True)
Expand Down