diff --git a/Makefile b/Makefile index 85bb6a9d81d..b66744e936b 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ complexity-baseline: $(info Maintenability index) poetry run radon mi aws_lambda_powertools $(info Cyclomatic complexity index) - poetry run xenon --max-absolute C --max-modules A --max-average A aws_lambda_powertools --exclude aws_lambda_powertools/shared/json_encoder.py + poetry run xenon --max-absolute C --max-modules A --max-average A aws_lambda_powertools --exclude aws_lambda_powertools/shared/json_encoder.py,aws_lambda_powertools/utilities/validation/base.py # # Use `poetry version /` for version bump diff --git a/aws_lambda_powertools/utilities/validation/base.py b/aws_lambda_powertools/utilities/validation/base.py index 61d692d7f28..9d7a36874aa 100644 --- a/aws_lambda_powertools/utilities/validation/base.py +++ b/aws_lambda_powertools/utilities/validation/base.py @@ -3,12 +3,18 @@ import fastjsonschema # type: ignore -from .exceptions import InvalidSchemaFormatError, SchemaValidationError +from aws_lambda_powertools.utilities.validation.exceptions import InvalidSchemaFormatError, SchemaValidationError logger = logging.getLogger(__name__) -def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats: Optional[Dict] = None): +def validate_data_against_schema( + data: Union[Dict, str], + schema: Dict, + formats: Optional[Dict] = None, + handlers: Optional[Dict] = None, + provider_options: Optional[Dict] = None, +): """Validate dict data against given JSON Schema Parameters @@ -19,6 +25,11 @@ def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats: JSON Schema to validate against formats: Dict Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool + handlers: Dict + Custom methods to retrieve remote schemes, keyed off of URI scheme + provider_options: Dict + Arguments that will be passed directly to the underlying validation call, in this case fastjsonchema.validate. + For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate Raises ------ @@ -29,7 +40,9 @@ def validate_data_against_schema(data: Union[Dict, str], schema: Dict, formats: """ try: formats = formats or {} - fastjsonschema.validate(definition=schema, data=data, formats=formats) + handlers = handlers or {} + provider_options = provider_options or {} + fastjsonschema.validate(definition=schema, data=data, formats=formats, handlers=handlers, **provider_options) except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e: raise InvalidSchemaFormatError(f"Schema received: {schema}, Formats: {formats}. Error: {e}") except fastjsonschema.JsonSchemaValueException as e: diff --git a/aws_lambda_powertools/utilities/validation/exceptions.py b/aws_lambda_powertools/utilities/validation/exceptions.py index 8789e3f2e80..c9ba0e2a75e 100644 --- a/aws_lambda_powertools/utilities/validation/exceptions.py +++ b/aws_lambda_powertools/utilities/validation/exceptions.py @@ -1,6 +1,6 @@ from typing import Any, List, Optional -from ...exceptions import InvalidEnvelopeExpressionError +from aws_lambda_powertools.exceptions import InvalidEnvelopeExpressionError class SchemaValidationError(Exception): diff --git a/aws_lambda_powertools/utilities/validation/validator.py b/aws_lambda_powertools/utilities/validation/validator.py index 968656ee49c..74861f7de3f 100644 --- a/aws_lambda_powertools/utilities/validation/validator.py +++ b/aws_lambda_powertools/utilities/validation/validator.py @@ -1,10 +1,9 @@ import logging from typing import Any, Callable, Dict, Optional, Union +from aws_lambda_powertools.middleware_factory import lambda_handler_decorator from aws_lambda_powertools.utilities import jmespath_utils - -from ...middleware_factory import lambda_handler_decorator -from .base import validate_data_against_schema +from aws_lambda_powertools.utilities.validation.base import validate_data_against_schema logger = logging.getLogger(__name__) @@ -16,8 +15,12 @@ def validator( context: Any, inbound_schema: Optional[Dict] = None, inbound_formats: Optional[Dict] = None, + inbound_handlers: Optional[Dict] = None, + inbound_provider_options: Optional[Dict] = None, outbound_schema: Optional[Dict] = None, outbound_formats: Optional[Dict] = None, + outbound_handlers: Optional[Dict] = None, + outbound_provider_options: Optional[Dict] = None, envelope: str = "", jmespath_options: Optional[Dict] = None, **kwargs: Any, @@ -44,6 +47,17 @@ def validator( Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool outbound_formats: Dict Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool + inbound_handlers: Dict + Custom methods to retrieve remote schemes, keyed off of URI scheme + outbound_handlers: Dict + Custom methods to retrieve remote schemes, keyed off of URI scheme + inbound_provider_options: Dict + Arguments that will be passed directly to the underlying validation call, in this case fastjsonchema.validate. + For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate + outbound_provider_options: Dict + Arguments that will be passed directly to the underlying validation call, in this case fastjsonchema.validate. + For all supported arguments see: https://horejsek.github.io/python-fastjsonschema/#fastjsonschema.validate + Example ------- @@ -127,13 +141,25 @@ def handler(event, context): if inbound_schema: logger.debug("Validating inbound event") - validate_data_against_schema(data=event, schema=inbound_schema, formats=inbound_formats) + validate_data_against_schema( + data=event, + schema=inbound_schema, + formats=inbound_formats, + handlers=inbound_handlers, + provider_options=inbound_provider_options, + ) response = handler(event, context, **kwargs) if outbound_schema: logger.debug("Validating outbound event") - validate_data_against_schema(data=response, schema=outbound_schema, formats=outbound_formats) + validate_data_against_schema( + data=response, + schema=outbound_schema, + formats=outbound_formats, + handlers=outbound_handlers, + provider_options=outbound_provider_options, + ) return response @@ -142,6 +168,8 @@ def validate( event: Any, schema: Dict, formats: Optional[Dict] = None, + handlers: Optional[Dict] = None, + provider_options: Optional[Dict] = None, envelope: Optional[str] = None, jmespath_options: Optional[Dict] = None, ): @@ -161,6 +189,10 @@ def validate( Alternative JMESPath options to be included when filtering expr formats: Dict Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool + handlers: Dict + Custom methods to retrieve remote schemes, keyed off of URI scheme + provider_options: Dict + Arguments that will be passed directly to the underlying validate call Example ------- @@ -229,4 +261,10 @@ def handler(event, context): jmespath_options=jmespath_options, ) - validate_data_against_schema(data=event, schema=schema, formats=formats) + validate_data_against_schema( + data=event, + schema=schema, + formats=formats, + handlers=handlers, + provider_options=provider_options, + ) diff --git a/docs/utilities/validation.md b/docs/utilities/validation.md index 1b569ddc14c..51085d417fa 100644 --- a/docs/utilities/validation.md +++ b/docs/utilities/validation.md @@ -147,10 +147,10 @@ Here is a handy table with built-in envelopes along with their JMESPath expressi | **`API_GATEWAY_HTTP`** | `powertools_json(body)` | | **`API_GATEWAY_REST`** | `powertools_json(body)` | | **`CLOUDWATCH_EVENTS_SCHEDULED`** | `detail` | -| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]` | +| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data)` or `powertools_json(@).logEvents[*]` | | **`EVENTBRIDGE`** | `detail` | | **`KINESIS_DATA_STREAM`** | `Records[*].kinesis.powertools_json(powertools_base64(data))` | -| **`SNS`** | `Records[0].Sns.Message | powertools_json(@)` | +| **`SNS`** | `Records[0].Sns.Message` or `powertools_json(@)` | | **`SQS`** | `Records[*].powertools_json(body)` | ## Advanced @@ -199,3 +199,33 @@ You can use our built-in [JMESPath functions](./jmespath_functions.md){target="_ ???+ info We use these for [built-in envelopes](#built-in-envelopes) to easily to decode and unwrap events from sources like Kinesis, CloudWatch Logs, etc. + +### Validating with external references + +JSON Schema [allows schemas to reference other schemas](https://json-schema.org/understanding-json-schema/structuring#dollarref) using the `$ref` keyword with a URI value. By default, `fastjsonschema` will make a HTTP request to resolve this URI. + +You can use `handlers` parameter to have full control over how references schemas are fetched. This is useful when you might want to optimize caching, reducing HTTP calls, or fetching them from non-HTTP endpoints. + +=== "custom_handlers.py" + + ```python hl_lines="1 7 8 11" + --8<-- "examples/validation/src/custom_handlers.py" + ``` + +=== "custom_handlers_parent_schema" + + ```python hl_lines="1 7" + --8<-- "examples/validation/src/custom_handlers_schema.py" + ``` + +=== "custom_handlers_child_schema" + + ```python hl_lines="12" + --8<-- "examples/validation/src/custom_handlers_schema.py" + ``` + +=== "custom_handlers_payload.json" + + ```json hl_lines="2" + --8<-- "examples/validation/src/custom_handlers_payload.json" + ``` diff --git a/examples/validation/src/custom_handlers.py b/examples/validation/src/custom_handlers.py new file mode 100644 index 00000000000..4cbc5d65b93 --- /dev/null +++ b/examples/validation/src/custom_handlers.py @@ -0,0 +1,14 @@ +from custom_handlers_schema import CHILD_SCHEMA, PARENT_SCHEMA + +from aws_lambda_powertools.utilities.typing import LambdaContext +from aws_lambda_powertools.utilities.validation import validator + + +# Function to return the child schema +def get_child_schema(uri: str): + return CHILD_SCHEMA + + +@validator(inbound_schema=PARENT_SCHEMA, inbound_handlers={"https": get_child_schema}) +def lambda_handler(event, context: LambdaContext) -> dict: + return event diff --git a/examples/validation/src/custom_handlers_payload.json b/examples/validation/src/custom_handlers_payload.json new file mode 100644 index 00000000000..09ab994f892 --- /dev/null +++ b/examples/validation/src/custom_handlers_payload.json @@ -0,0 +1,6 @@ +{ + "ParentSchema": + { + "project": "powertools" + } +} diff --git a/examples/validation/src/custom_handlers_schema.py b/examples/validation/src/custom_handlers_schema.py new file mode 100644 index 00000000000..ab911e3d63f --- /dev/null +++ b/examples/validation/src/custom_handlers_schema.py @@ -0,0 +1,22 @@ +PARENT_SCHEMA = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/schemas/parent.json", + "type": "object", + "properties": { + "ParentSchema": { + "$ref": "https://SCHEMA", + }, + }, +} + +CHILD_SCHEMA = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/schemas/child.json", + "type": "object", + "properties": { + "project": { + "type": "string", + }, + }, + "required": ["project"], +} diff --git a/tests/functional/validator/conftest.py b/tests/functional/validator/conftest.py index 750f7648d40..3b9033c82d4 100644 --- a/tests/functional/validator/conftest.py +++ b/tests/functional/validator/conftest.py @@ -85,6 +85,53 @@ def schema_response(): } +@pytest.fixture +def schema_refs(): + return { + "ParentSchema": { + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "testschema://ParentSchema", + "type": "object", + "title": "Sample schema", + "description": "Sample JSON Schema that references another schema", + "examples": [{"parent_object": {"child_string": "hello world"}}], + "required": ["parent_object"], + "properties": { + "parent_object": { + "$id": "#/properties/parent_object", + "$ref": "testschema://ChildSchema", + }, + }, + }, + "ChildSchema": { + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "testschema://ChildSchema", + "type": "object", + "title": "Sample schema", + "description": "Sample JSON Schema that is referenced by another schema", + "examples": [{"child_string": "hello world"}], + "required": ["child_string"], + "properties": { + "child_string": { + "$id": "#/properties/child_string", + "type": "string", + "title": "The child string", + "examples": ["hello world"], + }, + }, + }, + } + + +@pytest.fixture +def schema_ref_handlers(schema_refs): + def handle_test_schema(uri): + schema_key = uri.split("://")[1] + return schema_refs[schema_key] + + return {"testschema": handle_test_schema} + + @pytest.fixture def raw_event(): return {"message": "hello hello", "username": "blah blah"} @@ -105,6 +152,11 @@ def wrapped_event_base64_json_string(): return {"data": "eyJtZXNzYWdlIjogImhlbGxvIGhlbGxvIiwgInVzZXJuYW1lIjogImJsYWggYmxhaCJ9="} +@pytest.fixture +def parent_ref_event(): + return {"parent_object": {"child_string": "hello world"}} + + @pytest.fixture def raw_response(): return {"statusCode": 200, "body": "response"} diff --git a/tests/functional/validator/test_validator.py b/tests/functional/validator/test_validator.py index 23b4943223a..f18787990ff 100644 --- a/tests/functional/validator/test_validator.py +++ b/tests/functional/validator/test_validator.py @@ -83,6 +83,10 @@ def test_validate_invalid_custom_format( ) +def test_validate_custom_handlers(schema_refs, schema_ref_handlers, parent_ref_event): + validate(event=parent_ref_event, schema=schema_refs["ParentSchema"], handlers=schema_ref_handlers) + + def test_validate_invalid_envelope_expression(schema, wrapped_event): with pytest.raises(exceptions.InvalidEnvelopeExpressionError): validate(event=wrapped_event, schema=schema, envelope=True)