Skip to content

feat: support custom formats in JSON Schema validation #247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions aws_lambda_powertools/utilities/validation/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Any, Dict
from typing import Any, Dict, Optional

import fastjsonschema
import jmespath
Expand All @@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)


def validate_data_against_schema(data: Dict, schema: Dict):
def validate_data_against_schema(data: Dict, schema: Dict, formats: Optional[Dict] = None):
"""Validate dict data against given JSON Schema

Parameters
Expand All @@ -20,6 +20,8 @@ def validate_data_against_schema(data: Dict, schema: Dict):
Data set to be validated
schema : Dict
JSON Schema to validate against
formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool

Raises
------
Expand All @@ -29,12 +31,12 @@ def validate_data_against_schema(data: Dict, schema: Dict):
When JSON schema provided is invalid
"""
try:
fastjsonschema.validate(definition=schema, data=data)
fastjsonschema.validate(definition=schema, data=data, formats=formats)
except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e:
raise InvalidSchemaFormatError(f"Schema received: {schema}, Formats: {formats}. Error: {e}")
except fastjsonschema.JsonSchemaException as e:
message = f"Failed schema validation. Error: {e.message}, Path: {e.path}, Data: {e.value}" # noqa: B306, E501
raise SchemaValidationError(message)
except (TypeError, AttributeError) as e:
raise InvalidSchemaFormatError(f"Schema received: {schema}. Error: {e}")


def unwrap_event_from_envelope(data: Dict, envelope: str, jmespath_options: Dict) -> Any:
Expand Down
83 changes: 50 additions & 33 deletions aws_lambda_powertools/utilities/validation/validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Any, Callable, Dict, Union
from typing import Any, Callable, Dict, Optional, Union

from ...middleware_factory import lambda_handler_decorator
from .base import unwrap_event_from_envelope, validate_data_against_schema
Expand All @@ -13,12 +13,35 @@ def validator(
event: Union[Dict, str],
context: Any,
inbound_schema: Dict = None,
inbound_formats: Optional[Dict] = None,
outbound_schema: Dict = None,
outbound_formats: Optional[Dict] = None,
envelope: str = None,
jmespath_options: Dict = None,
) -> Any:
"""Lambda handler decorator to validate incoming/outbound data using a JSON Schema

Parameters
----------
handler : Callable
Method to annotate on
event : Dict
Lambda event to be validated
context : Any
Lambda context object
inbound_schema : Dict
JSON Schema to validate incoming event
outbound_schema : Dict
JSON Schema to validate outbound event
envelope : Dict
JMESPath expression to filter data against
jmespath_options : Dict
Alternative JMESPath options to be included when filtering expr
inbound_formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
outbound_formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool

Example
-------

Expand Down Expand Up @@ -78,23 +101,6 @@ def handler(event, context):
def handler(event, context):
return event

Parameters
----------
handler : Callable
Method to annotate on
event : Dict
Lambda event to be validated
context : Any
Lambda context object
inbound_schema : Dict
JSON Schema to validate incoming event
outbound_schema : Dict
JSON Schema to validate outbound event
envelope : Dict
JMESPath expression to filter data against
jmespath_options : Dict
Alternative JMESPath options to be included when filtering expr

Returns
-------
Any
Expand All @@ -114,22 +120,44 @@ def handler(event, context):

if inbound_schema:
logger.debug("Validating inbound event")
validate_data_against_schema(data=event, schema=inbound_schema)
validate_data_against_schema(data=event, schema=inbound_schema, formats=inbound_formats)

response = handler(event, context)

if outbound_schema:
logger.debug("Validating outbound event")
validate_data_against_schema(data=response, schema=outbound_schema)
validate_data_against_schema(data=response, schema=outbound_schema, formats=outbound_formats)

return response


def validate(event: Dict, schema: Dict = None, envelope: str = None, jmespath_options: Dict = None):
def validate(
event: Dict,
schema: Dict = None,
formats: Optional[Dict] = None,
envelope: str = None,
jmespath_options: Dict = None,
):
"""Standalone function to validate event data using a JSON Schema

Typically used when you need more control over the validation process.

Parameters
----------
event : Dict
Lambda event to be validated
schema : Dict
JSON Schema to validate incoming event
envelope : Dict
JMESPath expression to filter data against
jmespath_options : Dict
Alternative JMESPath options to be included when filtering expr
formats: Dict
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool

Example
-------

**Validate event**

from aws_lambda_powertools.utilities.validation import validate
Expand Down Expand Up @@ -178,17 +206,6 @@ def handler(event, context):
validate(event=event, schema=json_schema_dict, envelope="awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]")
return event

Parameters
----------
event : Dict
Lambda event to be validated
schema : Dict
JSON Schema to validate incoming event
envelope : Dict
JMESPath expression to filter data against
jmespath_options : Dict
Alternative JMESPath options to be included when filtering expr

Raises
------
SchemaValidationError
Expand All @@ -201,4 +218,4 @@ def handler(event, context):
if envelope:
event = unwrap_event_from_envelope(data=event, envelope=envelope, jmespath_options=jmespath_options)

validate_data_against_schema(data=event, schema=schema)
validate_data_against_schema(data=event, schema=schema, formats=formats)
34 changes: 34 additions & 0 deletions docs/content/utilities/validation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,40 @@ def handler(event, context):
return event
```

### Validating custom formats

> New in 1.10.0
> **NOTE**: JSON Schema DRAFT 7 [has many new built-in formats](https://json-schema.org/understanding-json-schema/reference/string.html#format) such as date, time, and specifically a regex format which might be a better replacement for a custom format, if you do have control over the schema.

If you have JSON Schemas with custom formats, for example an `int64` format for high-precision integers, you can pass an optional validator for each format using the `formats` parameter; otherwise, validation will fail:

**Example of custom integer format**

```json
{
"lastModifiedTime": {
"format": "int64",
"type": "integer"
}
}
```

For each format, defined as a dictionary key, you must provide either a regex or a function that returns a boolean, which tells the validator how to proceed when it encounters that format.

```python
from aws_lambda_powertools.utilities.validation import validate

event = {} # some event
schema_with_custom_format = {} # some JSON schema that defines a custom format

custom_format = {
"int64": True, # simply ignore it,
"positive": lambda x: False if x < 0 else True
}

validate(event=event, schema=schema_with_custom_format, formats=custom_format)
```

## Unwrapping events prior to validation

You might want to validate only a portion of your event - this is what the `envelope` parameter is for.
Expand Down
Loading