Skip to content

Commit 4c5ea71

Browse files
n2N8ZOliver Friesenheitorlessa
authored
feat: support custom formats in JSON Schema validation (#247)
* Issue: 246 * Issue: 246 * Issue: 246 test * fix: non-mutable argument, and type annotation * chore: adopt project code style * improv: catch invalid formats exception * docs: add a sub-section to explain its use Co-authored-by: Oliver Friesen <[email protected]> Co-authored-by: heitorlessa <[email protected]>
1 parent b5e3593 commit 4c5ea71

File tree

5 files changed

+322
-38
lines changed

5 files changed

+322
-38
lines changed

aws_lambda_powertools/utilities/validation/base.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from typing import Any, Dict
2+
from typing import Any, Dict, Optional
33

44
import fastjsonschema
55
import jmespath
@@ -11,7 +11,7 @@
1111
logger = logging.getLogger(__name__)
1212

1313

14-
def validate_data_against_schema(data: Dict, schema: Dict):
14+
def validate_data_against_schema(data: Dict, schema: Dict, formats: Optional[Dict] = None):
1515
"""Validate dict data against given JSON Schema
1616
1717
Parameters
@@ -20,6 +20,8 @@ def validate_data_against_schema(data: Dict, schema: Dict):
2020
Data set to be validated
2121
schema : Dict
2222
JSON Schema to validate against
23+
formats: Dict
24+
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
2325
2426
Raises
2527
------
@@ -29,12 +31,12 @@ def validate_data_against_schema(data: Dict, schema: Dict):
2931
When JSON schema provided is invalid
3032
"""
3133
try:
32-
fastjsonschema.validate(definition=schema, data=data)
34+
fastjsonschema.validate(definition=schema, data=data, formats=formats)
35+
except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e:
36+
raise InvalidSchemaFormatError(f"Schema received: {schema}, Formats: {formats}. Error: {e}")
3337
except fastjsonschema.JsonSchemaException as e:
3438
message = f"Failed schema validation. Error: {e.message}, Path: {e.path}, Data: {e.value}" # noqa: B306, E501
3539
raise SchemaValidationError(message)
36-
except (TypeError, AttributeError) as e:
37-
raise InvalidSchemaFormatError(f"Schema received: {schema}. Error: {e}")
3840

3941

4042
def unwrap_event_from_envelope(data: Dict, envelope: str, jmespath_options: Dict) -> Any:

aws_lambda_powertools/utilities/validation/validator.py

+50-33
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from typing import Any, Callable, Dict, Union
2+
from typing import Any, Callable, Dict, Optional, Union
33

44
from ...middleware_factory import lambda_handler_decorator
55
from .base import unwrap_event_from_envelope, validate_data_against_schema
@@ -13,12 +13,35 @@ def validator(
1313
event: Union[Dict, str],
1414
context: Any,
1515
inbound_schema: Dict = None,
16+
inbound_formats: Optional[Dict] = None,
1617
outbound_schema: Dict = None,
18+
outbound_formats: Optional[Dict] = None,
1719
envelope: str = None,
1820
jmespath_options: Dict = None,
1921
) -> Any:
2022
"""Lambda handler decorator to validate incoming/outbound data using a JSON Schema
2123
24+
Parameters
25+
----------
26+
handler : Callable
27+
Method to annotate on
28+
event : Dict
29+
Lambda event to be validated
30+
context : Any
31+
Lambda context object
32+
inbound_schema : Dict
33+
JSON Schema to validate incoming event
34+
outbound_schema : Dict
35+
JSON Schema to validate outbound event
36+
envelope : str
37+
JMESPath expression to filter data against
38+
jmespath_options : Dict
39+
Alternative JMESPath options to be included when filtering expr
40+
inbound_formats: Dict
41+
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
42+
outbound_formats: Dict
43+
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
44+
2245
Example
2346
-------
2447
@@ -78,23 +101,6 @@ def handler(event, context):
78101
def handler(event, context):
79102
return event
80103
81-
Parameters
82-
----------
83-
handler : Callable
84-
Method to annotate on
85-
event : Dict
86-
Lambda event to be validated
87-
context : Any
88-
Lambda context object
89-
inbound_schema : Dict
90-
JSON Schema to validate incoming event
91-
outbound_schema : Dict
92-
JSON Schema to validate outbound event
93-
envelope : Dict
94-
JMESPath expression to filter data against
95-
jmespath_options : Dict
96-
Alternative JMESPath options to be included when filtering expr
97-
98104
Returns
99105
-------
100106
Any
@@ -114,22 +120,44 @@ def handler(event, context):
114120

115121
if inbound_schema:
116122
logger.debug("Validating inbound event")
117-
validate_data_against_schema(data=event, schema=inbound_schema)
123+
validate_data_against_schema(data=event, schema=inbound_schema, formats=inbound_formats)
118124

119125
response = handler(event, context)
120126

121127
if outbound_schema:
122128
logger.debug("Validating outbound event")
123-
validate_data_against_schema(data=response, schema=outbound_schema)
129+
validate_data_against_schema(data=response, schema=outbound_schema, formats=outbound_formats)
124130

125131
return response
126132

127133

128-
def validate(event: Dict, schema: Dict = None, envelope: str = None, jmespath_options: Dict = None):
134+
def validate(
135+
event: Dict,
136+
schema: Dict = None,
137+
formats: Optional[Dict] = None,
138+
envelope: str = None,
139+
jmespath_options: Dict = None,
140+
):
129141
"""Standalone function to validate event data using a JSON Schema
130142
131143
Typically used when you need more control over the validation process.
132144
145+
Parameters
146+
----------
147+
event : Dict
148+
Lambda event to be validated
149+
schema : Dict
150+
JSON Schema to validate incoming event
151+
envelope : str
152+
JMESPath expression to filter data against
153+
jmespath_options : Dict
154+
Alternative JMESPath options to be included when filtering expr
155+
formats: Dict
156+
Custom formats containing a key (e.g. int64) and a value expressed as regex or callback returning bool
157+
158+
Example
159+
-------
160+
133161
**Validate event**
134162
135163
from aws_lambda_powertools.utilities.validation import validate
@@ -178,17 +206,6 @@ def handler(event, context):
178206
validate(event=event, schema=json_schema_dict, envelope="awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]")
179207
return event
180208
181-
Parameters
182-
----------
183-
event : Dict
184-
Lambda event to be validated
185-
schema : Dict
186-
JSON Schema to validate incoming event
187-
envelope : Dict
188-
JMESPath expression to filter data against
189-
jmespath_options : Dict
190-
Alternative JMESPath options to be included when filtering expr
191-
192209
Raises
193210
------
194211
SchemaValidationError
@@ -201,4 +218,4 @@ def handler(event, context):
201218
if envelope:
202219
event = unwrap_event_from_envelope(data=event, envelope=envelope, jmespath_options=jmespath_options)
203220

204-
validate_data_against_schema(data=event, schema=schema)
221+
validate_data_against_schema(data=event, schema=schema, formats=formats)

docs/content/utilities/validation.mdx

+34
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,40 @@ def handler(event, context):
6969
return event
7070
```
7171

72+
### Validating custom formats
73+
74+
> New in 1.10.0
75+
> **NOTE**: JSON Schema DRAFT 7 [has many new built-in formats](https://json-schema.org/understanding-json-schema/reference/string.html#format) such as date, time, and specifically a regex format which might be a better replacement for a custom format, if you do have control over the schema.
76+
77+
If you have JSON Schemas with custom formats, for example an `int64` format for high precision integers, you can pass an optional validation to handle each custom format using the `formats` parameter - otherwise, validation will fail:
78+
79+
**Example of custom integer format**
80+
81+
```json
82+
{
83+
"lastModifiedTime": {
84+
"format": "int64",
85+
"type": "integer"
86+
}
87+
}
88+
```
89+
90+
For each custom format, add a dictionary key named after that format; its value must be a regex, or a function that returns a boolean, to instruct the validator on how to proceed when encountering that format.
91+
92+
```python
93+
from aws_lambda_powertools.utilities.validation import validate
94+
95+
event = {} # some event
96+
schema_with_custom_format = {} # some JSON schema that defines a custom format
97+
98+
custom_format = {
99+
"int64": True, # simply ignore it,
100+
"positive": lambda x: False if x < 0 else True
101+
}
102+
103+
validate(event=event, schema=schema_with_custom_format, formats=custom_format)
104+
```
105+
72106
## Unwrapping events prior to validation
73107

74108
You might want to validate only a portion of your event - This is what the `envelope` parameter is for.

0 commit comments

Comments
 (0)