diff --git a/aws_lambda_powertools/utilities/parser/__init__.py b/aws_lambda_powertools/utilities/parser/__init__.py index 62aa4bd73d7..1bc67934b13 100644 --- a/aws_lambda_powertools/utilities/parser/__init__.py +++ b/aws_lambda_powertools/utilities/parser/__init__.py @@ -2,16 +2,17 @@ """ from . import envelopes from .envelopes import BaseEnvelope -from .exceptions import ModelValidationError -from .parser import event_parser -from .pydantic import BaseModel, root_validator, validator +from .parser import event_parser, parse +from .pydantic import BaseModel, Field, ValidationError, root_validator, validator __all__ = [ "event_parser", + "parse", "envelopes", "BaseEnvelope", "BaseModel", + "Field", "validator", "root_validator", - "ModelValidationError", + "ValidationError", ] diff --git a/aws_lambda_powertools/utilities/parser/envelopes/__init__.py b/aws_lambda_powertools/utilities/parser/envelopes/__init__.py index 766021a3f92..2398840a756 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/__init__.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/__init__.py @@ -1,6 +1,6 @@ from .base import BaseEnvelope -from .dynamodb import DynamoDBEnvelope +from .dynamodb import DynamoDBStreamEnvelope from .event_bridge import EventBridgeEnvelope from .sqs import SqsEnvelope -__all__ = ["DynamoDBEnvelope", "EventBridgeEnvelope", "SqsEnvelope", "BaseEnvelope"] +__all__ = ["DynamoDBStreamEnvelope", "EventBridgeEnvelope", "SqsEnvelope", "BaseEnvelope"] diff --git a/aws_lambda_powertools/utilities/parser/envelopes/base.py b/aws_lambda_powertools/utilities/parser/envelopes/base.py index baf6cd33420..4cf9c1b94ec 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/base.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/base.py @@ -1,8 +1,8 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Dict, Union +from typing import Any, Dict, Optional, TypeVar, Union -from pydantic import BaseModel +from ..types import Model logger = logging.getLogger(__name__) @@ -11,14 +11,14 @@ class BaseEnvelope(ABC): """ABC implementation for creating a supported Envelope""" @staticmethod - def _parse(data: Union[Dict[str, Any], str], model: BaseModel) -> Any: + def _parse(data: Optional[Union[Dict[str, Any], Any]], model: Model) -> Union[Model, None]: """Parses envelope data against model provided Parameters ---------- data : Dict Data to be parsed and validated - model + model : Model Data model to parse and validate data against Returns @@ -38,7 +38,7 @@ def _parse(data: Union[Dict[str, Any], str], model: BaseModel) -> Any: return model.parse_obj(data) @abstractmethod - def parse(self, data: Dict[str, Any], model: BaseModel): + def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: Model): """Implementation to parse data against envelope model, then against the data model NOTE: Call `_parse` method to fully parse data with model provided. 
@@ -56,3 +56,8 @@ def parse(...): return self._parse(data=parsed_envelope.detail, model=data_model) """ return NotImplemented # pragma: no cover + + +# Generic to support type annotations throughout parser +# Note: Can't be defined under types.py due to circular dependency +Envelope = TypeVar("Envelope", bound=BaseEnvelope) diff --git a/aws_lambda_powertools/utilities/parser/envelopes/dynamodb.py b/aws_lambda_powertools/utilities/parser/envelopes/dynamodb.py index ef166a5c48f..38d19ffe1c6 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/dynamodb.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/dynamodb.py @@ -1,45 +1,44 @@ import logging -from typing import Any, Dict, List - -from pydantic import BaseModel -from typing_extensions import Literal +from typing import Any, Dict, List, Optional, Union from ..models import DynamoDBStreamModel +from ..types import Model from .base import BaseEnvelope logger = logging.getLogger(__name__) -class DynamoDBEnvelope(BaseEnvelope): +class DynamoDBStreamEnvelope(BaseEnvelope): """ DynamoDB Stream Envelope to extract data within NewImage/OldImage Note: Values are the parsed models. Images' values can also be None, and length of the list is the record's amount in the original event. """ - def parse(self, data: Dict[str, Any], model: BaseModel) -> List[Dict[Literal["NewImage", "OldImage"], BaseModel]]: + def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: Model) -> List[Dict[str, Optional[Model]]]: """Parses DynamoDB Stream records found in either NewImage and OldImage with model provided Parameters ---------- data : Dict Lambda event to be parsed - model : BaseModel + model : Model Data model provided to parse after extracting data using envelope Returns ------- List - List of records parsed with model provided + List of dictionaries with NewImage and OldImage records parsed with model provided """ - parsed_envelope = DynamoDBStreamModel(**data) + logger.debug(f"Parsing incoming data with DynamoDB Stream model {DynamoDBStreamModel}") + parsed_envelope = DynamoDBStreamModel.parse_obj(data) output = [] + logger.debug(f"Parsing DynamoDB Stream new and old records with {model}") for record in parsed_envelope.Records: output.append( { - "NewImage": self._parse(record.dynamodb.NewImage, model), - "OldImage": self._parse(record.dynamodb.OldImage, model), + "NewImage": self._parse(data=record.dynamodb.NewImage, model=model), + "OldImage": self._parse(data=record.dynamodb.OldImage, model=model), } ) - # noinspection PyTypeChecker return output diff --git a/aws_lambda_powertools/utilities/parser/envelopes/event_bridge.py b/aws_lambda_powertools/utilities/parser/envelopes/event_bridge.py index 8b91266e848..5b40926b482 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/event_bridge.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/event_bridge.py @@ -1,9 +1,8 @@ import logging -from typing import Any, Dict - -from pydantic import BaseModel +from typing import Any, Dict, Optional, Union from ..models import EventBridgeModel +from ..types import Model from .base import BaseEnvelope logger = logging.getLogger(__name__) @@ -12,14 +11,14 @@ class EventBridgeEnvelope(BaseEnvelope): """EventBridge envelope to extract data within detail key""" - def parse(self, data: Dict[str, Any], model: BaseModel) -> BaseModel: + def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: Model) -> Optional[Model]: """Parses data found with model provided Parameters ---------- data : Dict Lambda event to be parsed - model : 
BaseModel + model : Model Data model provided to parse after extracting data using envelope Returns @@ -27,5 +26,7 @@ def parse(self, data: Dict[str, Any], model: BaseModel) -> BaseModel: Any Parsed detail payload with model provided """ - parsed_envelope = EventBridgeModel(**data) + logger.debug(f"Parsing incoming data with EventBridge model {EventBridgeModel}") + parsed_envelope = EventBridgeModel.parse_obj(data) + logger.debug(f"Parsing event payload in `detail` with {model}") return self._parse(data=parsed_envelope.detail, model=model) diff --git a/aws_lambda_powertools/utilities/parser/envelopes/sqs.py b/aws_lambda_powertools/utilities/parser/envelopes/sqs.py index 7bf326206f3..3ed479ffa31 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/sqs.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/sqs.py @@ -1,9 +1,8 @@ import logging -from typing import Any, Dict, List, Union - -from pydantic import BaseModel +from typing import Any, Dict, List, Optional, Union from ..models import SqsModel +from ..types import Model from .base import BaseEnvelope logger = logging.getLogger(__name__) @@ -19,14 +18,14 @@ class SqsEnvelope(BaseEnvelope): all items in the list will be parsed as str and npt as JSON (and vice versa) """ - def parse(self, data: Dict[str, Any], model: Union[BaseModel, str]) -> List[Union[BaseModel, str]]: + def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: Model) -> List[Optional[Model]]: """Parses records found with model provided Parameters ---------- data : Dict Lambda event to be parsed - model : BaseModel + model : Model Data model provided to parse after extracting data using envelope Returns @@ -34,8 +33,10 @@ def parse(self, data: Dict[str, Any], model: Union[BaseModel, str]) -> List[Unio List List of records parsed with model provided """ - parsed_envelope = SqsModel(**data) + logger.debug(f"Parsing incoming data with SQS model {SqsModel}") + parsed_envelope = SqsModel.parse_obj(data) output = [] + logger.debug(f"Parsing SQS records in `body` with {model}") for record in parsed_envelope.Records: - output.append(self._parse(record.body, model)) + output.append(self._parse(data=record.body, model=model)) return output diff --git a/aws_lambda_powertools/utilities/parser/exceptions.py b/aws_lambda_powertools/utilities/parser/exceptions.py index 93e259df371..0df217e8522 100644 --- a/aws_lambda_powertools/utilities/parser/exceptions.py +++ b/aws_lambda_powertools/utilities/parser/exceptions.py @@ -2,9 +2,5 @@ class InvalidEnvelopeError(Exception): """Input envelope is not callable and instance of BaseEnvelope""" -class ModelValidationError(Exception): - """Input data does not conform with model""" - - class InvalidModelTypeError(Exception): """Input data model does not implement BaseModel""" diff --git a/aws_lambda_powertools/utilities/parser/parser.py b/aws_lambda_powertools/utilities/parser/parser.py index a58ee90f4e9..16cdc45c907 100644 --- a/aws_lambda_powertools/utilities/parser/parser.py +++ b/aws_lambda_powertools/utilities/parser/parser.py @@ -1,23 +1,22 @@ import logging from typing import Any, Callable, Dict, Optional -from pydantic import BaseModel, ValidationError - from ...middleware_factory import lambda_handler_decorator from ..typing import LambdaContext -from .envelopes.base import BaseEnvelope -from .exceptions import InvalidEnvelopeError, InvalidModelTypeError, ModelValidationError +from .envelopes.base import Envelope +from .exceptions import InvalidEnvelopeError, InvalidModelTypeError +from .types import Model logger = 
logging.getLogger(__name__) @lambda_handler_decorator def event_parser( - handler: Callable[[Dict, Any], Any], + handler: Callable[[Any, LambdaContext], Any], event: Dict[str, Any], context: LambdaContext, - model: BaseModel, - envelope: Optional[BaseEnvelope] = None, + model: Model, + envelope: Optional[Envelope] = None, ) -> Any: """Lambda handler decorator to parse & validate events using Pydantic models @@ -65,14 +64,14 @@ def handler(event: Order, context: LambdaContext): Lambda event to be parsed & validated context: LambdaContext Lambda context object - model: BaseModel + model: Model Your data model that will replace the event. - envelope: BaseEnvelope + envelope: Envelope Optional envelope to extract the model from Raises ------ - ModelValidationError + ValidationError When input event does not conform with model provided InvalidModelTypeError When model given does not implement BaseModel @@ -84,7 +83,7 @@ def handler(event: Order, context: LambdaContext): return handler(parsed_event, context) -def parse(event: Dict[str, Any], model: BaseModel, envelope: Optional[BaseEnvelope] = None) -> Any: +def parse(event: Dict[str, Any], model: Model, envelope: Optional[Envelope] = None) -> Model: """Standalone function to parse & validate events using Pydantic models Typically used when you need fine-grained control over error handling compared to event_parser decorator. @@ -94,7 +93,7 @@ def parse(event: Dict[str, Any], model: BaseModel, envelope: Optional[BaseEnvelo **Lambda handler decorator to parse & validate event** - from aws_lambda_powertools.utilities.parser.exceptions import ModelValidationError + from aws_lambda_powertools.utilities.parser import ValidationError class Order(BaseModel): id: int @@ -104,7 +103,7 @@ class Order(BaseModel): def handler(event: Order, context: LambdaContext): try: parse(model=Order) - except ModelValidationError: + except ValidationError: ... **Lambda handler decorator to parse & validate event - using built-in envelope** @@ -117,21 +116,21 @@ class Order(BaseModel): def handler(event: Order, context: LambdaContext): try: parse(model=Order, envelope=envelopes.EVENTBRIDGE) - except ModelValidationError: + except ValidationError: ... 
Parameters ---------- event: Dict Lambda event to be parsed & validated - model: BaseModel + model: Model Your data model that will replace the event - envelope: BaseEnvelope + envelope: Envelope Optional envelope to extract the model from Raises ------ - ModelValidationError + ValidationError When input event does not conform with model provided InvalidModelTypeError When model given does not implement BaseModel @@ -144,13 +143,12 @@ def handler(event: Order, context: LambdaContext): return envelope().parse(data=event, model=model) except AttributeError: raise InvalidEnvelopeError(f"Envelope must implement BaseEnvelope, envelope={envelope}") - except (ValidationError, TypeError) as e: - raise ModelValidationError(f"Input event does not conform with model, envelope={envelope}") from e try: logger.debug("Parsing and validating event model; no envelope used") + if isinstance(event, str): + return model.parse_raw(event) + return model.parse_obj(event) - except (ValidationError, TypeError) as e: - raise ModelValidationError("Input event does not conform with model") from e except AttributeError: - raise InvalidModelTypeError("Input model must implement BaseModel") + raise InvalidModelTypeError(f"Input model must implement BaseModel, model={model}") diff --git a/aws_lambda_powertools/utilities/parser/types.py b/aws_lambda_powertools/utilities/parser/types.py new file mode 100644 index 00000000000..cada13b18e3 --- /dev/null +++ b/aws_lambda_powertools/utilities/parser/types.py @@ -0,0 +1,6 @@ +"""Generics and other shared types used across parser""" +from typing import TypeVar + +from pydantic import BaseModel + +Model = TypeVar("Model", bound=BaseModel) diff --git a/docs/content/utilities/parser.mdx b/docs/content/utilities/parser.mdx new file mode 100644 index 00000000000..c0eedbcc3d1 --- /dev/null +++ b/docs/content/utilities/parser.mdx @@ -0,0 +1,572 @@ +--- +title: Parser +description: Utility +--- + + +import Note from "../../src/components/Note" + + + It requires an extra dependency before using it. +
+ +This utility provides data parsing and deep validation using [Pydantic](https://pydantic-docs.helpmanual.io/). + +**Key features** + +* Defines data in pure Python classes, then parses, validates and extracts only what you want +* Built-in envelopes to unwrap, extend, and validate popular event source payloads +* Enforces type hints at runtime with user-friendly errors + +**Extra dependency** + + + This will install pydantic and typing_extensions
+ +Install parser's extra dependencies using **`pip install aws-lambda-powertools[pydantic]`**. + +## Defining models + +You can define models to parse incoming events by inheriting from `BaseModel`. + +```python:title=hello_world_model.py +from aws_lambda_powertools.utilities.parser import BaseModel +from typing import List, Optional + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] # nesting models are supported + optional_field: Optional[str] # this field may or may not be available when parsing +``` + +These are simply Python classes that inherit from BaseModel. **Parser** enforces type hints declared in your model at runtime. + +## Parsing events + +You can parse inbound events using the **event_parser** decorator or the standalone `parse` function. Both can parse either a dictionary or a JSON string as input. + +### event_parser decorator + +Use the decorator for fail-fast scenarios where you want your Lambda function to raise an exception in the event of a malformed payload. + +The `event_parser` decorator will raise a `ValidationError` if your event cannot be parsed according to the model. + +> NOTE: **This decorator will replace the `event` object with the parsed model if successful**. This means you should be careful when nesting other decorators that expect `event` to be a `dict`. + +```python:title=event_parser_decorator.py +from aws_lambda_powertools.utilities.parser import event_parser, BaseModel, ValidationError +from aws_lambda_powertools.utilities.typing import LambdaContext + +from typing import List, Optional +import json + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] # nesting models are supported + optional_field: Optional[str] # this field may or may not be available when parsing + +payload = { + "id": 10876546789, + "description": "My order", + "items": [ + { + "id": 1015938732, + "quantity": 1, + "description": "item xpto" + } + ] +} + +@event_parser(model=Order) # highlight-line +def handler(event: Order, context: LambdaContext): + print(event.id) + print(event.description) + print(event.items) + + order_items = [item for item in event.items] + ... + +handler(event=payload, context=LambdaContext()) +handler(event=json.dumps(payload), context=LambdaContext()) # also works if event is a JSON string +``` + +### parse function + +Use this standalone function when you want more control over the data validation process, for example returning a 400 error for malformed payloads.
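Like the decorator, the standalone `parse` function accepts either a dictionary or a JSON string; string inputs are parsed with Pydantic's `parse_raw`. Here is a minimal sketch with an illustrative model and payload (the fuller example below uses a dictionary and handles validation errors explicitly):

```python:title=parse_json_string_example.py
import json

from aws_lambda_powertools.utilities.parser import BaseModel, parse

class CancelOrder(BaseModel):  # illustrative model
    order_id: int
    reason: str

# payload arrives as a JSON string, e.g. from an upstream service
payload = json.dumps({"order_id": 12345, "reason": "item out of stock"})

order: CancelOrder = parse(event=payload, model=CancelOrder)  # string input is parsed via parse_raw
assert order.order_id == 12345
```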
+ +```python:title=parse_standalone_example.py +from aws_lambda_powertools.utilities.parser import parse, BaseModel, ValidationError +from typing import List, Optional + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] # nesting models are supported + optional_field: Optional[str] # this field may or may not be available when parsing + + +payload = { + "id": 10876546789, + "description": "My order", + "items": [ + { + # this will cause a validation error + "id": [1015938732], # highlight-line + "quantity": 1, + "description": "item xpto" + } + ] +} + +def my_function(): + try: + parsed_payload: Order = parse(event=payload, model=Order) # highlight-line + # payload dict is now parsed into our model + return parsed_payload.items + except ValidationError: + return { + "status_code": 400, + "message": "Invalid order" + } +``` + +## Extending built-in models + +Parser comes with the following built-in models: + +Model name | Description +------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- +**DynamoDBStreamModel** | Lambda Event Source payload for Amazon DynamoDB Streams +**EventBridgeModel** | Lambda Event Source payload for Amazon EventBridge +**SqsModel** | Lambda Event Source payload for Amazon SQS + +You can extend them to include your own models, and still have all other known fields parsed along the way. + +**EventBridge example** + +```python:title=extending_builtin_models.py +from aws_lambda_powertools.utilities.parser import parse, BaseModel +from aws_lambda_powertools.utilities.parser.models import EventBridgeModel + +from typing import List, Optional + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] + +# highlight-start +class OrderEventModel(EventBridgeModel): + detail: Order +# highlight-end + +payload = { + "version": "0", + "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", + "detail-type": "OrderPurchased", + "source": "OrderService", + "account": "111122223333", + "time": "2020-10-22T18:43:48Z", + "region": "us-west-1", + "resources": ["some_additional"], + "detail": { # highlight-line + "id": 10876546789, + "description": "My order", + "items": [ + { + "id": 1015938732, + "quantity": 1, + "description": "item xpto" + } + ] + } +} + +ret = parse(model=OrderEventModel, event=payload) # highlight-line + +assert ret.source == "OrderService" +assert ret.detail.description == "My order" +assert ret.detail_type == "OrderPurchased" # we rename it to snake_case since detail-type is not a valid Python identifier + +for order_item in ret.detail.items: + ... +``` + +**What's going on here, you might ask**: + +1. We imported our built-in model `EventBridgeModel` from the parser utility +2. Defined what our `Order` should look like +3. Defined what part of our EventBridge event should look like by overriding the `detail` key within our `OrderEventModel` +4.
Parser parsed the original event against `OrderEventModel` + +## Envelopes + +When trying to parse your payloads wrapped in a known structure, you might encounter the following situations: + +* Your actual payload is wrapped in a known structure, for example Lambda Event Sources like EventBridge +* You're only interested in a portion of the payload, for example parsing the `detail` of custom events in EventBridge, or `body` of SQS records + +You could solve these situations by creating a model of these known structures, parsing them, then extracting and parsing a key where your payload is. + +This can become difficult quite quickly. Parser makes this problem easier through a feature named `Envelope`. + +Envelopes can be used via the `envelope` parameter available in both the `parse` function and the `event_parser` decorator. + +Here's an example of parsing a model found in an event coming from EventBridge, where all you want is what's inside the `detail` key. + +```python:title=parse_eventbridge_payload.py +from aws_lambda_powertools.utilities.parser import event_parser, parse, BaseModel, envelopes +from aws_lambda_powertools.utilities.typing import LambdaContext + +class UserModel(BaseModel): + username: str + password1: str + password2: str + +payload = { + "version": "0", + "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", + "detail-type": "CustomerSignedUp", + "source": "CustomerService", + "account": "111122223333", + "time": "2020-10-22T18:43:48Z", + "region": "us-west-1", + "resources": ["some_additional_"], + # highlight-start + "detail": { + "username": "universe", + "password1": "myp@ssword", + "password2": "repeat password" + } + # highlight-end +} + +ret = parse(model=UserModel, envelope=envelopes.EventBridgeEnvelope, event=payload) # highlight-line + +# Parsed model only contains our actual model, not the entire EventBridge + Payload parsed +assert ret.password1 == ret.password2 + +# Same behaviour but using our decorator +@event_parser(model=UserModel, envelope=envelopes.EventBridgeEnvelope) # highlight-line +def handler(event: UserModel, context: LambdaContext): + assert event.password1 == event.password2 +``` + +**What's going on here, you might ask**: + +1. We imported built-in `envelopes` from the parser utility +2. Used `envelopes.EventBridgeEnvelope` as the envelope for our `UserModel` model +3. Parser parsed the original event against the EventBridge model +4. Parser then parsed the `detail` key using `UserModel` + + +### Built-in envelopes + +Parser comes with the following built-in envelopes, where `Model` in the return section is your given model. + +Envelope name | Behaviour | Return +------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | ------------------------------------ +**DynamoDBStreamEnvelope** | 1. Parses data using `DynamoDBStreamModel`.
2. Parses records in `NewImage` and `OldImage` keys using your model.
3. Returns a list, where each item is a dictionary containing `NewImage` and `OldImage` keys | `List[Dict[str, Optional[Model]]]` +**EventBridgeEnvelope** | 1. Parses data using `EventBridgeModel`.
2. Parses `detail` key using your model and returns it. | `Model` +**SqsEnvelope** | 1. Parses data using `SqsModel`.
2. Parses records in `body` key using your model and returns them in a list. | `List[Model]` + +### Bringing your own envelope + +You can create your own Envelope model and logic by inheriting from `BaseEnvelope`, and implementing the `parse` method. + +Here's a snippet of how the EventBridge Envelope we demonstrated previously is implemented. + +**EventBridge Model** + +```python:title=eventbridge_model.py +from datetime import datetime +from typing import Any, Dict, List + +from aws_lambda_powertools.utilities.parser import BaseModel, Field + + +class EventBridgeModel(BaseModel): + version: str + id: str # noqa: A003,VNE003 + source: str + account: str + time: datetime + region: str + resources: List[str] + detail_type: str = Field(None, alias="detail-type") + detail: Dict[str, Any] +``` + +**EventBridge Envelope** + +```python:title=eventbridge_envelope.py +from aws_lambda_powertools.utilities.parser import BaseEnvelope, BaseModel +from aws_lambda_powertools.utilities.parser.models import EventBridgeModel + +from typing import Any, Dict, Optional, TypeVar, Union + +Model = TypeVar("Model", bound=BaseModel) + +class EventBridgeEnvelope(BaseEnvelope): # highlight-line + + def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: Model) -> Optional[Model]: # highlight-line + """Parses data found with model provided + + Parameters + ---------- + data : Dict + Lambda event to be parsed + model : Model + Data model provided to parse after extracting data using envelope + + Returns + ------- + Any + Parsed detail payload with model provided + """ + parsed_envelope = EventBridgeModel.parse_obj(data) # highlight-line + return self._parse(data=parsed_envelope.detail, model=model) # highlight-line +``` + +**What's going on here, you might ask**: + +1. We defined an envelope named `EventBridgeEnvelope` inheriting from `BaseEnvelope` +2. Implemented the `parse` abstract method taking `data` and `model` as parameters +3. Then, we parsed the incoming data with our envelope to confirm it matches EventBridge's structure defined in `EventBridgeModel` +4. Lastly, we called `_parse` from `BaseEnvelope` to parse the data in our envelope (`.detail`) using the customer's model + +### Data model validation + + + This is radically different from the Validator utility which validates events against JSON Schema. +
+ +You can use parser's validator for deep inspection of object values and complex relationships. + +There are two types of class method decorators you can use: + +* **`validator`** - Useful to quickly validate an individual field and its value +* **`root_validator`** - Useful to validate the entire model's data + +Keep the following in mind regardless of which decorator you end up using: + +* You must raise either `ValueError`, `TypeError`, or `AssertionError` when a value is not compliant +* You must return the value(s) itself if compliant + +#### Validating fields + +Quick validation to verify whether the field `message` has the value of `hello world`. + +```python:title=deep_data_validation.py +from aws_lambda_powertools.utilities.parser import parse, BaseModel, validator + +class HelloWorldModel(BaseModel): + message: str + + @validator('message') # highlight-line + def is_hello_world(cls, v): + if v != "hello world": + raise ValueError("Message must be hello world!") + return v + +parse(model=HelloWorldModel, event={"message": "hello universe"}) +``` + +If you run this as-is, you should expect the following error with the message we provided in our exception: + +``` +message + Message must be hello world! (type=value_error) +``` + +Alternatively, you can pass `'*'` as an argument to the decorator to validate every value available. + +```python:title=validate_all_field_values.py +from aws_lambda_powertools.utilities.parser import parse, BaseModel, validator + +class HelloWorldModel(BaseModel): + message: str + sender: str + + @validator('*') # highlight-line + def has_whitespace(cls, v): + if ' ' not in v: + raise ValueError("Must have whitespace...") + + return v + +parse(model=HelloWorldModel, event={"message": "hello universe", "sender": "universe"}) +``` + +#### Validating entire model + +`root_validator` can help when you have a complex validation mechanism. For example, finding whether data has been omitted, comparing field values, etc. + +```python:title=validate_all_field_values.py +from aws_lambda_powertools.utilities.parser import parse, BaseModel, root_validator + +class UserModel(BaseModel): + username: str + password1: str + password2: str + + @root_validator + def check_passwords_match(cls, values): + pw1, pw2 = values.get('password1'), values.get('password2') + if pw1 is not None and pw2 is not None and pw1 != pw2: + raise ValueError('passwords do not match') + return values + +payload = { + "username": "universe", + "password1": "myp@ssword", + "password2": "repeat password" +} + +parse(model=UserModel, event=payload) +``` + + + You can read more about validating list items, reusing validators, validating raw inputs, and a lot more in Pydantic's documentation. +
+ + +## Advanced use cases + + + Looking to auto-generate models from JSON, YAML, JSON Schemas, OpenApi, etc? +

+ Use Koudai Aono's data model code generation tool for Pydantic +

+ +There are a number of advanced use cases well documented in Pydantic's documentation, such as creating [immutable models](https://pydantic-docs.helpmanual.io/usage/models/#faux-immutability), [declaring fields with dynamic values](https://pydantic-docs.helpmanual.io/usage/models/#field-with-dynamic-default-value) e.g. UUID, and [helper functions to parse models from files or strings](https://pydantic-docs.helpmanual.io/usage/models/#helper-functions). + +Two possibly lesser-known use cases are model and exception serialization. Models have methods to [export them](https://pydantic-docs.helpmanual.io/usage/exporting_models/) as `dict`, `JSON`, and `JSON Schema`, and validation exceptions can be exported as JSON. + +```python:title=serializing_models_exceptions.py +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities.parser import parse, BaseModel, ValidationError + +logger = Logger(service="user") + +class UserModel(BaseModel): + username: str + password1: str + password2: str + +payload = { + "username": "universe", + "password1": "myp@ssword", + "password2": "repeat password" +} + +def my_function(): + try: + return parse(model=UserModel, event=payload) + except ValidationError as e: + logger.exception(e.json()) # highlight-line + return { + "status_code": 400, + "message": "Invalid username" + } + +User: UserModel = my_function() +# highlight-start +user_dict = User.dict() +user_json = User.json() +user_json_schema_as_dict = User.schema() +user_json_schema_as_json = User.schema_json(indent=2) +# highlight-end +``` + +These can be quite useful when manipulating models that later need to be serialized as inputs for services like DynamoDB, EventBridge, etc. + +## FAQ + +**When should I use parser vs data_classes utility?** + +Use the data classes utility when you're after autocomplete, self-documented attributes and helpers to extract data from common event sources. + +Parser is best suited for those who want deep validation, parsing and autocomplete from their own data models, and accept an additional dependency in return. + +**How do I import X from Pydantic?** + +We export the most common classes, exceptions, and utilities from Pydantic as part of parser, e.g. `from aws_lambda_powertools.utilities.parser import BaseModel`. + +If what you're trying to use isn't available as part of the high level import system, use the following escape hatch mechanism: + +```python:title=escape_hatch.py +from aws_lambda_powertools.utilities.parser.pydantic import +``` + +**What is the cold start impact of bringing in this additional dependency?** + +No significant cold start impact. It does increase the final uncompressed package by **71M** when you bring in the additional dependency that parser requires. + +Artillery load test sample against a [hello world sample](https://github.com/aws-samples/cookiecutter-aws-sam-python) using Tracer, Metrics, and Logger with and without parser.
+ +**No parser** + +> **Uncompressed package size**: 55M, **p99**: 180.3ms + +``` +Summary report @ 14:36:07(+0200) 2020-10-23 + Scenarios launched: 10 + Scenarios completed: 10 + Requests completed: 2000 + Mean response/sec: 114.81 + Response time (msec): + min: 54.9 + max: 1684.9 + median: 68 + p95: 109.1 + p99: 180.3 + Scenario counts: + 0: 10 (100%) + Codes: + 200: 2000 +``` + +**With parser** + +> **Uncompressed package size**: 128M, **p99**: 193.1ms + +``` +Summary report @ 14:29:23(+0200) 2020-10-23 + Scenarios launched: 10 + Scenarios completed: 10 + Requests completed: 2000 + Mean response/sec: 111.67 + Response time (msec): + min: 54.3 + max: 1887.2 + median: 66.1 + p95: 113.3 + p99: 193.1 + Scenario counts: + 0: 10 (100%) + Codes: + 200: 2000 +``` diff --git a/docs/gatsby-config.js b/docs/gatsby-config.js index 087f23a9634..af82f3e2e3d 100644 --- a/docs/gatsby-config.js +++ b/docs/gatsby-config.js @@ -35,7 +35,8 @@ module.exports = { 'utilities/batch', 'utilities/typing', 'utilities/validation', - 'utilities/data_classes' + 'utilities/data_classes', + 'utilities/parser' ], }, navConfig: { diff --git a/tests/functional/parser/conftest.py b/tests/functional/parser/conftest.py index 27fd4b2d1f6..34199a322b2 100644 --- a/tests/functional/parser/conftest.py +++ b/tests/functional/parser/conftest.py @@ -1,9 +1,9 @@ from typing import Any, Dict import pytest -from pydantic import BaseModel, ValidationError +from pydantic import BaseModel -from aws_lambda_powertools.utilities.parser import BaseEnvelope, ModelValidationError +from aws_lambda_powertools.utilities.parser import BaseEnvelope @pytest.fixture @@ -37,10 +37,7 @@ class MyDummyEnvelope(BaseEnvelope): """Unwrap dummy event within payload key""" def parse(self, data: Dict[str, Any], model: BaseModel): - try: - parsed_enveloped = dummy_envelope_schema(**data) - except (ValidationError, TypeError) as e: - raise ModelValidationError("Dummy input does not conform with schema") from e + parsed_enveloped = dummy_envelope_schema(**data) return self._parse(data=parsed_enveloped.payload, model=model) return MyDummyEnvelope diff --git a/tests/functional/parser/test_dynamodb.py b/tests/functional/parser/test_dynamodb.py index ac5ebab40c3..bd7e0795f42 100644 --- a/tests/functional/parser/test_dynamodb.py +++ b/tests/functional/parser/test_dynamodb.py @@ -2,13 +2,13 @@ import pytest -from aws_lambda_powertools.utilities.parser import envelopes, event_parser, exceptions +from aws_lambda_powertools.utilities.parser import ValidationError, envelopes, event_parser from aws_lambda_powertools.utilities.typing import LambdaContext from tests.functional.parser.schemas import MyAdvancedDynamoBusiness, MyDynamoBusiness from tests.functional.parser.utils import load_event -@event_parser(model=MyDynamoBusiness, envelope=envelopes.DynamoDBEnvelope) +@event_parser(model=MyDynamoBusiness, envelope=envelopes.DynamoDBStreamEnvelope) def handle_dynamodb(event: List[Dict[str, MyDynamoBusiness]], _: LambdaContext): assert len(event) == 2 assert event[0]["OldImage"] is None @@ -59,11 +59,11 @@ def test_dynamo_db_stream_trigger_event_no_envelope(): def test_validate_event_does_not_conform_with_model_no_envelope(): event_dict: Any = {"hello": "s"} - with pytest.raises(exceptions.ModelValidationError): + with pytest.raises(ValidationError): handle_dynamodb_no_envelope(event_dict, LambdaContext()) def test_validate_event_does_not_conform_with_model(): event_dict: Any = {"hello": "s"} - with pytest.raises(exceptions.ModelValidationError): + with 
pytest.raises(ValidationError): handle_dynamodb(event_dict, LambdaContext()) diff --git a/tests/functional/parser/test_eventbridge.py b/tests/functional/parser/test_eventbridge.py index 07387e9ba0a..1af481bc52d 100644 --- a/tests/functional/parser/test_eventbridge.py +++ b/tests/functional/parser/test_eventbridge.py @@ -2,7 +2,7 @@ import pytest -from aws_lambda_powertools.utilities.parser import envelopes, event_parser, exceptions +from aws_lambda_powertools.utilities.parser import ValidationError, envelopes, event_parser from aws_lambda_powertools.utilities.typing import LambdaContext from tests.functional.parser.schemas import MyAdvancedEventbridgeBusiness, MyEventbridgeBusiness from tests.functional.parser.utils import load_event @@ -46,7 +46,7 @@ def test_validate_event_does_not_conform_with_user_dict_model(): "resources": ["arn:aws:ec2:us-west-1:123456789012:instance/i-1234567890abcdef0"], "detail": {}, } - with pytest.raises(exceptions.ModelValidationError) as e: + with pytest.raises(ValidationError) as e: handle_eventbridge(event_dict, LambdaContext()) print(e.exconly()) @@ -57,5 +57,5 @@ def test_handle_eventbridge_trigger_event_no_envelope(): def test_handle_invalid_event_with_eventbridge_envelope(): - with pytest.raises(exceptions.ModelValidationError): + with pytest.raises(ValidationError): handle_eventbridge(event={}, context=LambdaContext()) diff --git a/tests/functional/parser/test_parser.py b/tests/functional/parser/test_parser.py index 162b52ee439..5e9e40faec4 100644 --- a/tests/functional/parser/test_parser.py +++ b/tests/functional/parser/test_parser.py @@ -1,8 +1,9 @@ -from typing import Dict +import json +from typing import Dict, Union import pytest -from aws_lambda_powertools.utilities.parser import event_parser, exceptions +from aws_lambda_powertools.utilities.parser import ValidationError, event_parser, exceptions from aws_lambda_powertools.utilities.typing import LambdaContext @@ -12,7 +13,7 @@ def test_parser_unsupported_event(dummy_schema, invalid_value): def handle_no_envelope(event: Dict, _: LambdaContext): return event - with pytest.raises(exceptions.ModelValidationError): + with pytest.raises(ValidationError): handle_no_envelope(event=invalid_value, context=LambdaContext()) @@ -55,3 +56,13 @@ def handle_no_envelope(event: Dict, _: LambdaContext): with pytest.raises(exceptions.InvalidModelTypeError): handle_no_envelope(event=dummy_event, context=LambdaContext()) + + +def test_parser_event_as_json_string(dummy_event, dummy_schema): + dummy_event = json.dumps(dummy_event["payload"]) + + @event_parser(model=dummy_schema) + def handle_no_envelope(event: Union[Dict, str], _: LambdaContext): + return event + + handle_no_envelope(dummy_event, LambdaContext()) diff --git a/tests/functional/parser/test_sqs.py b/tests/functional/parser/test_sqs.py index 2ee992e2fa1..0cea8246b50 100644 --- a/tests/functional/parser/test_sqs.py +++ b/tests/functional/parser/test_sqs.py @@ -2,7 +2,7 @@ import pytest -from aws_lambda_powertools.utilities.parser import envelopes, event_parser, exceptions +from aws_lambda_powertools.utilities.parser import ValidationError, envelopes, event_parser from aws_lambda_powertools.utilities.typing import LambdaContext from tests.functional.parser.schemas import MyAdvancedSqsBusiness, MySqsBusiness from tests.functional.parser.utils import load_event @@ -23,7 +23,7 @@ def test_handle_sqs_trigger_event_json_body(sqs_event): # noqa: F811 def test_validate_event_does_not_conform_with_model(): event: Any = {"invalid": "event"} - with 
pytest.raises(exceptions.ModelValidationError): + with pytest.raises(ValidationError): handle_sqs_json_body(event, LambdaContext()) @@ -51,7 +51,7 @@ def test_validate_event_does_not_conform_user_json_string_with_model(): ] } - with pytest.raises(exceptions.ModelValidationError): + with pytest.raises(ValidationError): handle_sqs_json_body(event, LambdaContext())