Skip to content

feat(data-masking): add support for Pydantic models, dataclasses, and standard classes #6413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion aws_lambda_powertools/utilities/data_masking/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from __future__ import annotations

import dataclasses
import functools
import logging
import warnings
Expand All @@ -27,6 +28,51 @@
logger = logging.getLogger(__name__)


def prepare_data(data: Any, _visited: set[int] | None = None) -> Any:
    """
    Recursively convert complex objects into dictionaries or simple types.

    Handles dataclass instances, Pydantic models (anything exposing a callable
    ``model_dump``), objects exposing a callable ``dict`` method, dictionaries,
    lists/tuples/sets, and any object that has a ``__dict__``.

    Parameters
    ----------
    data : Any
        The value to convert.
    _visited : set[int] | None
        Internal. Ids of the containers/objects currently being converted
        higher up in the recursion. Callers should not pass it.

    Returns
    -------
    Any
        Primitives (``str``, ``int``, ``float``, ``bool``, ``None``) and
        unsupported objects are returned unchanged. Supported objects are
        returned as dictionaries; dictionaries and sequences are rebuilt with
        their keys, values and items converted. An object that is reached again
        while it is still being converted (a circular reference) is returned
        unchanged instead of being descended into.
    """
    # Primitives need no conversion and must never be tracked
    if isinstance(data, (str, int, float, bool, type(None))):
        return data

    if _visited is None:
        _visited = set()

    data_id = id(data)
    if data_id in _visited:
        # ``data`` is one of its own ancestors: stop here to avoid infinite recursion
        return data

    # Track only the *active* recursion path. The id is removed again once this
    # object is converted, so that:
    #   * an object referenced twice without forming a cycle (e.g. ``[obj, obj]``)
    #     is converted on every occurrence, and
    #   * ids of short-lived intermediates (the dicts built by ``asdict()`` or
    #     ``model_dump()``) cannot be mistaken for a cycle when the interpreter
    #     reuses their id for a later object.
    _visited.add(data_id)
    try:
        # Dataclass instances (dataclass *classes* are rejected by ``asdict``)
        if dataclasses.is_dataclass(data) and not isinstance(data, type):
            return prepare_data(dataclasses.asdict(data), _visited)

        # Pydantic models
        if callable(getattr(data, "model_dump", None)):
            return prepare_data(data.model_dump(), _visited)

        # Objects with dict() method
        if not isinstance(data, dict) and callable(getattr(data, "dict", None)):
            return prepare_data(data.dict(), _visited)

        # Dictionaries
        if isinstance(data, dict):
            return {prepare_data(key, _visited): prepare_data(value, _visited) for key, value in data.items()}

        # Lists, tuples, sets
        if isinstance(data, (list, tuple, set)):
            items = (prepare_data(item, _visited) for item in data)
            if isinstance(data, tuple) and hasattr(data, "_fields"):
                # namedtuple constructors take one positional argument per field,
                # not a single iterable
                return type(data)(*items)
            return type(data)(items)

        # Objects with __dict__
        if hasattr(data, "__dict__"):
            return prepare_data(vars(data), _visited)

        # Default fallback
        return data
    finally:
        _visited.discard(data_id)

Check warning on line 73 in aws_lambda_powertools/utilities/data_masking/base.py

View check run for this annotation

Codecov / codecov/patch

aws_lambda_powertools/utilities/data_masking/base.py#L73

Added line #L73 was not covered by tests


class DataMasking:
"""
The DataMasking class orchestrates erasing, encrypting, and decrypting
Expand Down Expand Up @@ -93,6 +139,7 @@
data_masker = DataMasking(provider=encryption_provider)
encrypted = data_masker.encrypt({"secret": "value"})
"""
data = prepare_data(data)
return self._apply_action(
data=data,
fields=None,
Expand Down Expand Up @@ -135,7 +182,7 @@
data_masker = DataMasking(provider=encryption_provider)
encrypted = data_masker.decrypt(encrypted_data)
"""

data = prepare_data(data)
return self._apply_action(
data=data,
fields=None,
Expand Down Expand Up @@ -184,6 +231,7 @@
Any
The data with sensitive information erased or masked.
"""
data = prepare_data(data)
if masking_rules:
return self._apply_masking_rules(data=data, masking_rules=masking_rules)
else:
Expand Down
36 changes: 28 additions & 8 deletions docs/utilities/data_masking.md
Original file line number Diff line number Diff line change
Expand Up @@ -440,21 +440,41 @@ Note that the return will be a deserialized JSON and your desired fields updated

### Data serialization

???+ note "Current limitations"
1. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet.
???+ tip "Extended input support"
We support `Pydantic models`, `Dataclasses`, and custom classes with `dict()` or `__dict__` for input.

These types are automatically converted into dictionaries before `masking` and `encrypting` operations. Please note that we **don't convert back** to the original type, and the returned object will be a dictionary.

Before we traverse the data structure, we perform two important operations on input data:

1. If `JSON string`, **deserialize** using default or provided deserializer.
2. If `dictionary`, **normalize** into `JSON` to prevent traversing unsupported data types.

When decrypting, we revert the operation to restore the original data structure.
2. If `dictionary or complex types`, **normalize** into `JSON` to prevent traversing unsupported data types.

For compatibility or performance, you can optionally pass your own JSON serializer and deserializer to replace `json.dumps` and `json.loads` respectively:

```python hl_lines="17-18" title="advanced_custom_serializer.py"
--8<-- "examples/data_masking/src/advanced_custom_serializer.py"
```
=== "Working with custom types"

```python
--8<-- "examples/data_masking/src/working_with_custom_types.py"
```

=== "Working with Pydantic"

```python
--8<-- "examples/data_masking/src/working_with_pydantic_types.py"
```

=== "Working with dataclasses"

```python
--8<-- "examples/data_masking/src/working_with_dataclass_types.py"
```

=== "Working with serializer"

```python
--8<-- "examples/data_masking/src/advanced_custom_serializer.py"
```

### Using multiple keys

Expand Down
17 changes: 17 additions & 0 deletions examples/data_masking/src/working_with_custom_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from aws_lambda_powertools.utilities.data_masking import DataMasking

data_masker = DataMasking()


class User:
    # Plain Python class that exposes its state through a dict() method,
    # which Data Masking uses to turn the object into a dictionary.

    def __init__(self, name, age):
        self.name, self.age = name, age

    def dict(self):
        # Only the fields that should be visible to Data Masking
        as_dictionary = {"name": self.name, "age": self.age}
        return as_dictionary


def lambda_handler(event, context):
    """Erase the ``age`` field of a custom ``User`` object and return the result."""
    # The object is passed as-is; no manual conversion to a dictionary is done here
    return data_masker.erase(User("powertools", 42), fields=["age"])
16 changes: 16 additions & 0 deletions examples/data_masking/src/working_with_dataclass_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from dataclasses import dataclass

from aws_lambda_powertools.utilities.data_masking import DataMasking

data_masker = DataMasking()


# Dataclass used as the input object for the Data Masking example below.
@dataclass
class User:
    name: str  # user's display name
    age: int  # field erased by the handler in this example


def lambda_handler(event, context):
    """Erase the ``age`` field of a ``User`` dataclass instance and return the result."""
    # The dataclass instance is passed as-is; no manual conversion is done here
    return data_masker.erase(User(name="powertools", age=42), fields=["age"])
15 changes: 15 additions & 0 deletions examples/data_masking/src/working_with_pydantic_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pydantic import BaseModel

from aws_lambda_powertools.utilities.data_masking import DataMasking

data_masker = DataMasking()


# Pydantic model used as the input object for the Data Masking example below.
class User(BaseModel):
    name: str  # user's display name
    age: int  # field erased by the handler in this example


def lambda_handler(event, context):
    """Erase the ``age`` field of a ``User`` Pydantic model and return the result."""
    # The model instance is passed as-is; no manual conversion is done here
    return data_masker.erase(User(name="powertools", age=42), fields=["age"])
Loading
Loading