Skip to content

Commit 86cfdae

Browse files
feat(data-masking): add support for Pydantic models, dataclasses, and standard classes (#6413)
* feat(data-masking): support masking of Pydantic models, dataclasses, and standard classes (#3473) * feat(data_masking): support complex input types via robust prepare_data() with and updated tests * docs(data-masking): add support docs for Pydantic, dataclasses, and custom classes and updated test code * docs(data-masking): update examples to use Lambda function entry points for supported input types and updated codebase * refactoring prepare_data method --------- Co-authored-by: Leandro Damascena <[email protected]>
1 parent 6f59874 commit 86cfdae

File tree

6 files changed

+342
-9
lines changed

6 files changed

+342
-9
lines changed

aws_lambda_powertools/utilities/data_masking/base.py

+49-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from __future__ import annotations
88

9+
import dataclasses
910
import functools
1011
import logging
1112
import warnings
@@ -27,6 +28,51 @@
2728
logger = logging.getLogger(__name__)
2829

2930

31+
def prepare_data(data: Any, _visited: set[int] | None = None) -> Any:
    """
    Recursively convert complex objects into dictionaries or simple types.

    Conversion rules, applied in this order:

    1. ``str``, ``int``, ``float``, ``bool`` and ``None`` are returned as-is.
    2. Classes (as opposed to instances) are returned as-is.
    3. Dataclass instances are converted with ``dataclasses.asdict``.
    4. Objects with a callable ``model_dump`` (e.g. Pydantic models) are
       converted with ``model_dump()``.
    5. Non-dict objects with a callable ``dict`` are converted with ``dict()``.
    6. Dictionaries are rebuilt as a plain ``dict`` with converted keys and values.
    7. Lists, tuples (including named tuples) and sets are rebuilt as the
       same type with converted items.
    8. Any other object with a ``__dict__`` is converted from ``vars(obj)``.
    9. Everything else is returned unchanged.

    The output of rules 3-5 and 8 is itself converted recursively.

    Parameters
    ----------
    data : Any
        The value to convert.
    _visited : set[int] | None
        Internal bookkeeping: the ``id()`` of every object currently being
        converted further up the call chain. Callers should leave it unset.

    Returns
    -------
    Any
        The converted value. An object that is already being converted
        further up the call chain (a circular reference) is returned
        unchanged instead of being converted again.
    """
    # Primitives need no conversion and are never tracked.
    if isinstance(data, (str, int, float, bool, type(None))):
        return data

    # Classes are not data; calling instance-level converters such as
    # dataclasses.asdict() or model_dump() on them would raise TypeError.
    if isinstance(data, type):
        return data

    # `is None` (not `or`) so that a caller-supplied empty set is kept.
    if _visited is None:
        _visited = set()

    data_id = id(data)
    if data_id in _visited:
        # Circular reference: this object is an ancestor of itself.
        return data

    # Track only the *current recursion path*. The id is removed again in the
    # `finally` below so that:
    #   * an object referenced from several places (but not circularly) is
    #     converted every time it appears, and
    #   * ids of temporary dicts (asdict()/model_dump()/dict() results) cannot
    #     be mistaken for "already visited" once those temporaries are freed
    #     and their memory address is reused.
    _visited.add(data_id)
    try:
        # Dataclass instances
        if dataclasses.is_dataclass(data):
            return prepare_data(dataclasses.asdict(data), _visited)

        # Pydantic models
        if callable(getattr(data, "model_dump", None)):
            return prepare_data(data.model_dump(), _visited)

        # Dictionaries
        if isinstance(data, dict):
            return {prepare_data(key, _visited): prepare_data(value, _visited) for key, value in data.items()}

        # Objects with dict() method
        if callable(getattr(data, "dict", None)):
            return prepare_data(data.dict(), _visited)

        # Lists, tuples, sets
        if isinstance(data, (list, tuple, set)):
            items = [prepare_data(item, _visited) for item in data]
            if isinstance(data, tuple) and hasattr(data, "_fields"):
                # Named tuples take their fields as positional arguments,
                # not as a single iterable.
                return type(data)(*items)
            return type(data)(items)

        # Objects with __dict__
        if hasattr(data, "__dict__"):
            return prepare_data(vars(data), _visited)

        # Default fallback
        return data
    finally:
        _visited.discard(data_id)
74+
75+
3076
class DataMasking:
3177
"""
3278
The DataMasking class orchestrates erasing, encrypting, and decrypting
@@ -93,6 +139,7 @@ def encrypt(
93139
data_masker = DataMasking(provider=encryption_provider)
94140
encrypted = data_masker.encrypt({"secret": "value"})
95141
"""
142+
data = prepare_data(data)
96143
return self._apply_action(
97144
data=data,
98145
fields=None,
@@ -135,7 +182,7 @@ def decrypt(
135182
data_masker = DataMasking(provider=encryption_provider)
136183
encrypted = data_masker.decrypt(encrypted_data)
137184
"""
138-
185+
data = prepare_data(data)
139186
return self._apply_action(
140187
data=data,
141188
fields=None,
@@ -184,6 +231,7 @@ def erase(
184231
Any
185232
The data with sensitive information erased or masked.
186233
"""
234+
data = prepare_data(data)
187235
if masking_rules:
188236
return self._apply_masking_rules(data=data, masking_rules=masking_rules)
189237
else:

docs/utilities/data_masking.md

+28-8
Original file line numberDiff line numberDiff line change
@@ -440,21 +440,41 @@ Note that the return will be a deserialized JSON and your desired fields updated
440440

441441
### Data serialization
442442

443-
???+ note "Current limitations"
444-
1. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet.
443+
???+ tip "Extended input support"
444+
We support `Pydantic models`, `Dataclasses`, and custom classes with `dict()` or `__dict__` for input.
445+
446+
These types are automatically converted into dictionaries before `masking` and `encrypting` operations. Please note that we **don't convert back** to the original type, and the returned object will be a dictionary.
445447

446448
Before we traverse the data structure, we perform two important operations on input data:
447449

448450
1. If `JSON string`, **deserialize** using default or provided deserializer.
449-
2. If `dictionary`, **normalize** into `JSON` to prevent traversing unsupported data types.
450-
451-
When decrypting, we revert the operation to restore the original data structure.
451+
2. If `dictionary` or a complex type (Pydantic model, dataclass, or custom class), **normalize** into `JSON` to prevent traversing unsupported data types.
452452

453453
For compatibility or performance, you can optionally pass your own JSON serializer and deserializer to replace `json.dumps` and `json.loads` respectively:
454454

455-
```python hl_lines="17-18" title="advanced_custom_serializer.py"
456-
--8<-- "examples/data_masking/src/advanced_custom_serializer.py"
457-
```
455+
=== "Working with custom types"
456+
457+
```python
458+
--8<-- "examples/data_masking/src/working_with_custom_types.py"
459+
```
460+
461+
=== "Working with Pydantic"
462+
463+
```python
464+
--8<-- "examples/data_masking/src/working_with_pydantic_types.py"
465+
```
466+
467+
=== "Working with dataclasses"
468+
469+
```python
470+
--8<-- "examples/data_masking/src/working_with_dataclass_types.py"
471+
```
472+
473+
=== "Working with serializer"
474+
475+
```python
476+
--8<-- "examples/data_masking/src/advanced_custom_serializer.py"
477+
```
458478

459479
### Using multiple keys
460480

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from aws_lambda_powertools.utilities.data_masking import DataMasking
2+
3+
data_masker = DataMasking()
4+
5+
6+
class User:
    """Plain Python class that exposes its fields through a ``dict()`` method."""

    def __init__(self, name, age):
        self.name, self.age = name, age

    def dict(self):
        """Return the user's ``name`` and ``age`` as a dictionary."""
        return {key: getattr(self, key) for key in ("name", "age")}
13+
14+
15+
def lambda_handler(event, context):
    """Build a sample ``User`` and return it with the ``age`` field erased."""
    return data_masker.erase(User("powertools", 42), fields=["age"])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from dataclasses import dataclass
2+
3+
from aws_lambda_powertools.utilities.data_masking import DataMasking
4+
5+
data_masker = DataMasking()
6+
7+
8+
@dataclass
class User:
    """Sample user record declared as a dataclass."""

    name: str
    age: int
12+
13+
14+
def lambda_handler(event, context):
    """Build a sample dataclass ``User`` and return it with the ``age`` field erased."""
    return data_masker.erase(User(name="powertools", age=42), fields=["age"])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from pydantic import BaseModel
2+
3+
from aws_lambda_powertools.utilities.data_masking import DataMasking
4+
5+
data_masker = DataMasking()
6+
7+
8+
class User(BaseModel):
    """Sample user record declared as a Pydantic model."""

    name: str
    age: int
11+
12+
13+
def lambda_handler(event, context):
    """Build a sample Pydantic ``User`` and return it with the ``age`` field erased."""
    return data_masker.erase(User(name="powertools", age=42), fields=["age"])

0 commit comments

Comments
 (0)