Skip to content

Commit 27eca17

Browse files
committed
Added functional tests and put input data in separate file
1 parent 5423f7f commit 27eca17

File tree

5 files changed

+202
-103
lines changed

5 files changed

+202
-103
lines changed

aws_lambda_powertools/utilities/data_masking/providers/aws_encryption_sdk.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from aws_lambda_powertools.utilities.data_masking.provider import Provider
1313
from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session
1414

15+
1516
class SingletonMeta(type):
1617
"""Metaclass to cache class instances to optimize encryption"""
1718

@@ -33,7 +34,8 @@ def __call__(cls, *args, **provider_options):
3334
class AwsEncryptionSdkProvider(Provider, metaclass=SingletonMeta):
3435
cache = LocalCryptoMaterialsCache(CACHE_CAPACITY)
3536
session = botocore.session.Session()
36-
register_feature_to_botocore_session(session, "data-masking")
37+
register_feature_to_botocore_session(session, "data-masking")
38+
3739
def __init__(self, keys: List[str], client: Optional[EncryptionSDKClient] = None) -> None:
3840
self.client = client or EncryptionSDKClient()
3941
self.keys = keys

tests/functional/data_masking/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from unittest.mock import patch
2+
3+
import pytest
4+
5+
from aws_lambda_powertools.utilities.data_masking.base import DataMasking
6+
from aws_lambda_powertools.utilities.data_masking.providers.aws_encryption_sdk import AwsEncryptionSdkProvider
7+
from tests.unit.data_masking.setup import *
8+
9+
10+
# KMS key ARN handed to AwsEncryptionSdkProvider by the data_masker fixture.
# NOTE(review): account-specific ARN is hard-coded in the test module — confirm
# this is intended rather than being read from test configuration.
AWS_SDK_KEY = "arn:aws:kms:us-west-2:683517028648:key/269301eb-81eb-4067-ac72-98e8e49bf2b3"
11+
12+
13+
@pytest.fixture
def data_masker():
    """Return a DataMasking instance built on AwsEncryptionSdkProvider with AWS_SDK_KEY."""
    provider = AwsEncryptionSdkProvider(keys=[AWS_SDK_KEY])
    return DataMasking(provider=provider)
16+
17+
18+
@pytest.mark.parametrize("value, value_masked", data_types_and_masks)
def test_mask_types(value, value_masked, data_masker):
    """Check mask() without a fields argument against the expected masked value."""
    # GIVEN any data type

    # WHEN the AWS encryption provider's mask method is called with no fields argument
    result = data_masker.mask(value)

    # THEN the result is the full input data, masked
    assert result == value_masked
27+
28+
29+
def test_mask_with_fields(data_masker):
    """Check mask() with a fields list for both the dict and its JSON string form."""
    # GIVEN the data type is a dictionary, or a JSON representation of a dictionary

    # WHEN the AWS encryption provider's mask is called with a list of fields specified
    masked_from_dict = data_masker.mask(python_dict, dict_fields)
    masked_from_json = data_masker.mask(json_dict, dict_fields)

    # THEN only the specified fields are masked in the result
    assert masked_from_dict == masked_with_fields
    assert masked_from_json == masked_with_fields
39+
40+
41+
@pytest.mark.parametrize("value", data_types)
def test_encrypt_decrypt(value, data_masker):
    """Round-trip each sample value (as UTF-8 bytes) through encrypt() and decrypt()."""
    # GIVEN an instantiation of DataMasking with the AWS encryption provider

    # AWS Encryption SDK encrypt method only takes in bytes or strings
    plaintext = str(value).encode("utf-8")

    # WHEN encrypting and then decrypting the encrypted data
    ciphertext = data_masker.encrypt(plaintext)
    round_tripped = data_masker.decrypt(ciphertext)

    # THEN the result is the original input data
    assert round_tripped == plaintext
54+
55+
56+
@pytest.mark.parametrize("value, fields", zip(dictionaries, fields_to_mask))
def test_encrypt_decrypt_with_fields(value, fields, data_masker):
    """Round-trip selected fields through encrypt() and decrypt() and compare to the expected dict."""
    # GIVEN an instantiation of DataMasking with the AWS encryption provider

    # WHEN encrypting and then decrypting the encrypted data with a list of fields
    ciphertext = data_masker.encrypt(value, fields)
    round_tripped = data_masker.decrypt(ciphertext, fields)

    # THEN the result is the original input data
    # AWS Encryption SDK decrypt method only returns bytes, so the expected
    # dictionaries hold bytes in the fields that were encrypted
    expected = aws_encrypted_json_blob if value == json_blob else aws_encrypted_with_fields
    assert round_tripped == expected
70+
71+
72+
@patch("aws_encryption_sdk.EncryptionSDKClient")
def test_mock(get_encryption_sdk_client_mock):
    """Encrypt then decrypt a byte string while EncryptionSDKClient is patched."""
    # NOTE(review): the patch replaces aws_encryption_sdk.EncryptionSDKClient;
    # whether the provider module looks the class up through that attribute is
    # not visible here — confirm the mock is actually the client being used.
    get_encryption_sdk_client_mock.return_value = "mock_value"

    masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=["mock_value"]))
    ciphertext = masker.encrypt(b"secret_data")
    round_tripped = masker.decrypt(ciphertext)
    assert round_tripped == b"secret_data"

tests/unit/data_masking/setup.py

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import json
2+
import copy
3+
from itsdangerous.url_safe import URLSafeSerializer
4+
from aws_lambda_powertools.shared.constants import DATA_MASKING_STRING
5+
from aws_lambda_powertools.utilities.data_masking.base import DataMasking
6+
from aws_lambda_powertools.utilities.data_masking.provider import Provider
7+
8+
9+
class MyEncryptionProvider(Provider):
    """Custom encryption provider class.

    Uses a URLSafeSerializer constructed from ``keys`` to serialize values in
    ``encrypt`` and to load them back in ``decrypt``.
    """

    def __init__(self, keys, salt=None):
        """Store the serializer key material and the optional salt."""
        self.keys = keys
        # NOTE(review): salt is stored but never read inside this class —
        # confirm whether it should be forwarded to URLSafeSerializer.
        self.salt = salt

    def encrypt(self, data: str) -> str:
        """Return ``data`` serialized by URLSafeSerializer; None is returned unchanged."""
        if data is not None:
            return URLSafeSerializer(self.keys).dumps(data)
        return data

    def decrypt(self, data: str) -> str:
        """Return the value loaded from ``data`` by URLSafeSerializer; None is returned unchanged."""
        if data is not None:
            return URLSafeSerializer(self.keys).loads(data)
        return data
27+
28+
29+
data_maskers = [
30+
DataMasking(),
31+
DataMasking(provider=MyEncryptionProvider(keys="secret-key")),
32+
]
33+
34+
35+
python_dict = {
36+
"a": {
37+
"1": {"None": "hello", "four": "world"}, # None type key doesn't work
38+
"b": {"3": {"4": "goodbye", "e": "world"}}, # key "4.5" doesn't work
39+
}
40+
}
41+
42+
43+
json_dict = json.dumps(python_dict)
44+
45+
46+
dict_fields = ["a.1.None", "a.b.3.4"]
47+
48+
49+
masked_with_fields = {
50+
"a": {"1": {"None": DATA_MASKING_STRING, "four": "world"}, "b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}}}
51+
}
52+
53+
aws_encrypted_with_fields = {
54+
"a": {
55+
"1": {"None": bytes("hello", "utf-8"), "four": "world"},
56+
"b": {"3": {"4": bytes("goodbye", "utf-8"), "e": "world"}},
57+
}
58+
}
59+
60+
# 10kb JSON blob for latency testing
61+
json_blob = {
62+
"id": 1,
63+
"name": "John Doe",
64+
"age": 30,
65+
"email": "[email protected]",
66+
"address": {"street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345"},
67+
"phone_numbers": ["+1-555-555-1234", "+1-555-555-5678"],
68+
"interests": ["Hiking", "Traveling", "Photography", "Reading"],
69+
"job_history": {
70+
"company": "Acme Inc.",
71+
"position": "Software Engineer",
72+
"start_date": "2015-01-01",
73+
"end_date": "2017-12-31",
74+
},
75+
"about_me": """
76+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla tincidunt velit quis
77+
sapien mollis, at egestas massa tincidunt. Suspendisse ultrices arcu a dolor dapibus,
78+
ut pretium turpis volutpat. Vestibulum at sapien quis sapien dignissim volutpat ut a enim.
79+
Praesent fringilla sem eu dui convallis luctus. Donec ullamcorper, sapien ut convallis congue,
80+
risus mauris pretium tortor, nec dignissim arcu urna a nisl. Vivamus non fermentum ex. Proin
81+
interdum nisi id sagittis egestas. Nam sit amet nisi nec quam pharetra sagittis. Aliquam erat
82+
volutpat. Donec nec luctus sem, nec ornare lorem. Vivamus vitae orci quis enim faucibus placerat.
83+
Nulla facilisi. Proin in turpis orci. Donec imperdiet velit ac tellus gravida, eget laoreet tellus
84+
malesuada. Praesent venenatis tellus ac urna blandit, at varius felis posuere. Integer a commodo nunc.
85+
""",
86+
}
87+
88+
89+
json_blob_fields = ["address.street", "job_history.company"]
90+
aws_encrypted_json_blob = copy.deepcopy(json_blob)
91+
aws_encrypted_json_blob["address"]["street"] = bytes("123 Main St", "utf-8")
92+
aws_encrypted_json_blob["job_history"]["company"] = bytes("Acme Inc.", "utf-8")
93+
94+
dictionaries = [python_dict, json_dict, json_blob]
95+
fields_to_mask = [dict_fields, dict_fields, json_blob_fields]
96+
97+
98+
data_types_and_masks = [
99+
# simple data types
100+
[42, DATA_MASKING_STRING],
101+
[4.22, DATA_MASKING_STRING],
102+
[True, DATA_MASKING_STRING],
103+
[None, DATA_MASKING_STRING],
104+
["this is a string", DATA_MASKING_STRING],
105+
# iterables
106+
[[1, 2, 3, 4], [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING]],
107+
[
108+
["hello", 1, 2, 3, "world"],
109+
[DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING],
110+
],
111+
# dictionaries
112+
[python_dict, DATA_MASKING_STRING],
113+
[json_dict, DATA_MASKING_STRING],
114+
]
115+
116+
117+
# Input values only: the first element of each [value, expected_mask] pair.
data_types = [value for value, _expected_mask in data_types_and_masks]

tests/unit/data_masking/test_data_masking.py

+3-102
Original file line numberDiff line numberDiff line change
@@ -1,111 +1,12 @@
1-
import copy
21
import json
32

43
import pytest
5-
from itsdangerous.url_safe import URLSafeSerializer
6-
74
from aws_lambda_powertools.shared.constants import DATA_MASKING_STRING
85
from aws_lambda_powertools.utilities.data_masking.base import DataMasking
9-
from aws_lambda_powertools.utilities.data_masking.provider import Provider
10-
11-
12-
class MyEncryptionProvider(Provider):
13-
"""Custom encryption provider class"""
14-
15-
def __init__(self, keys, salt=None):
16-
self.keys = keys
17-
self.salt = salt
18-
19-
def encrypt(self, data: str) -> str:
20-
if data is None:
21-
return data
22-
serialize = URLSafeSerializer(self.keys)
23-
return serialize.dumps(data)
24-
25-
def decrypt(self, data: str) -> str:
26-
if data is None:
27-
return data
28-
serialize = URLSafeSerializer(self.keys)
29-
return serialize.loads(data)
6+
from tests.unit.data_masking.setup import *
307

31-
32-
data_maskers = [
33-
DataMasking(),
34-
DataMasking(provider=MyEncryptionProvider(keys="secret-key")),
35-
]
36-
37-
38-
python_dict = {
39-
"a": {
40-
"1": {"None": "hello", "four": "world"}, # None type key doesn't work
41-
"b": {"3": {"4": "goodbye", "e": "world"}}, # key "4.5" doesn't work
42-
}
43-
}
44-
json_dict = json.dumps(python_dict)
45-
dict_fields = ["a.1.None", "a.b.3.4"]
46-
masked_with_fields = {
47-
"a": {"1": {"None": DATA_MASKING_STRING, "four": "world"}, "b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}}}
48-
}
49-
aws_encrypted_with_fields = {
50-
"a": {
51-
"1": {"None": bytes("hello", "utf-8"), "four": "world"},
52-
"b": {"3": {"4": bytes("goodbye", "utf-8"), "e": "world"}},
53-
}
54-
}
55-
56-
# 10kb JSON blob for latency testing
57-
json_blob = {
58-
"id": 1,
59-
"name": "John Doe",
60-
"age": 30,
61-
"email": "[email protected]",
62-
"address": {"street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345"},
63-
"phone_numbers": ["+1-555-555-1234", "+1-555-555-5678"],
64-
"interests": ["Hiking", "Traveling", "Photography", "Reading"],
65-
"job_history": {
66-
"company": "Acme Inc.",
67-
"position": "Software Engineer",
68-
"start_date": "2015-01-01",
69-
"end_date": "2017-12-31",
70-
},
71-
"about_me": """
72-
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla tincidunt velit quis
73-
sapien mollis, at egestas massa tincidunt. Suspendisse ultrices arcu a dolor dapibus,
74-
ut pretium turpis volutpat. Vestibulum at sapien quis sapien dignissim volutpat ut a enim.
75-
Praesent fringilla sem eu dui convallis luctus. Donec ullamcorper, sapien ut convallis congue,
76-
risus mauris pretium tortor, nec dignissim arcu urna a nisl. Vivamus non fermentum ex. Proin
77-
interdum nisi id sagittis egestas. Nam sit amet nisi nec quam pharetra sagittis. Aliquam erat
78-
volutpat. Donec nec luctus sem, nec ornare lorem. Vivamus vitae orci quis enim faucibus placerat.
79-
Nulla facilisi. Proin in turpis orci. Donec imperdiet velit ac tellus gravida, eget laoreet tellus
80-
malesuada. Praesent venenatis tellus ac urna blandit, at varius felis posuere. Integer a commodo nunc.
81-
""",
82-
}
83-
json_blob_fields = ["address.street", "job_history.company"]
84-
aws_encrypted_json_blob = copy.deepcopy(json_blob)
85-
aws_encrypted_json_blob["address"]["street"] = bytes("123 Main St", "utf-8")
86-
aws_encrypted_json_blob["job_history"]["company"] = bytes("Acme Inc.", "utf-8")
87-
88-
dictionaries = [python_dict, json_dict, json_blob]
89-
fields_to_mask = [dict_fields, dict_fields, json_blob_fields]
90-
91-
data_types_and_masks = [
92-
# simple data types
93-
[42, DATA_MASKING_STRING],
94-
[4.22, DATA_MASKING_STRING],
95-
[True, DATA_MASKING_STRING],
96-
[None, DATA_MASKING_STRING],
97-
["this is a string", DATA_MASKING_STRING],
98-
# iterables
99-
[[1, 2, 3, 4], [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING]],
100-
[
101-
["hello", 1, 2, 3, "world"],
102-
[DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING],
103-
],
104-
# dictionaries
105-
[python_dict, DATA_MASKING_STRING],
106-
[json_dict, DATA_MASKING_STRING],
107-
]
108-
data_types = [item[0] for item in data_types_and_masks]
8+
# TODO: should the shared test data in setup.py live in a conftest.py instead? There is no other conftest in the unit tests.
9+
# NOTE: converting all of the shared test values into pytest fixtures did not work.
10910

11011

11112
@pytest.mark.parametrize("data_masker", data_maskers)

0 commit comments

Comments
 (0)