Skip to content

Commit 162e922

Browse files
ericpark3Eric Park
and
Eric Park
authored
feature: support different types of deletion mode (#3786)
Co-authored-by: Eric Park <[email protected]>
1 parent fa802ff commit 162e922

File tree

5 files changed

+115
-2
lines changed

5 files changed

+115
-2
lines changed

src/sagemaker/feature_store/feature_group.py

+6
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
FeatureValue,
6161
FeatureParameter,
6262
TableFormatEnum,
63+
DeletionModeEnum,
6364
)
6465
from sagemaker.utils import resolve_value_from_config
6566

@@ -785,6 +786,7 @@ def delete_record(
785786
self,
786787
record_identifier_value_as_string: str,
787788
event_time: str,
789+
deletion_mode: DeletionModeEnum = DeletionModeEnum.SOFT_DELETE,
788790
):
789791
"""Delete a single record from a FeatureGroup.
790792
@@ -793,11 +795,15 @@ def delete_record(
793795
a String representing the value of the record identifier.
794796
event_time (String):
795797
a timestamp format String indicating when the deletion event occurred.
798+
deletion_mode (DeletionModeEnum):
799+
deletion mode for deleting record. (default: DeletionModeEnum.SOFT_DELETE)
796800
"""
801+
797802
return self.sagemaker_session.delete_record(
798803
feature_group_name=self.name,
799804
record_identifier_value_as_string=record_identifier_value_as_string,
800805
event_time=event_time,
806+
deletion_mode=deletion_mode.value,
801807
)
802808

803809
def ingest(

src/sagemaker/feature_store/inputs.py

+10
Original file line numberDiff line numberDiff line change
@@ -369,3 +369,13 @@ def to_dict(self) -> Dict[str, Any]:
369369
RecordIdentifiersValueAsString=self.record_identifiers_value_as_string,
370370
FeatureNames=None if not self.feature_names else self.feature_names,
371371
)
372+
373+
374+
class DeletionModeEnum(Enum):
375+
"""Enum of deletion modes.
376+
377+
The deletion mode for deleting records can be SoftDelete or HardDelete.
378+
"""
379+
380+
SOFT_DELETE = "SoftDelete"
381+
HARD_DELETE = "HardDelete"

src/sagemaker/session.py

+3
Original file line numberDiff line numberDiff line change
@@ -5196,18 +5196,21 @@ def delete_record(
51965196
feature_group_name: str,
51975197
record_identifier_value_as_string: str,
51985198
event_time: str,
5199+
deletion_mode: str = None,
51995200
):
52005201
"""Deletes a single record from the FeatureGroup.
52015202
52025203
Args:
52035204
feature_group_name (str): name of the FeatureGroup.
52045205
record_identifier_value_as_string (str): value of the record identifier, as a string.
52055206
event_time (str): a timestamp indicating when the deletion event occurred.
5207+
deletion_mode (str): deletion mode for deleting record.
52065208
"""
52075209
return self.sagemaker_featurestore_runtime_client.delete_record(
52085210
FeatureGroupName=feature_group_name,
52095211
RecordIdentifierValueAsString=record_identifier_value_as_string,
52105212
EventTime=event_time,
5213+
DeletionMode=deletion_mode,
52115214
)
52125215

52135216
def get_record(

tests/integ/test_feature_store.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
Filter,
3434
ResourceEnum,
3535
Identifier,
36+
DeletionModeEnum,
3637
)
3738
from sagemaker.feature_store.dataset_builder import (
3839
JoinTypeEnum,
@@ -162,6 +163,15 @@ def pandas_data_frame_without_string():
162163
return df
163164

164165

166+
@pytest.fixture
167+
def historic_record():
168+
return [
169+
FeatureValue(feature_name="feature1", value_as_string="10.0"),
170+
FeatureValue(feature_name="feature2", value_as_string="7"),
171+
FeatureValue(feature_name="feature3", value_as_string="2020-10-29T03:43:21Z"),
172+
]
173+
174+
165175
@pytest.fixture
166176
def record():
167177
return [
@@ -398,7 +408,7 @@ def test_get_and_batch_get_record(
398408
assert feature["FeatureName"] is not removed_feature_name
399409

400410

401-
def test_delete_record(
411+
def test_soft_delete_record(
402412
feature_store_session,
403413
role,
404414
feature_group_name,
@@ -437,6 +447,55 @@ def test_delete_record(
437447
assert retrieved_record is None
438448

439449

450+
def test_hard_delete_record(
451+
feature_store_session,
452+
role,
453+
feature_group_name,
454+
pandas_data_frame,
455+
historic_record,
456+
record,
457+
):
458+
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=feature_store_session)
459+
feature_group.load_feature_definitions(data_frame=pandas_data_frame)
460+
461+
record_identifier_value_as_string = record[0].value_as_string
462+
historic_record_identifier_value_as_string = historic_record[0].value_as_string
463+
with cleanup_feature_group(feature_group):
464+
feature_group.create(
465+
s3_uri=False,
466+
record_identifier_name="feature1",
467+
event_time_feature_name="feature3",
468+
role_arn=role,
469+
enable_online_store=True,
470+
)
471+
_wait_for_feature_group_create(feature_group)
472+
# Ingest data
473+
feature_group.put_record(record=record)
474+
# Retrieve data
475+
retrieved_record = feature_group.get_record(
476+
record_identifier_value_as_string=record_identifier_value_as_string,
477+
)
478+
assert retrieved_record is not None
479+
# Delete data
480+
feature_group.delete_record(
481+
record_identifier_value_as_string=record_identifier_value_as_string,
482+
event_time=datetime.datetime.now().replace(microsecond=0).isoformat() + "Z",
483+
deletion_mode=DeletionModeEnum.HARD_DELETE,
484+
)
485+
# Retrieve data
486+
retrieved_record = feature_group.get_record(
487+
record_identifier_value_as_string=record_identifier_value_as_string,
488+
)
489+
assert retrieved_record is None
490+
# Ingest data
491+
feature_group.put_record(historic_record)
492+
# Retrieve data
493+
retrieved_record = feature_group.get_record(
494+
record_identifier_value_as_string=historic_record_identifier_value_as_string,
495+
)
496+
assert retrieved_record is not None
497+
498+
440499
def test_update_feature_group(
441500
feature_store_session,
442501
role,

tests/unit/sagemaker/feature_store/test_feature_group.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
AthenaQuery,
3232
IngestionError,
3333
)
34-
from sagemaker.feature_store.inputs import FeatureParameter
34+
from sagemaker.feature_store.inputs import FeatureParameter, DeletionModeEnum
3535

3636
from tests.unit import SAGEMAKER_CONFIG_FEATURE_GROUP
3737

@@ -296,6 +296,41 @@ def test_delete_record(sagemaker_session_mock):
296296
feature_group_name="MyFeatureGroup",
297297
record_identifier_value_as_string=record_identifier_value_as_string,
298298
event_time=event_time,
299+
deletion_mode=DeletionModeEnum.SOFT_DELETE.value,
300+
)
301+
302+
303+
def test_soft_delete_record(sagemaker_session_mock):
304+
feature_group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
305+
record_identifier_value_as_string = "1.0"
306+
event_time = "2022-09-14"
307+
feature_group.delete_record(
308+
record_identifier_value_as_string=record_identifier_value_as_string,
309+
event_time=event_time,
310+
deletion_mode=DeletionModeEnum.SOFT_DELETE,
311+
)
312+
sagemaker_session_mock.delete_record.assert_called_with(
313+
feature_group_name="MyFeatureGroup",
314+
record_identifier_value_as_string=record_identifier_value_as_string,
315+
event_time=event_time,
316+
deletion_mode=DeletionModeEnum.SOFT_DELETE.value,
317+
)
318+
319+
320+
def test_hard_delete_record(sagemaker_session_mock):
321+
feature_group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
322+
record_identifier_value_as_string = "1.0"
323+
event_time = "2022-09-14"
324+
feature_group.delete_record(
325+
record_identifier_value_as_string=record_identifier_value_as_string,
326+
event_time=event_time,
327+
deletion_mode=DeletionModeEnum.HARD_DELETE,
328+
)
329+
sagemaker_session_mock.delete_record.assert_called_with(
330+
feature_group_name="MyFeatureGroup",
331+
record_identifier_value_as_string=record_identifier_value_as_string,
332+
event_time=event_time,
333+
deletion_mode=DeletionModeEnum.HARD_DELETE.value,
299334
)
300335

301336

0 commit comments

Comments
 (0)