14
14
15
15
import datetime
16
16
17
+ import pandas as pd
17
18
import pytest
18
19
from mock import Mock
19
20
20
21
from sagemaker .feature_store .dataset_builder import DatasetBuilder
22
+ from sagemaker .feature_store .feature_group import FeatureGroup
21
23
22
24
23
25
@pytest .fixture
@@ -30,6 +32,58 @@ def feature_group_mock():
30
32
return Mock ()
31
33
32
34
35
def test_with_feature_group_throw_runtime_error(sagemaker_session_mock):
    """Verify ``with_feature_group`` raises when no metastore is configured.

    The mocked ``describe_feature_group`` response carries an empty
    ``OfflineStoreConfig``; the test expects a ``RuntimeError`` whose message
    names the feature group.
    """
    feature_group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
    dataset_builder = DatasetBuilder(
        sagemaker_session=sagemaker_session_mock,
        base=feature_group,
        output_path="file/to/path",
    )
    # Empty offline store config: no "DataCatalogConfig" entry is present.
    sagemaker_session_mock.describe_feature_group.return_value = {"OfflineStoreConfig": {}}
    with pytest.raises(RuntimeError) as error:
        dataset_builder.with_feature_group(
            feature_group, "target-feature", ["feature-1", "feature-2"]
        )
    # Check the raised exception itself (``error.value``) rather than the
    # ExceptionInfo wrapper, whose string form is a repr of the wrapper and
    # not the exception message.
    assert "No metastore is configured with FeatureGroup MyFeatureGroup." in str(error.value)
48
+
49
+
50
def test_with_feature_group(sagemaker_session_mock):
    """Check that ``with_feature_group`` records one merge entry with the expected fields."""
    group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
    frame = pd.DataFrame({"feature-1": [420, 380, 390], "feature-2": [50, 40, 45]})
    group.load_feature_definitions(frame)
    builder = DatasetBuilder(
        sagemaker_session=sagemaker_session_mock,
        base=group,
        output_path="file/to/path",
    )

    # Mocked describe response supplying the catalog location and key feature names.
    catalog_config = {"TableName": "table", "Database": "database"}
    sagemaker_session_mock.describe_feature_group.return_value = {
        "OfflineStoreConfig": {"DataCatalogConfig": catalog_config},
        "RecordIdentifierFeatureName": "feature-1",
        "EventTimeFeatureName": "feature-2",
    }

    builder.with_feature_group(group, "target-feature", ["feature-1", "feature-2"])

    pending = builder._feature_groups_to_be_merged
    assert len(pending) == 1

    # Exactly one entry is expected; inspect it field by field.
    merged = pending[0]
    assert merged.features == ["feature-1", "feature-2"]
    assert merged.included_feature_names == ["feature-1", "feature-2"]
    assert merged.database == "database"
    assert merged.table_name == "table"
    assert merged.record_identifier_feature_name == "feature-1"
    assert merged.event_time_identifier_feature_name == "feature-2"
    assert merged.target_feature_name_in_base == "target-feature"
85
+
86
+
33
87
def test_point_in_time_accurate_join (sagemaker_session_mock , feature_group_mock ):
34
88
dataset_builder = DatasetBuilder (
35
89
sagemaker_session = sagemaker_session_mock ,
0 commit comments