Merge branch 'master' into loc-config-file

Dan · web-flow · commit 75fb75bb965f · 2021-02-24T10:44:47.000-08:00
diff --git a/src/sagemaker/feature_store/feature_group.py b/src/sagemaker/feature_store/feature_group.py
@@ -185,6 +185,7 @@ def _ingest_single_batch(
                     feature_name=data_frame.columns[index], value_as_string=str(row[index])
                 )
                 for index in range(len(row))
+                if pd.notna(row[index])
             ]
             sagemaker_session.put_record(
                 feature_group_name=feature_group_name, record=[value.to_dict() for value in record]
diff --git a/tests/integ/test_feature_store.py b/tests/integ/test_feature_store.py
@@ -100,6 +100,7 @@ def pandas_data_frame():
             "feature1": pd.Series(np.arange(10.0), dtype="float64"),
             "feature2": pd.Series(np.arange(10), dtype="int64"),
             "feature3": pd.Series(["2020-10-30T03:43:21Z"] * 10, dtype="string"),
+            "feature4": pd.Series(np.arange(5.0), dtype="float64"),  # contains nan
         }
     )
     return df
@@ -132,6 +133,7 @@ def create_table_ddl():
         "  feature1 FLOAT\n"
         "  feature2 INT\n"
         "  feature3 STRING\n"
+        "  feature4 FLOAT\n"
         "  write_time TIMESTAMP\n"
         "  event_time TIMESTAMP\n"
         "  is_deleted BOOLEAN\n"
@@ -214,6 +216,9 @@ def test_create_feature_store(
                 time.sleep(60)
 
         assert df.shape[0] == 11
+        nans = pd.isna(df.loc[df["feature1"].isin([5, 6, 7, 8, 9])]["feature4"])
+        for _, is_na in nans.items():  # items() yields (index, value); assert on the value
+            assert is_na
         assert (
             create_table_ddl.format(
                 feature_group_name=feature_group_name,
diff --git a/tests/integ/test_smdataparallel_tf.py b/tests/integ/test_smdataparallel_tf.py
@@ -31,6 +31,7 @@
     integ.test_region() not in integ.DATA_PARALLEL_TESTING_REGIONS,
     reason="Only allow this test to run in IAD and CMH to limit usage of p3.16xlarge",
 )
+@pytest.mark.skip("Failing due to bad DLC image release. Disable temporarily.")
 def test_smdataparallel_tf_mnist(
     sagemaker_session,
     tensorflow_training_latest_version,
diff --git a/tests/unit/sagemaker/lineage/test_visualizer.py b/tests/unit/sagemaker/lineage/test_visualizer.py
@@ -49,64 +49,17 @@ def test_trial_component_name(viz, sagemaker_session):
         "TrialComponentArn": "tc-arn",
     }
 
-    sagemaker_session.sagemaker_client.list_associations.side_effect = [
-        {
-            "AssociationSummaries": [
-                {
-                    "SourceArn": "a:b:c:d:e:artifact/src-arn-1",
-                    "SourceName": "source-name-1",
-                    "SourceType": "source-type-1",
-                    "DestinationArn": "a:b:c:d:e:artifact/dest-arn-1",
-                    "DestinationName": "dest-name-1",
-                    "DestinationType": "dest-type-1",
-                    "AssociationType": "type-1",
-                }
-            ]
-        },
-        {
-            "AssociationSummaries": [
-                {
-                    "SourceArn": "a:b:c:d:e:artifact/src-arn-2",
-                    "SourceName": "source-name-2",
-                    "SourceType": "source-type-2",
-                    "DestinationArn": "a:b:c:d:e:artifact/dest-arn-2",
-                    "DestinationName": "dest-name-2",
-                    "DestinationType": "dest-type-2",
-                    "AssociationType": "type-2",
-                }
-            ]
-        },
-    ]
+    get_list_associations_side_effect(sagemaker_session)
 
     df = viz.show(trial_component_name=name)
 
     sagemaker_session.sagemaker_client.describe_trial_component.assert_called_with(
         TrialComponentName=name,
     )
 
-    expected_calls = [
-        unittest.mock.call(
-            DestinationArn="tc-arn",
-        ),
-        unittest.mock.call(
-            SourceArn="tc-arn",
-        ),
-    ]
-    assert expected_calls == sagemaker_session.sagemaker_client.list_associations.mock_calls
+    assert_list_associations_mock_calls(sagemaker_session)
 
-    expected_dataframe = pd.DataFrame.from_dict(
-        OrderedDict(
-            [
-                ("Name/Source", ["source-name-1", "dest-name-2"]),
-                ("Direction", ["Input", "Output"]),
-                ("Type", ["source-type-1", "dest-type-2"]),
-                ("Association Type", ["type-1", "type-2"]),
-                ("Lineage Type", ["artifact", "artifact"]),
-            ]
-        )
-    )
-
-    pd.testing.assert_frame_equal(expected_dataframe, df)
+    pd.testing.assert_frame_equal(get_expected_dataframe(), df)
 
 
 def test_model_package_arn(viz, sagemaker_session):
@@ -116,34 +69,7 @@ def test_model_package_arn(viz, sagemaker_session):
         "ArtifactSummaries": [{"ArtifactArn": "artifact-arn"}]
     }
 
-    sagemaker_session.sagemaker_client.list_associations.side_effect = [
-        {
-            "AssociationSummaries": [
-                {
-                    "SourceArn": "a:b:c:d:e:artifact/src-arn-1",
-                    "SourceName": "source-name-1",
-                    "SourceType": "source-type-1",
-                    "DestinationArn": "a:b:c:d:e:artifact/dest-arn-1",
-                    "DestinationName": "dest-name-1",
-                    "DestinationType": "dest-type-1",
-                    "AssociationType": "type-1",
-                }
-            ]
-        },
-        {
-            "AssociationSummaries": [
-                {
-                    "SourceArn": "a:b:c:d:e:artifact/src-arn-2",
-                    "SourceName": "source-name-2",
-                    "SourceType": "source-type-2",
-                    "DestinationArn": "a:b:c:d:e:artifact/dest-arn-2",
-                    "DestinationName": "dest-name-2",
-                    "DestinationType": "dest-type-2",
-                    "AssociationType": "type-2",
-                }
-            ]
-        },
-    ]
+    get_list_associations_side_effect(sagemaker_session)
 
     df = viz.show(model_package_arn=name)
 
@@ -161,19 +87,7 @@ def test_model_package_arn(viz, sagemaker_session):
     ]
     assert expected_calls == sagemaker_session.sagemaker_client.list_associations.mock_calls
 
-    expected_dataframe = pd.DataFrame.from_dict(
-        OrderedDict(
-            [
-                ("Name/Source", ["source-name-1", "dest-name-2"]),
-                ("Direction", ["Input", "Output"]),
-                ("Type", ["source-type-1", "dest-type-2"]),
-                ("Association Type", ["type-1", "type-2"]),
-                ("Lineage Type", ["artifact", "artifact"]),
-            ]
-        )
-    )
-
-    pd.testing.assert_frame_equal(expected_dataframe, df)
+    pd.testing.assert_frame_equal(get_expected_dataframe(), df)
 
 
 def test_endpoint_arn(viz, sagemaker_session):
@@ -183,34 +97,7 @@ def test_endpoint_arn(viz, sagemaker_session):
         "ContextSummaries": [{"ContextArn": "context-arn"}]
     }
 
-    sagemaker_session.sagemaker_client.list_associations.side_effect = [
-        {
-            "AssociationSummaries": [
-                {
-                    "SourceArn": "a:b:c:d:e:context/src-arn-1",
-                    "SourceName": "source-name-1",
-                    "SourceType": "source-type-1",
-                    "DestinationArn": "a:b:c:d:e:context/dest-arn-1",
-                    "DestinationName": "dest-name-1",
-                    "DestinationType": "dest-type-1",
-                    "AssociationType": "type-1",
-                }
-            ]
-        },
-        {
-            "AssociationSummaries": [
-                {
-                    "SourceArn": "a:b:c:d:e:context/src-arn-2",
-                    "SourceName": "source-name-2",
-                    "SourceType": "source-type-2",
-                    "DestinationArn": "a:b:c:d:e:context/dest-arn-2",
-                    "DestinationName": "dest-name-2",
-                    "DestinationType": "dest-type-2",
-                    "AssociationType": "type-2",
-                }
-            ]
-        },
-    ]
+    get_list_associations_side_effect(sagemaker_session)
 
     df = viz.show(endpoint_arn=name)
 
@@ -228,27 +115,74 @@ def test_endpoint_arn(viz, sagemaker_session):
     ]
     assert expected_calls == sagemaker_session.sagemaker_client.list_associations.mock_calls
 
-    expected_dataframe = pd.DataFrame.from_dict(
-        OrderedDict(
-            [
-                ("Name/Source", ["source-name-1", "dest-name-2"]),
-                ("Direction", ["Input", "Output"]),
-                ("Type", ["source-type-1", "dest-type-2"]),
-                ("Association Type", ["type-1", "type-2"]),
-                ("Lineage Type", ["context", "context"]),
-            ]
-        )
+    pd.testing.assert_frame_equal(get_expected_dataframe(), df)
+
+
+def test_processing_job_pipeline_execution_step(viz, sagemaker_session):
+
+    sagemaker_session.sagemaker_client.list_trial_components.return_value = {
+        "TrialComponentSummaries": [{"TrialComponentArn": "tc-arn"}]
+    }
+
+    get_list_associations_side_effect(sagemaker_session)
+
+    step = {"Metadata": {"ProcessingJob": {"Arn": "proc-job-arn"}}}
+
+    df = viz.show(pipeline_execution_step=step)
+
+    sagemaker_session.sagemaker_client.list_trial_components.assert_called_with(
+        SourceArn="proc-job-arn",
     )
 
-    pd.testing.assert_frame_equal(expected_dataframe, df)
+    assert_list_associations_mock_calls(sagemaker_session)
 
+    pd.testing.assert_frame_equal(get_expected_dataframe(), df)
 
-def test_processing_job_pipeline_execution_step(viz, sagemaker_session):
+
+def test_training_job_pipeline_execution_step(viz, sagemaker_session):
 
     sagemaker_session.sagemaker_client.list_trial_components.return_value = {
         "TrialComponentSummaries": [{"TrialComponentArn": "tc-arn"}]
     }
 
+    get_list_associations_side_effect(sagemaker_session)
+
+    step = {"Metadata": {"TrainingJob": {"Arn": "training-job-arn"}}}
+
+    df = viz.show(pipeline_execution_step=step)
+
+    sagemaker_session.sagemaker_client.list_trial_components.assert_called_with(
+        SourceArn="training-job-arn",
+    )
+
+    assert_list_associations_mock_calls(sagemaker_session)
+
+    pd.testing.assert_frame_equal(get_expected_dataframe(), df)
+
+
+def test_transform_job_pipeline_execution_step(viz, sagemaker_session):
+
+    sagemaker_session.sagemaker_client.list_trial_components.return_value = {
+        "TrialComponentSummaries": [{"TrialComponentArn": "tc-arn"}]
+    }
+
+    get_list_associations_side_effect(sagemaker_session)
+
+    step = {"Metadata": {"TransformJob": {"Arn": "transform-job-arn"}}}
+
+    df = viz.show(pipeline_execution_step=step)
+
+    sagemaker_session.sagemaker_client.list_trial_components.assert_called_with(
+        SourceArn="transform-job-arn",
+    )
+
+    assert_list_associations_mock_calls(sagemaker_session)
+
+    pd.testing.assert_frame_equal(get_expected_dataframe(), df)
+
+
+def get_list_associations_side_effect(sagemaker_session):
+
     sagemaker_session.sagemaker_client.list_associations.side_effect = [
         {
             "AssociationSummaries": [
@@ -278,13 +212,8 @@ def test_processing_job_pipeline_execution_step(viz, sagemaker_session):
         },
     ]
 
-    step = {"Metadata": {"ProcessingJob": {"Arn": "proc-job-arn"}}}
-
-    df = viz.show(pipeline_execution_step=step)
 
-    sagemaker_session.sagemaker_client.list_trial_components.assert_called_with(
-        SourceArn="proc-job-arn",
-    )
+def assert_list_associations_mock_calls(sagemaker_session):
 
     expected_calls = [
         unittest.mock.call(
@@ -296,6 +225,9 @@ def test_processing_job_pipeline_execution_step(viz, sagemaker_session):
     ]
     assert expected_calls == sagemaker_session.sagemaker_client.list_associations.mock_calls
 
+
+def get_expected_dataframe():
+
     expected_dataframe = pd.DataFrame.from_dict(
         OrderedDict(
             [
@@ -308,4 +240,4 @@ def test_processing_job_pipeline_execution_step(viz, sagemaker_session):
         )
     )
 
-    pd.testing.assert_frame_equal(expected_dataframe, df)
+    return expected_dataframe

Original file line number	Diff line number	Diff line change
`@@ -185,6 +185,7 @@ def _ingest_single_batch(`
`185`	`185`	`feature_name=data_frame.columns[index], value_as_string=str(row[index])`
`186`	`186`	`)`
`187`	`187`	`for index in range(len(row))`
	`188`	`+ if pd.notna(row[index])`
`188`	`189`	`]`
`189`	`190`	`sagemaker_session.put_record(`
`190`	`191`	`feature_group_name=feature_group_name, record=[value.to_dict() for value in record]`
Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@`
`31`	`31`	`integ.test_region() not in integ.DATA_PARALLEL_TESTING_REGIONS,`
`32`	`32`	`reason="Only allow this test to run in IAD and CMH to limit usage of p3.16xlarge",`
`33`	`33`	`)`
	`34`	`+@pytest.mark.skip("Failing due to bad DLC image release. Disable temporarily.")`
`34`	`35`	`def test_smdataparallel_tf_mnist(`
`35`	`36`	`sagemaker_session,`
`36`	`37`	`tensorflow_training_latest_version,`