change: reshape Artifacts into data frame in ExperimentsAnalytics

yzhu0 · web-flow · commit 301541071eaa · 2020-07-15T11:58:19.000-07:00
diff --git a/src/sagemaker/analytics.py b/src/sagemaker/analytics.py
@@ -431,6 +431,8 @@ def __init__(
         metric_names=None,
         parameter_names=None,
         sagemaker_session=None,
+        input_artifact_names=None,
+        output_artifact_names=None,
     ):
         """Initialize a ``ExperimentAnalytics`` instance.
 
@@ -450,6 +452,11 @@ def __init__(
             sagemaker_session (sagemaker.session.Session): Session object which manages interactions
                 with Amazon SageMaker APIs and any other AWS services needed. If not specified,
                 one is created using the default AWS configuration chain.
+            input_artifact_names (dict, optional): The input artifacts for the experiment.
+                Examples of input artifacts are datasets, algorithms, hyperparameters, source
+                code, and instance types.
+            output_artifact_names (dict, optional): The output artifacts for the experiment.
+                Examples of output artifacts are metrics, snapshots, logs, and images.
         """
         sagemaker_session = sagemaker_session or Session()
         self._sage_client = sagemaker_session.sagemaker_client
@@ -463,6 +470,8 @@ def __init__(
         self._sort_order = sort_order
         self._metric_names = metric_names
         self._parameter_names = parameter_names
+        self._input_artifact_names = input_artifact_names
+        self._output_artifact_names = output_artifact_names
         self._trial_components = None
         super(ExperimentAnalytics, self).__init__()
         self.clear_cache()
@@ -516,6 +525,21 @@ def _reshape_metrics(self, metrics):
                     out["{} - {}".format(metric_name, stat_type)] = stat_value
         return out
 
+    def _reshape_artifacts(self, artifacts, _artifact_names):
+        """Reshape trial component input/output artifacts to pandas columns
+        Args:
+            artifacts: trial component input/output artifacts, a mapping of artifact
+                name to a dict that may hold "MediaType" and "Value". May be empty
+                or ``None``.
+            _artifact_names: artifact names to keep. When falsy (``None`` or empty),
+                every artifact is kept.
+        Returns:
+            OrderedDict: for each kept artifact, in sorted name order, the keys
+                "<name> - MediaType" and "<name> - Value" mapped to the matching
+                fields of the artifact (``None`` when a field is absent).
+        """
+        out = OrderedDict()
+        # ``_reshape`` falls back to ``[]`` when a trial component has no artifacts;
+        # a list has no ``.items()``, so return the empty result before iterating.
+        if not artifacts:
+            return out
+        for name, value in sorted(artifacts.items()):
+            if _artifact_names and (name not in _artifact_names):
+                continue
+            out["{} - {}".format(name, "MediaType")] = value.get("MediaType")
+            out["{} - {}".format(name, "Value")] = value.get("Value")
+        return out
+
     def _reshape(self, trial_component):
         """Reshape trial component data to pandas columns
         Args:
@@ -533,6 +557,16 @@ def _reshape(self, trial_component):
 
         out.update(self._reshape_parameters(trial_component.get("Parameters", [])))
         out.update(self._reshape_metrics(trial_component.get("Metrics", [])))
+        out.update(
+            self._reshape_artifacts(
+                trial_component.get("InputArtifacts", []), self._input_artifact_names
+            )
+        )
+        out.update(
+            self._reshape_artifacts(
+                trial_component.get("OutputArtifacts", []), self._output_artifact_names
+            )
+        )
         return out
 
     def _fetch_dataframe(self):
diff --git a/tests/integ/test_experiments_analytics.py b/tests/integ/test_experiments_analytics.py
@@ -43,6 +43,65 @@ def experiment(sagemaker_session):
         _delete_resources(sm, experiment_name, trials)
 
 
+@contextmanager
+def experiment_with_artifacts(sagemaker_session):
+    """Create a temporary experiment whose trial components carry artifacts.
+
+    Yields the experiment name. On exit, the experiment name and the recorded
+    trial -> trial component mapping are passed to ``_delete_resources``.
+    """
+    sm = sagemaker_session.sagemaker_client
+    experiment_name = "experiment-" + str(uuid.uuid4())
+    trials = {}  # trial name -> trial component name, for resource cleanup
+
+    try:
+        sm.create_experiment(ExperimentName=experiment_name)
+
+        # Search returns 10 results by default. Add 20 trials to verify pagination.
+        for index in range(20):
+            trial = "trial-" + str(uuid.uuid4())
+            sm.create_trial(TrialName=trial, ExperimentName=experiment_name)
+
+            component = "tc-" + str(uuid.uuid4())
+            trials[trial] = component
+
+            input_artifacts = {
+                "inputArtifacts1": {"MediaType": "text/csv", "Value": "s3:/foo/bar1"}
+            }
+            output_artifacts = {
+                "outputArtifacts1": {"MediaType": "text/plain", "Value": "s3:/foo/bar2"}
+            }
+
+            sm.create_trial_component(TrialComponentName=component, DisplayName="Training")
+            sm.update_trial_component(
+                TrialComponentName=component,
+                Parameters={"hp1": {"NumberValue": index}},
+                InputArtifacts=input_artifacts,
+                OutputArtifacts=output_artifacts,
+            )
+            sm.associate_trial_component(TrialComponentName=component, TrialName=trial)
+
+        time.sleep(15)  # wait for search to get updated
+
+        yield experiment_name
+    finally:
+        _delete_resources(sm, experiment_name, trials)
+
+
+@pytest.mark.canary_quick
+def test_experiment_analytics_artifacts(sagemaker_session):
+    """Check the data frame columns produced for an experiment with artifacts."""
+    expected_columns = [
+        "TrialComponentName",
+        "DisplayName",
+        "hp1",
+        "inputArtifacts1 - MediaType",
+        "inputArtifacts1 - Value",
+        "outputArtifacts1 - MediaType",
+        "outputArtifacts1 - Value",
+    ]
+
+    with experiment_with_artifacts(sagemaker_session) as experiment_name:
+        analytics = ExperimentAnalytics(
+            experiment_name=experiment_name, sagemaker_session=sagemaker_session
+        )
+        actual_columns = list(analytics.dataframe().columns)
+
+        assert actual_columns == expected_columns
+
+
 @pytest.mark.canary_quick
 def test_experiment_analytics(sagemaker_session):
     with experiment(sagemaker_session) as experiment_name:
diff --git a/tests/unit/test_experiments_analytics.py b/tests/unit/test_experiments_analytics.py
@@ -40,6 +40,14 @@ def trial_component(trial_component_name):
                 "Count": 2.0,
             },
         ],
+        "InputArtifacts": {
+            "inputArtifacts1": {"MediaType": "text/plain", "Value": "s3:/foo/bar1"},
+            "inputArtifacts2": {"MediaType": "text/plain", "Value": "s3:/foo/bar2"},
+        },
+        "OutputArtifacts": {
+            "outputArtifacts1": {"MediaType": "text/csv", "Value": "s3:/sky/far1"},
+            "outputArtifacts2": {"MediaType": "text/csv", "Value": "s3:/sky/far2"},
+        },
     }
 
 
@@ -72,6 +80,14 @@ def test_trial_analytics_dataframe_all_metrics_hyperparams(mock_session):
                 ("metric2 - StdDev", [0.05, 0.05]),
                 ("metric2 - Last", [7.0, 7.0]),
                 ("metric2 - Count", [2.0, 2.0]),
+                ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),
+                ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),
+                ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),
+                ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),
             ]
         )
     )
@@ -117,6 +133,14 @@ def test_trial_analytics_dataframe_selected_hyperparams(mock_session):
                 ("metric2 - StdDev", [0.05, 0.05]),
                 ("metric2 - Last", [7.0, 7.0]),
                 ("metric2 - Count", [2.0, 2.0]),
+                ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),
+                ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),
+                ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),
+                ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),
             ]
         )
     )
@@ -157,6 +181,14 @@ def test_trial_analytics_dataframe_selected_metrics(mock_session):
                 ("metric1 - StdDev", [1.0, 1.0]),
                 ("metric1 - Last", [2.0, 2.0]),
                 ("metric1 - Count", [2.0, 2.0]),
+                ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),
+                ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),
+                ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),
+                ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),
             ]
         )
     )
@@ -203,6 +235,14 @@ def test_trial_analytics_dataframe_search_pagination(mock_session):
                 ("metric2 - StdDev", [0.05, 0.05]),
                 ("metric2 - Last", [7.0, 7.0]),
                 ("metric2 - Count", [2.0, 2.0]),
+                ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),
+                ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),
+                ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),
+                ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),
+                ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),
+                ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),
             ]
         )
     )

Original file line number	Diff line number	Diff line change
`@@ -40,6 +40,14 @@ def trial_component(trial_component_name):`
`40`	`40`	`"Count": 2.0,`
`41`	`41`	`},`
`42`	`42`	`],`
	`43`	`+ "InputArtifacts": {`
	`44`	`+ "inputArtifacts1": {"MediaType": "text/plain", "Value": "s3:/foo/bar1"},`
	`45`	`+ "inputArtifacts2": {"MediaType": "text/plain", "Value": "s3:/foo/bar2"},`
	`46`	`+ },`
	`47`	`+ "OutputArtifacts": {`
	`48`	`+ "outputArtifacts1": {"MediaType": "text/csv", "Value": "s3:/sky/far1"},`
	`49`	`+ "outputArtifacts2": {"MediaType": "text/csv", "Value": "s3:/sky/far2"},`
	`50`	`+ },`
`43`	`51`	`}`
`44`	`52`
`45`	`53`
`@@ -72,6 +80,14 @@ def test_trial_analytics_dataframe_all_metrics_hyperparams(mock_session):`
`72`	`80`	`("metric2 - StdDev", [0.05, 0.05]),`
`73`	`81`	`("metric2 - Last", [7.0, 7.0]),`
`74`	`82`	`("metric2 - Count", [2.0, 2.0]),`
	`83`	`+ ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),`
	`84`	`+ ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),`
	`85`	`+ ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),`
	`86`	`+ ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),`
	`87`	`+ ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),`
	`88`	`+ ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),`
	`89`	`+ ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),`
	`90`	`+ ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),`
`75`	`91`	`]`
`76`	`92`	`)`
`77`	`93`	`)`
`@@ -117,6 +133,14 @@ def test_trial_analytics_dataframe_selected_hyperparams(mock_session):`
`117`	`133`	`("metric2 - StdDev", [0.05, 0.05]),`
`118`	`134`	`("metric2 - Last", [7.0, 7.0]),`
`119`	`135`	`("metric2 - Count", [2.0, 2.0]),`
	`136`	`+ ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),`
	`137`	`+ ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),`
	`138`	`+ ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),`
	`139`	`+ ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),`
	`140`	`+ ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),`
	`141`	`+ ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),`
	`142`	`+ ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),`
	`143`	`+ ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),`
`120`	`144`	`]`
`121`	`145`	`)`
`122`	`146`	`)`
`@@ -157,6 +181,14 @@ def test_trial_analytics_dataframe_selected_metrics(mock_session):`
`157`	`181`	`("metric1 - StdDev", [1.0, 1.0]),`
`158`	`182`	`("metric1 - Last", [2.0, 2.0]),`
`159`	`183`	`("metric1 - Count", [2.0, 2.0]),`
	`184`	`+ ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),`
	`185`	`+ ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),`
	`186`	`+ ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),`
	`187`	`+ ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),`
	`188`	`+ ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),`
	`189`	`+ ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),`
	`190`	`+ ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),`
	`191`	`+ ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),`
`160`	`192`	`]`
`161`	`193`	`)`
`162`	`194`	`)`
`@@ -203,6 +235,14 @@ def test_trial_analytics_dataframe_search_pagination(mock_session):`
`203`	`235`	`("metric2 - StdDev", [0.05, 0.05]),`
`204`	`236`	`("metric2 - Last", [7.0, 7.0]),`
`205`	`237`	`("metric2 - Count", [2.0, 2.0]),`
	`238`	`+ ("inputArtifacts1 - MediaType", ["text/plain", "text/plain"]),`
	`239`	`+ ("inputArtifacts1 - Value", ["s3:/foo/bar1", "s3:/foo/bar1"]),`
	`240`	`+ ("inputArtifacts2 - MediaType", ["text/plain", "text/plain"]),`
	`241`	`+ ("inputArtifacts2 - Value", ["s3:/foo/bar2", "s3:/foo/bar2"]),`
	`242`	`+ ("outputArtifacts1 - MediaType", ["text/csv", "text/csv"]),`
	`243`	`+ ("outputArtifacts1 - Value", ["s3:/sky/far1", "s3:/sky/far1"]),`
	`244`	`+ ("outputArtifacts2 - MediaType", ["text/csv", "text/csv"]),`
	`245`	`+ ("outputArtifacts2 - Value", ["s3:/sky/far2", "s3:/sky/far2"]),`
`206`	`246`	`]`
`207`	`247`	`)`
`208`	`248`	`)`