fix: Fix TensorFlow default model_dir generation when output_path is a pipeline variable (aws#3146)

qidewenwhen · Lokiiiiii · commit 406052ba5b9f · 2022-06-02T16:19:51.000-07:00
diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py
@@ -26,6 +26,7 @@
 from sagemaker.transformer import Transformer
 from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
 from sagemaker.tensorflow.training_compiler.config import TrainingCompilerConfig
+from sagemaker.workflow import is_pipeline_variable
 
 logger = logging.getLogger("sagemaker")
 
@@ -392,6 +393,9 @@ def _default_s3_path(self, directory, mpi=False):
         if mpi:
             return "/opt/ml/model"
         if self._current_job_name:
+            if is_pipeline_variable(self.output_path):
+                output_path = "s3://{}".format(self.sagemaker_session.default_bucket())
+                return s3.s3_path_join(output_path, self._current_job_name, directory)
             return s3.s3_path_join(self.output_path, self._current_job_name, directory)
         return None
 
diff --git a/tests/integ/sagemaker/workflow/test_model_steps.py b/tests/integ/sagemaker/workflow/test_model_steps.py
@@ -608,6 +608,9 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
     )
     inputs = TrainingInput(s3_data=input_path)
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    output_path = ParameterString(
+        name="OutputPath", default_value=f"s3://{pipeline_session.default_bucket()}"
+    )
 
     # If image_uri is not provided, the instance_type should not be a pipeline variable
     # since instance_type is used to retrieve image_uri in compile time (PySDK)
@@ -619,6 +622,7 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
         framework_version=tf_full_version,
         py_version=tf_full_py_version,
         sagemaker_session=pipeline_session,
+        output_path=output_path,
     )
     train_step_args = tensorflow_estimator.fit(inputs=inputs)
     step_train = TrainingStep(
@@ -648,7 +652,7 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
     )
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count],
+        parameters=[instance_count, output_path],
         steps=[step_train, step_register_model],
         sagemaker_session=pipeline_session,
     )
diff --git a/tests/unit/sagemaker/workflow/test_training_step.py b/tests/unit/sagemaker/workflow/test_training_step.py
@@ -15,6 +15,7 @@
 import os
 import json
 from mock import Mock, PropertyMock
+import re
 
 import pytest
 import warnings
@@ -163,6 +164,7 @@ def test_training_step_with_estimator(pipeline_session, training_input, hyperpar
 
 def test_estimator_with_parameterized_output(pipeline_session, training_input):
     output_path = ParameterString(name="OutputPath")
+    # XGBoost
     estimator = XGBoost(
         framework_version="1.3-1",
         py_version="py3",
@@ -174,21 +176,48 @@ def test_estimator_with_parameterized_output(pipeline_session, training_input):
         sagemaker_session=pipeline_session,
     )
     step_args = estimator.fit(inputs=training_input)
-    step = TrainingStep(
-        name="MyTrainingStep",
+    step1 = TrainingStep(
+        name="MyTrainingStep1",
+        step_args=step_args,
+        description="TrainingStep description",
+        display_name="MyTrainingStep",
+    )
+
+    # TensorFlow
+    # If model_dir is None and output_path is a pipeline variable
+    # a default model_dir will be generated with default bucket
+    estimator = TensorFlow(
+        framework_version="2.4.1",
+        py_version="py37",
+        role=ROLE,
+        instance_type=INSTANCE_TYPE,
+        instance_count=1,
+        entry_point=DUMMY_LOCAL_SCRIPT_PATH,
+        output_path=output_path,
+        sagemaker_session=pipeline_session,
+    )
+    step_args = estimator.fit(inputs=training_input)
+    step2 = TrainingStep(
+        name="MyTrainingStep2",
         step_args=step_args,
         description="TrainingStep description",
         display_name="MyTrainingStep",
     )
     pipeline = Pipeline(
         name="MyPipeline",
-        steps=[step],
+        steps=[step1, step2],
+        parameters=[output_path],
         sagemaker_session=pipeline_session,
     )
-    step_def = json.loads(pipeline.definition())["Steps"][0]
-    assert step_def["Arguments"]["OutputDataConfig"]["S3OutputPath"] == {
-        "Get": "Parameters.OutputPath"
-    }
+    step_defs = json.loads(pipeline.definition())["Steps"]
+    for step_def in step_defs:
+        assert step_def["Arguments"]["OutputDataConfig"]["S3OutputPath"] == {
+            "Get": "Parameters.OutputPath"
+        }
+        if step_def["Name"] != "MyTrainingStep2":
+            continue
+        model_dir = step_def["Arguments"]["HyperParameters"]["model_dir"]
+        assert re.match(rf'"s3://{BUCKET}/.*/model"', model_dir)
 
 
 @pytest.mark.parametrize(
@@ -316,7 +345,7 @@ def test_training_step_with_algorithm_base(algo_estimator, pipeline_session):
         sagemaker_session=pipeline_session,
     )
     data = RecordSet(
-        "s3://{}/{}".format(pipeline_session.default_bucket(), "dummy"),
+        "s3://{}/{}".format(BUCKET, "dummy"),
         num_records=1000,
         feature_dim=128,
         channel="train",

Original file line number	Diff line number	Diff line change
`@@ -608,6 +608,9 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(`
`608`	`608`	`)`
`609`	`609`	`inputs = TrainingInput(s3_data=input_path)`
`610`	`610`	`instance_count = ParameterInteger(name="InstanceCount", default_value=1)`
	`611`	`+ output_path = ParameterString(`
	`612`	`+ name="OutputPath", default_value=f"s3://{pipeline_session.default_bucket()}"`
	`613`	`+ )`
`611`	`614`
`612`	`615`	`# If image_uri is not provided, the instance_type should not be a pipeline variable`
`613`	`616`	`# since instance_type is used to retrieve image_uri in compile time (PySDK)`
`@@ -619,6 +622,7 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(`
`619`	`622`	`framework_version=tf_full_version,`
`620`	`623`	`py_version=tf_full_py_version,`
`621`	`624`	`sagemaker_session=pipeline_session,`
	`625`	`+ output_path=output_path,`
`622`	`626`	`)`
`623`	`627`	`train_step_args = tensorflow_estimator.fit(inputs=inputs)`
`624`	`628`	`step_train = TrainingStep(`
`@@ -648,7 +652,7 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(`
`648`	`652`	`)`
`649`	`653`	`pipeline = Pipeline(`
`650`	`654`	`name=pipeline_name,`
`651`		`- parameters=[instance_count],`
	`655`	`+ parameters=[instance_count, output_path],`
`652`	`656`	`steps=[step_train, step_register_model],`
`653`	`657`	`sagemaker_session=pipeline_session,`
`654`	`658`	`)`