# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Repack model script for training jobs to inject entry points"""
from __future__ import absolute_import

import argparse
import os
import shutil
import tarfile
import tempfile

# Repack Model
# The following script is run via a training job which takes an existing model and a custom
# entry point script as arguments. The script creates a new model archive with the custom
# entry point in the "code" directory along with the existing model. Subsequently, when the model
# is unpacked for inference, the custom entry point will be used.
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html

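# Illustrative only: a script like this is normally launched through the SageMaker Python SDK as
# a framework training job, with the existing archive passed on an input channel and the two
# arguments below passed as hyperparameters. The estimator below is a hedged sketch, not the
# SDK's actual internal wiring; bucket, role, and instance values are placeholders.
#
#   from sagemaker.sklearn import SKLearn
#
#   estimator = SKLearn(
#       entry_point="_repack_model.py",
#       source_dir="<dir containing this script and inference.py>",
#       framework_version="0.23-1",
#       role="<execution-role-arn>",
#       instance_count=1,
#       instance_type="ml.m5.large",
#       hyperparameters={"inference_script": "inference.py", "model_archive": "model.tar.gz"},
#   )
#   estimator.fit({"training": "s3://<bucket>/<prefix-containing-model.tar.gz>/"})
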
# distutils.dir_util.copy_tree works way better than the half-baked
# shutil.copytree which bombs on previously existing target dirs...
# alas ... https://bugs.python.org/issue10948
# we'll go ahead and use the copy_tree function anyways because this
# repacking is some short-lived hackery, right??
from distutils.dir_util import copy_tree


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--inference_script", type=str, default="inference.py")
    parser.add_argument("--model_archive", type=str, default="model.tar.gz")
    args = parser.parse_args()
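    # When launched through the SageMaker Python SDK, these two arguments are typically supplied
    # as training-job hyperparameters; script mode forwards each hyperparameter to the entry
    # point as a "--key value" command-line argument.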
42
+
43
+ # the data directory contains a model archive generated by a previous training job
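    # (SageMaker mounts each input channel at /opt/ml/input/data/<channel_name>, so the archive
    # supplied on the "training" channel shows up under the path below)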
    data_directory = "/opt/ml/input/data/training"
    model_path = os.path.join(data_directory, args.model_archive)

    # create a temporary directory
    with tempfile.TemporaryDirectory() as tmp:
        local_path = os.path.join(tmp, "local.tar.gz")
        # copy the previous training job's model archive to the temporary directory
        shutil.copy2(model_path, local_path)
        src_dir = os.path.join(tmp, "src")
        # create the "code" directory which will contain the inference script
        os.makedirs(os.path.join(src_dir, "code"))
        # extract the contents of the previous training job's model archive to the "src"
        # directory of this training job
        with tarfile.open(name=local_path, mode="r:gz") as tf:
            tf.extractall(path=src_dir)

        # generate a path to the custom inference script
        entry_point = os.path.join("/opt/ml/code", args.inference_script)
        # copy the custom inference script to the "src" dir
        shutil.copy2(entry_point, os.path.join(src_dir, "code", args.inference_script))

        # copy the "src" dir, which includes the previous training job's model and the
        # custom inference script, to the output of this training job
        copy_tree(src_dir, "/opt/ml/model")
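
        # At this point /opt/ml/model should contain, roughly:
        #
        #   /opt/ml/model/
        #   ├── <original artifacts from the previous job's model archive>
        #   └── code/
        #       └── inference.py   (or whatever --inference_script names)
        #
        # SageMaker tars the contents of /opt/ml/model into this job's output model.tar.gz,
        # so the repacked archive carries the custom entry point alongside the existing model.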