
documentation: remove Other tab in Built-in algorithms section and mi… #3317


Merged: 8 commits, Aug 24, 2022
8 changes: 6 additions & 2 deletions doc/algorithms/index.rst
@@ -2,14 +2,18 @@
Built-in Algorithms
######################

Amazon SageMaker provides implementations of some common machine learning algorithms optimized for GPU architecture and massive datasets.
Built-in algorithms are offered in two modes:

* Container mode algorithms offered through :ref:`Estimators <estimators>` & :ref:`Amazon Estimators <amazon_estimators>`

* Script mode algorithms based on `pre-built SageMaker Docker Images <https://docs.aws.amazon.com/sagemaker/latest/dg/docker-containers-prebuilt.html>`__ offered through Estimators

.. toctree::
   :maxdepth: 2

   sagemaker.amazon.amazon_estimator
   tabular/index
   text/index
   time_series/index
   unsupervised/index
   vision/index
   other/index
10 changes: 0 additions & 10 deletions doc/algorithms/other/index.rst

This file was deleted.

@@ -1,3 +1,5 @@
.. _amazon_estimators:

Amazon Estimators
--------------------

1 change: 0 additions & 1 deletion doc/algorithms/tabular/index.rst
@@ -15,4 +15,3 @@ Amazon SageMaker provides built-in algorithms that are tailored to the analysis
   linear_learner
   tabtransformer
   xgboost
   object2vec
2 changes: 1 addition & 1 deletion doc/algorithms/text/index.rst
@@ -10,6 +10,7 @@ Amazon SageMaker provides algorithms that are tailored to the analysis of textua
   blazing_text
   lda
   ntm
   object2vec
   sequence_to_sequence
   text_classification_tensorflow
   sentence_pair_classification_tensorflow
Expand All @@ -19,4 +20,3 @@ Amazon SageMaker provides algorithms that are tailored to the analysis of textua
   text_summarization_hugging_face
   text_generation_hugging_face
   machine_translation_hugging_face
   text_embedding_tensorflow_mxnet
13 changes: 7 additions & 6 deletions doc/algorithms/vision/index.rst
@@ -7,14 +7,15 @@ Amazon SageMaker provides image processing algorithms that are used for image cl
.. toctree::
   :maxdepth: 2

   image_classification_mxnet
   image_classification_pytorch
   image_classification_tensorflow
   image_classification_pytorch
   image_classification_mxnet
   image_embedding_tensorflow
   instance_segmentation_mxnet
   object_detection_tensorflow
   object_detection_pytorch
   object_detection_mxnet_gluoncv
   object_detection_mxnet
   object_detection_pytorch
   object_detection_tensorflow
   semantic_segmentation_mxnet_gluoncv
   semantic_segmentation_mxnet
   instance_segmentation_mxnet
   image_embedding_tensorflow
   text_embedding_tensorflow_mxnet
2 changes: 2 additions & 0 deletions doc/api/training/estimators.rst
@@ -1,3 +1,5 @@
.. _estimators:

Estimators
----------

174 changes: 142 additions & 32 deletions doc/doc_utils/jumpstart_doc_utils.py
@@ -58,6 +58,20 @@ class ProblemTypes(str, Enum):
    TABULAR_CLASSIFICATION = "Classification"


class Frameworks(str, Enum):
    """Possible frameworks for JumpStart models"""

    TENSORFLOW = "Tensorflow Hub"
    PYTORCH = "Pytorch Hub"
    HUGGINGFACE = "HuggingFace"
    CATBOOST = "Catboost"
    GLUONCV = "GluonCV"
    LIGHTGBM = "LightGBM"
    XGBOOST = "XGBoost"
    SCIKIT_LEARN = "ScikitLearn"
    SOURCE = "Source"


JUMPSTART_REGION = "eu-west-2"
SDK_MANIFEST_FILE = "models_manifest.json"
JUMPSTART_BUCKET_BASE_URL = "https://jumpstart-cache-prod-{}.s3.{}.amazonaws.com".format(
Expand All @@ -82,6 +96,61 @@ class ProblemTypes(str, Enum):
    Tasks.TABULAR_CLASSIFICATION: ProblemTypes.TABULAR_CLASSIFICATION,
}

TO_FRAMEWORK = {
    "Tensorflow Hub": Frameworks.TENSORFLOW,
    "Pytorch Hub": Frameworks.PYTORCH,
    "HuggingFace": Frameworks.HUGGINGFACE,
    "Catboost": Frameworks.CATBOOST,
    "GluonCV": Frameworks.GLUONCV,
    "LightGBM": Frameworks.LIGHTGBM,
    "XGBoost": Frameworks.XGBOOST,
    "ScikitLearn": Frameworks.SCIKIT_LEARN,
    "Source": Frameworks.SOURCE,
}


MODALITY_MAP = {
    (Tasks.IC, Frameworks.PYTORCH): "algorithms/vision/image_classification_pytorch.rst",
    (Tasks.IC, Frameworks.TENSORFLOW): "algorithms/vision/image_classification_tensorflow.rst",
    (Tasks.IC_EMBEDDING, Frameworks.TENSORFLOW): "algorithms/vision/image_embedding_tensorflow.rst",
    (Tasks.IS, Frameworks.GLUONCV): "algorithms/vision/instance_segmentation_mxnet.rst",
    (Tasks.OD, Frameworks.GLUONCV): "algorithms/vision/object_detection_mxnet.rst",
    (Tasks.OD, Frameworks.PYTORCH): "algorithms/vision/object_detection_pytorch.rst",
    (Tasks.OD, Frameworks.TENSORFLOW): "algorithms/vision/object_detection_tensorflow.rst",
    (Tasks.SEMSEG, Frameworks.GLUONCV): "algorithms/vision/semantic_segmentation_mxnet.rst",
    (
        Tasks.TRANSLATION,
        Frameworks.HUGGINGFACE,
    ): "algorithms/text/machine_translation_hugging_face.rst",
    (Tasks.NER, Frameworks.GLUONCV): "algorithms/text/named_entity_recognition_hugging_face.rst",
    (Tasks.EQA, Frameworks.PYTORCH): "algorithms/text/question_answering_pytorch.rst",
    (
        Tasks.SPC,
        Frameworks.HUGGINGFACE,
    ): "algorithms/text/sentence_pair_classification_hugging_face.rst",
    (
        Tasks.SPC,
        Frameworks.TENSORFLOW,
    ): "algorithms/text/sentence_pair_classification_tensorflow.rst",
    (Tasks.TC, Frameworks.TENSORFLOW): "algorithms/text/text_classification_tensorflow.rst",
    (
        Tasks.TC_EMBEDDING,
        Frameworks.GLUONCV,
    ): "algorithms/vision/text_embedding_tensorflow_mxnet.rst",
    (
        Tasks.TC_EMBEDDING,
        Frameworks.TENSORFLOW,
    ): "algorithms/vision/text_embedding_tensorflow_mxnet.rst",
    (
        Tasks.TEXT_GENERATION,
        Frameworks.HUGGINGFACE,
    ): "algorithms/text/text_generation_hugging_face.rst",
    (
        Tasks.SUMMARIZATION,
        Frameworks.HUGGINGFACE,
    ): "algorithms/text/text_summarization_hugging_face.rst",
}
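`MODALITY_MAP` keys pair a task with a framework and point at the `.rst` page whose per-modality table the generator appends to. A toy lookup, using plain strings where the real map uses `Tasks` and `Frameworks` enum members:

```python
# Hypothetical stand-in for MODALITY_MAP, with strings in place of the
# Tasks/Frameworks enum members used in the real module.
MODALITY_MAP = {
    ("ic", "Pytorch Hub"): "algorithms/vision/image_classification_pytorch.rst",
    ("od", "Tensorflow Hub"): "algorithms/vision/object_detection_tensorflow.rst",
}


def doc_page_for(task: str, source: str):
    # Models whose (task, framework) pair is unmapped get no per-modality
    # table; the generator simply skips them.
    return MODALITY_MAP.get((task, source))


print(doc_page_for("ic", "Pytorch Hub"))
print(doc_page_for("ner", "Pytorch Hub"))  # None: not in the map
```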


def get_jumpstart_sdk_manifest():
    url = "{}/{}".format(JUMPSTART_BUCKET_BASE_URL, SDK_MANIFEST_FILE)
Expand All @@ -102,6 +171,10 @@ def get_model_task(id):
    return TASK_MAP[task_short] if task_short in TASK_MAP else "Source"


def get_string_model_task(id):
    return id.split("-")[1]
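`get_string_model_task` relies on JumpStart model IDs following a `<framework>-<task>-<rest>` naming convention, so the second dash-separated token is the task. For example:

```python
def get_string_model_task(model_id: str) -> str:
    # "pytorch-ic-mobilenet-v2" -> "ic"; assumes the ID has at least two
    # dash-separated fields, as the manifest's IDs do.
    return model_id.split("-")[1]


print(get_string_model_task("pytorch-ic-mobilenet-v2"))  # ic
print(get_string_model_task("huggingface-spc-bert-base-cased"))  # spc
```

The example model IDs here are illustrative of the naming scheme, not taken from the manifest.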


def get_model_source(url):
    if "tfhub" in url:
        return "Tensorflow Hub"
Expand All @@ -113,8 +186,6 @@ def get_model_source(url):
        return "Catboost"
    if "gluon" in url:
        return "GluonCV"
    if "catboost" in url:
        return "Catboost"
    if "lightgbm" in url:
        return "LightGBM"
    if "xgboost" in url:
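The hunk above also drops a second `if "catboost" in url` check, which was unreachable because the earlier one already returned. The function's shape is a first-match-wins substring scan, which can equally be written as a table; the needle ordering and fallback below are an illustrative sketch, not the file's verbatim contents:

```python
# Illustrative table-driven rewrite; the needle order is an assumption.
_SOURCES = [
    ("tfhub", "Tensorflow Hub"),
    ("pytorch", "Pytorch Hub"),
    ("huggingface", "HuggingFace"),
    ("catboost", "Catboost"),
    ("gluon", "GluonCV"),
    ("lightgbm", "LightGBM"),
    ("xgboost", "XGBoost"),
]


def get_model_source(url: str) -> str:
    # First match wins, so a duplicate needle later in the list can never fire,
    # which is why the second "catboost" branch was dead code.
    for needle, name in _SOURCES:
        if needle in url:
            return name
    return "Source"


print(get_model_source("https://tfhub.dev/google/model"))  # Tensorflow Hub
```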
Expand All @@ -138,58 +209,97 @@ def create_jumpstart_model_table():
        ) < Version(model["version"]):
            sdk_manifest_top_versions_for_models[model["model_id"]] = model

    file_content = []
    file_content_intro = []

    file_content.append(".. _all-pretrained-models:\n\n")
    file_content.append(".. |external-link| raw:: html\n\n")
    file_content.append(' <i class="fa fa-external-link"></i>\n\n')
    file_content_intro.append(".. _all-pretrained-models:\n\n")
    file_content_intro.append(".. |external-link| raw:: html\n\n")
    file_content_intro.append(' <i class="fa fa-external-link"></i>\n\n')

    file_content.append("================================================\n")
    file_content.append("Built-in Algorithms with pre-trained Model Table\n")
    file_content.append("================================================\n")
    file_content.append(
    file_content_intro.append("================================================\n")
    file_content_intro.append("Built-in Algorithms with pre-trained Model Table\n")
    file_content_intro.append("================================================\n")
    file_content_intro.append(
        """
The SageMaker Python SDK uses model IDs and model versions to access the necessary
utilities for pre-trained models. This table serves to provide the core material plus
some extra information that can be useful in selecting the correct model ID and
corresponding parameters.\n"""
    )
    file_content.append(
    file_content_intro.append(
        """
If you want to automatically use the latest version of the model, use "*" for the `model_version` attribute.
We highly suggest pinning an exact model version however.\n"""
    )
    file_content.append(
    file_content_intro.append(
        """
These models are also available through the
`JumpStart UI in SageMaker Studio <https://docs.aws.amazon.com/sagemaker/latest/dg/studio-jumpstart.html>`__\n"""
    )
    file_content.append("\n")
    file_content.append(".. list-table:: Available Models\n")
    file_content.append(" :widths: 50 20 20 20 30 20\n")
    file_content.append(" :header-rows: 1\n")
    file_content.append(" :class: datatable\n")
    file_content.append("\n")
    file_content.append(" * - Model ID\n")
    file_content.append(" - Fine Tunable?\n")
    file_content.append(" - Latest Version\n")
    file_content.append(" - Min SDK Version\n")
    file_content.append(" - Problem Type\n")
    file_content.append(" - Source\n")
    file_content_intro.append("\n")
    file_content_intro.append(".. list-table:: Available Models\n")
    file_content_intro.append(" :widths: 50 20 20 20 30 20\n")
    file_content_intro.append(" :header-rows: 1\n")
    file_content_intro.append(" :class: datatable\n")
    file_content_intro.append("\n")
    file_content_intro.append(" * - Model ID\n")
    file_content_intro.append(" - Fine Tunable?\n")
    file_content_intro.append(" - Latest Version\n")
    file_content_intro.append(" - Min SDK Version\n")
    file_content_intro.append(" - Problem Type\n")
    file_content_intro.append(" - Source\n")

    dynamic_table_files = []
    file_content_entries = []

    for model in sdk_manifest_top_versions_for_models.values():
        model_spec = get_jumpstart_sdk_spec(model["spec_key"])
        model_task = get_model_task(model_spec["model_id"])
        string_model_task = get_string_model_task(model_spec["model_id"])
        model_source = get_model_source(model_spec["url"])
        file_content.append(" * - {}\n".format(model_spec["model_id"]))
        file_content.append(" - {}\n".format(model_spec["training_supported"]))
        file_content.append(" - {}\n".format(model["version"]))
        file_content.append(" - {}\n".format(model["min_version"]))
        file_content.append(" - {}\n".format(model_task))
        file_content.append(
        file_content_entries.append(" * - {}\n".format(model_spec["model_id"]))
        file_content_entries.append(" - {}\n".format(model_spec["training_supported"]))
        file_content_entries.append(" - {}\n".format(model["version"]))
        file_content_entries.append(" - {}\n".format(model["min_version"]))
        file_content_entries.append(" - {}\n".format(model_task))
        file_content_entries.append(
            " - `{} <{}>`__ |external-link|\n".format(model_source, model_spec["url"])
        )

    f = open("doc_utils/pretrainedmodels.rst", "w")
    f.writelines(file_content)
        if (string_model_task, TO_FRAMEWORK[model_source]) in MODALITY_MAP:
            file_content_single_entry = []

            if (
                MODALITY_MAP[(string_model_task, TO_FRAMEWORK[model_source])]
                not in dynamic_table_files
            ):
                file_content_single_entry.append("\n")
                file_content_single_entry.append(".. list-table:: Available Models\n")
                file_content_single_entry.append(" :widths: 50 20 20 20 20\n")
                file_content_single_entry.append(" :header-rows: 1\n")
                file_content_single_entry.append(" :class: datatable\n")
                file_content_single_entry.append("\n")
                file_content_single_entry.append(" * - Model ID\n")
                file_content_single_entry.append(" - Fine Tunable?\n")
                file_content_single_entry.append(" - Latest Version\n")
                file_content_single_entry.append(" - Min SDK Version\n")
                file_content_single_entry.append(" - Source\n")

                dynamic_table_files.append(
                    MODALITY_MAP[(string_model_task, TO_FRAMEWORK[model_source])]
                )

            file_content_single_entry.append(" * - {}\n".format(model_spec["model_id"]))
            file_content_single_entry.append(" - {}\n".format(model_spec["training_supported"]))
            file_content_single_entry.append(" - {}\n".format(model["version"]))
            file_content_single_entry.append(" - {}\n".format(model["min_version"]))
            file_content_single_entry.append(
                " - `{} <{}>`__\n".format(model_source, model_spec["url"])
            )
            f = open(MODALITY_MAP[(string_model_task, TO_FRAMEWORK[model_source])], "a")
            f.writelines(file_content_single_entry)
            f.close()

    f = open("doc_utils/pretrainedmodels.rst", "a")
    f.writelines(file_content_intro)
    f.writelines(file_content_entries)
    f.close()
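The control flow above writes a table header into a per-modality file only on first touch (tracked in `dynamic_table_files`), then appends one row per model on every pass. The same dedup pattern in miniature, with file I/O replaced by an in-memory dict so the behavior is easy to inspect (all names and inputs here are hypothetical):

```python
def render(models, page_for):
    """models: iterable of (model_id, task, source) triples;
    page_for: maps (task, source) to a page path, or None if unmapped."""
    pages, seen = {}, set()
    for model_id, task, source in models:
        page = page_for(task, source)
        if page is None:
            continue  # no per-modality table for this model
        lines = pages.setdefault(page, [])
        if page not in seen:
            # First touch of this page: emit the header exactly once,
            # mirroring the dynamic_table_files bookkeeping.
            seen.add(page)
            lines.append("HEADER")  # stands in for the list-table preamble
        lines.append("row: " + model_id)
    return pages


demo = render(
    [("pytorch-ic-a", "ic", "Pytorch Hub"), ("pytorch-ic-b", "ic", "Pytorch Hub")],
    lambda t, s: "ic.rst" if (t, s) == ("ic", "Pytorch Hub") else None,
)
print(demo)  # {'ic.rst': ['HEADER', 'row: pytorch-ic-a', 'row: pytorch-ic-b']}
```

Because the real script appends to the `.rst` files in "a" mode across runs, the first-touch set is what keeps each page from accumulating duplicate headers within a single generation pass.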