diff --git a/doc/doc_utils/jumpstart_doc_utils.py b/doc/doc_utils/jumpstart_doc_utils.py index 47cf6e5f39..b692715706 100644 --- a/doc/doc_utils/jumpstart_doc_utils.py +++ b/doc/doc_utils/jumpstart_doc_utils.py @@ -14,12 +14,73 @@ from urllib import request import json from packaging.version import Version +from enum import Enum + + +class Tasks(str, Enum): + """The ML task name as referenced in the infix of the model ID.""" + + IC = "ic" + OD = "od" + OD1 = "od1" + SEMSEG = "semseg" + IS = "is" + TC = "tc" + SPC = "spc" + EQA = "eqa" + TEXT_GENERATION = "textgeneration" + IC_EMBEDDING = "icembedding" + TC_EMBEDDING = "tcembedding" + NER = "ner" + SUMMARIZATION = "summarization" + TRANSLATION = "translation" + TABULAR_REGRESSION = "regression" + TABULAR_CLASSIFICATION = "classification" + + +class ProblemTypes(str, Enum): + """Possible problem types for JumpStart models.""" + + IMAGE_CLASSIFICATION = "Image Classification" + IMAGE_EMBEDDING = "Image Embedding" + OBJECT_DETECTION = "Object Detection" + SEMANTIC_SEGMENTATION = "Semantic Segmentation" + INSTANCE_SEGMENTATION = "Instance Segmentation" + TEXT_CLASSIFICATION = "Text Classification" + TEXT_EMBEDDING = "Text Embedding" + QUESTION_ANSWERING = "Question Answering" + SENTENCE_PAIR_CLASSIFICATION = "Sentence Pair Classification" + TEXT_GENERATION = "Text Generation" + TEXT_SUMMARIZATION = "Text Summarization" + MACHINE_TRANSLATION = "Machine Translation" + NAMED_ENTITY_RECOGNITION = "Named Entity Recognition" + TABULAR_REGRESSION = "Regression" + TABULAR_CLASSIFICATION = "Classification" + JUMPSTART_REGION = "eu-west-2" SDK_MANIFEST_FILE = "models_manifest.json" JUMPSTART_BUCKET_BASE_URL = "https://jumpstart-cache-prod-{}.s3.{}.amazonaws.com".format( JUMPSTART_REGION, JUMPSTART_REGION ) +TASK_MAP = { + Tasks.IC: ProblemTypes.IMAGE_CLASSIFICATION, + Tasks.IC_EMBEDDING: ProblemTypes.IMAGE_EMBEDDING, + Tasks.OD: ProblemTypes.OBJECT_DETECTION, + Tasks.OD1: ProblemTypes.OBJECT_DETECTION, + Tasks.SEMSEG: 
ProblemTypes.SEMANTIC_SEGMENTATION, + Tasks.IS: ProblemTypes.INSTANCE_SEGMENTATION, + Tasks.TC: ProblemTypes.TEXT_CLASSIFICATION, + Tasks.TC_EMBEDDING: ProblemTypes.TEXT_EMBEDDING, + Tasks.EQA: ProblemTypes.QUESTION_ANSWERING, + Tasks.SPC: ProblemTypes.SENTENCE_PAIR_CLASSIFICATION, + Tasks.TEXT_GENERATION: ProblemTypes.TEXT_GENERATION, + Tasks.SUMMARIZATION: ProblemTypes.TEXT_SUMMARIZATION, + Tasks.TRANSLATION: ProblemTypes.MACHINE_TRANSLATION, + Tasks.NER: ProblemTypes.NAMED_ENTITY_RECOGNITION, + Tasks.TABULAR_REGRESSION: ProblemTypes.TABULAR_REGRESSION, + Tasks.TABULAR_CLASSIFICATION: ProblemTypes.TABULAR_CLASSIFICATION, +} def get_jumpstart_sdk_manifest(): @@ -36,6 +97,11 @@ def get_jumpstart_sdk_spec(key): return json.loads(model_spec) +def get_model_task(id): + task_short = id.split("-")[1] + return TASK_MAP[task_short] if task_short in TASK_MAP else "Source" + + def create_jumpstart_model_table(): sdk_manifest = get_jumpstart_sdk_manifest() sdk_manifest_top_versions_for_models = {} @@ -69,12 +135,12 @@ def create_jumpstart_model_table(): ) file_content.append( """ - Each model id is linked to an external page that describes the model.\n + Click on the Problem Type to navigate to the source of the model.\n """ ) file_content.append("\n") file_content.append(".. 
list-table:: Available Models\n") - file_content.append(" :widths: 50 20 20 20\n") + file_content.append(" :widths: 50 20 20 20 30\n") file_content.append(" :header-rows: 1\n") file_content.append(" :class: datatable\n") file_content.append("\n") @@ -82,13 +148,16 @@ def create_jumpstart_model_table(): file_content.append(" - Fine Tunable?\n") file_content.append(" - Latest Version\n") file_content.append(" - Min SDK Version\n") + file_content.append(" - Problem Type/Source\n") for model in sdk_manifest_top_versions_for_models.values(): model_spec = get_jumpstart_sdk_spec(model["spec_key"]) - file_content.append(" * - `{} <{}>`_\n".format(model_spec["model_id"], model_spec["url"])) + model_task = get_model_task(model_spec["model_id"]) + file_content.append(" * - {}\n".format(model_spec["model_id"])) file_content.append(" - {}\n".format(model_spec["training_supported"])) file_content.append(" - {}\n".format(model["version"])) file_content.append(" - {}\n".format(model["min_version"])) + file_content.append(" - `{} <{}>`__\n".format(model_task, model_spec["url"])) f = open("doc_utils/jumpstart.rst", "w") f.writelines(file_content) diff --git a/doc/overview.rst b/doc/overview.rst index df340338d9..bd3f1fd147 100644 --- a/doc/overview.rst +++ b/doc/overview.rst @@ -573,15 +573,49 @@ Here is an example: # When you are done using your endpoint model.sagemaker_session.delete_endpoint('my-endpoint') -******************************************** -Use Prebuilt Models with SageMaker JumpStart -******************************************** +********************************************************* +Use SageMaker JumpStart Algorithms with Pretrained Models +********************************************************* + +JumpStart for the SageMaker Python SDK uses model ids and model versions to access the necessary +utilities. 
This table lists the available models along with extra information that is useful +when selecting the correct model id and corresponding parameters. .. toctree:: :maxdepth: 2 doc_utils/jumpstart +Example notebooks +================= + +JumpStart supports 15 different machine learning problem types. Below is a list of all the supported +problem types with a link to a Jupyter notebook that provides example usage. + +Vision + - `Image Classification `__ + - `Object Detection `__ + - `Semantic Segmentation `__ + - `Instance Segmentation `__ + - `Image Embedding `__ + +Text + - `Text Classification `__ + - `Sentence Pair Classification `__ + - `Question Answering `__ + - `Named Entity Recognition `__ + - `Text Summarization `__ + - `Text Generation `__ + - `Machine Translation `__ + - `Text Embedding `__ + +Tabular + - `Tabular Classification (LightGBM & CatBoost) `__ + - `Tabular Classification (XGBoost & Linear Learner) `__ + - `Tabular Regression (LightGBM & CatBoost) `__ + - `Tabular Regression (XGBoost & Linear Learner) `__ + + `Amazon SageMaker JumpStart `__ is a SageMaker feature that helps users bring machine learning (ML) applications to market using prebuilt solutions for common use cases,