Skip to content

Commit 46391bf

Browse files
jeniyatmufaddal-rohawalaahsan-z-khanBasil BeiroutiPayton Staub
committed
doc: Support for generation of Jumpstart model table on build (#2924)
Co-authored-by: Mufaddal Rohawala <[email protected]> Co-authored-by: Ahsan Khan <[email protected]> Co-authored-by: Mufaddal Rohawala <[email protected]> Co-authored-by: Basil Beirouti <[email protected]> Co-authored-by: Payton Staub <[email protected]> Co-authored-by: Shreya Pandit <[email protected]> Co-authored-by: Basil Beirouti <[email protected]> Co-authored-by: Payton Staub <[email protected]> Co-authored-by: Mohamed Ali Jamaoui <[email protected]> Co-authored-by: ci <ci> Co-authored-by: Jeniya Tabassum <[email protected]> Co-authored-by: sreedes <[email protected]> Co-authored-by: Navin Soni <[email protected]> Co-authored-by: Miyoung <[email protected]> Co-authored-by: Ameen Khan <[email protected]> Co-authored-by: Zhankui Lu <[email protected]> Co-authored-by: Xiaoguang Chen <[email protected]> Co-authored-by: Jonathan Guinegagne <[email protected]> Co-authored-by: Zhankui Lu <[email protected]> Co-authored-by: Yifei Zhu <[email protected]> Co-authored-by: Qingzi-Lan <[email protected]> Co-authored-by: Navin Soni <[email protected]> Co-authored-by: marckarp <[email protected]> Co-authored-by: chenxy <[email protected]> Co-authored-by: Xinghan Chen <[email protected]> Co-authored-by: Tulio Casagrande <[email protected]> Co-authored-by: jerrypeng7773 <[email protected]> Co-authored-by: marckarp <[email protected]> Co-authored-by: jayatalr <[email protected]> Co-authored-by: bhaoz <[email protected]> Co-authored-by: Ethan Cheng <[email protected]> Co-authored-by: Xiaoguang Chen <[email protected]> Co-authored-by: keerthanvasist <[email protected]> Co-authored-by: Shreya Pandit <[email protected]> Co-authored-by: Ivy Bazan <[email protected]> Co-authored-by: IvyBazan <[email protected]> Co-authored-by: Benjamin Crabtree <[email protected]> Co-authored-by: iasoon <[email protected]> Co-authored-by: evakravi <[email protected]>
1 parent 7af5de6 commit 46391bf

File tree

6 files changed

+156
-47
lines changed

6 files changed

+156
-47
lines changed

doc/conf.py

+15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515

1616
import pkg_resources
1717
from datetime import datetime
18+
import sys
19+
import os
20+
21+
sys.path.append(os.path.join(os.path.dirname(__file__), "."))
22+
from doc_utils.jumpstart_doc_utils import create_jumpstart_model_table # noqa: E402
1823

1924
project = "sagemaker"
2025
version = pkg_resources.require(project)[0].version
@@ -71,6 +76,12 @@
7176
# For Adobe Analytics
7277
html_js_files = [
7378
"https://a0.awsstatic.com/s_code/js/3.0/awshome_s_code.js",
79+
"https://cdn.datatables.net/1.10.23/js/jquery.dataTables.min.js",
80+
"js/datatable.js",
81+
]
82+
83+
html_css_files = [
84+
"https://cdn.datatables.net/1.10.23/css/jquery.dataTables.min.css",
7485
]
7586

7687
html_context = {"css_files": ["_static/theme_overrides.css"]}
@@ -83,3 +94,7 @@
8394

8495
# autosectionlabel
8596
autosectionlabel_prefix_document = True
97+
98+
99+
def setup(app):
    """Sphinx ``setup`` hook that regenerates the JumpStart model table.

    Args:
        app: The application object passed in by Sphinx. It is accepted to
            match the hook signature and is not used here.
    """
    create_jumpstart_model_table()

doc/doc_utils/__init__.py

Whitespace-only changes.

doc/doc_utils/jumpstart.rst

Whitespace-only changes.

doc/doc_utils/jumpstart_doc_utils.py

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
from __future__ import absolute_import
14+
from urllib import request
15+
import json
16+
from packaging.version import Version
17+
18+
# Region used to build the JumpStart cache bucket URL below.
JUMPSTART_REGION = "eu-west-2"
# Object key of the models manifest, appended to the bucket base URL.
SDK_MANIFEST_FILE = "models_manifest.json"
# Base HTTPS URL of the bucket; the region appears in both the bucket name and the host.
JUMPSTART_BUCKET_BASE_URL = "https://jumpstart-cache-prod-{region}.s3.{region}.amazonaws.com".format(
    region=JUMPSTART_REGION
)
23+
24+
25+
def get_jumpstart_sdk_manifest():
    """Download the JumpStart models manifest and return it parsed from JSON.

    The manifest is fetched from ``JUMPSTART_BUCKET_BASE_URL`` using the
    ``SDK_MANIFEST_FILE`` key and decoded as UTF-8 before parsing.

    Returns:
        The object produced by ``json.loads`` for the manifest body.
    """
    manifest_url = "{base}/{key}".format(base=JUMPSTART_BUCKET_BASE_URL, key=SDK_MANIFEST_FILE)
    with request.urlopen(manifest_url) as response:
        raw_manifest = response.read()
    return json.loads(raw_manifest.decode("utf-8"))
30+
31+
32+
def get_jumpstart_sdk_spec(key):
    """Download one JumpStart model spec and return it parsed from JSON.

    Args:
        key: Object key of the spec, appended to ``JUMPSTART_BUCKET_BASE_URL``.

    Returns:
        The object produced by ``json.loads`` for the UTF-8 decoded spec body.
    """
    spec_url = "{base}/{key}".format(base=JUMPSTART_BUCKET_BASE_URL, key=key)
    with request.urlopen(spec_url) as response:
        raw_spec = response.read()
    return json.loads(raw_spec.decode("utf-8"))
37+
38+
39+
def create_jumpstart_model_table():
    """Write ``doc_utils/jumpstart.rst`` with one table row per JumpStart model.

    Downloads the models manifest, keeps only the highest version of every
    ``model_id``, downloads the spec of each kept entry, and writes a
    reStructuredText ``list-table`` with the model id, whether training is
    supported, the latest version and the minimum SDK version.

    The output path is relative, so it is resolved against the current
    working directory of the process running the documentation build.
    """
    sdk_manifest = get_jumpstart_sdk_manifest()

    # Keep a single manifest entry per model id: the one with the highest version.
    sdk_manifest_top_versions_for_models = {}
    for model in sdk_manifest:
        model_id = model["model_id"]
        current_top = sdk_manifest_top_versions_for_models.get(model_id)
        if current_top is None or Version(current_top["version"]) < Version(model["version"]):
            sdk_manifest_top_versions_for_models[model_id] = model

    file_content = [
        "==================================\n",
        "JumpStart Available Model Table\n",
        "==================================\n",
        (
            "\n"
            "JumpStart for the SageMaker Python SDK uses model ids and model versions to access"
            " the necessary\n"
            "utilities. This table serves to provide the core material plus some extra"
            " information that can be useful\n"
            "in selecting the correct model id and corresponding parameters.\n"
            "\n"
        ),
        (
            "\n"
            'If you want to automatically use the latest version of the model, use "*" for the'
            " `model_version` attribute.\n"
            "We highly suggest pinning an exact model version however.\n"
            "\n"
        ),
        "\n",
        # Directive options and rows must be indented under the directive, and
        # each row is a two-level bullet list: "*" starts a row, "-" a cell.
        ".. list-table:: Available Models\n",
        "   :widths: 50 20 20 20\n",
        "   :header-rows: 1\n",
        "   :class: datatable\n",
        "\n",
        "   * - Model ID\n",
        "     - Fine Tunable?\n",
        "     - Latest Version\n",
        "     - Min SDK Version\n",
    ]

    # Emit rows only for the latest version of each model (sorted by model id),
    # so the "Latest Version" column is accurate and one spec is fetched per model.
    for model_id in sorted(sdk_manifest_top_versions_for_models):
        model = sdk_manifest_top_versions_for_models[model_id]
        model_spec = get_jumpstart_sdk_spec(model["spec_key"])
        file_content.append("   * - {}\n".format(model["model_id"]))
        file_content.append("     - {}\n".format(model_spec["training_supported"]))
        file_content.append("     - {}\n".format(model["version"]))
        file_content.append("     - {}\n".format(model["min_version"]))

    # The context manager guarantees the file is flushed and closed before Sphinx reads it.
    with open("doc_utils/jumpstart.rst", "w") as f:
        f.writelines(file_content)

doc/overview.rst

+51-47
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,11 @@ Here is an example:
577577
Use Prebuilt Models with SageMaker JumpStart
578578
********************************************
579579

580+
.. toctree::
581+
:maxdepth: 2
582+
583+
doc_utils/jumpstart
584+
580585
`Amazon SageMaker JumpStart <https://aws.amazon.com/sagemaker/getting-started/>`__ is a
581586
SageMaker feature that helps users bring machine learning (ML)
582587
applications to market using prebuilt solutions for common use cases,
@@ -628,11 +633,11 @@ the ``model_id`` and ``model_version`` needed to retrieve the URI.
628633

629634
- ``model_id``: A unique identifier for the JumpStart model.
630635
- ``model_version``: The version of the specifications for the
631-
model. To use the latest version, enter ``*``. This is a
636+
model. To use the latest version, enter ``"*"``. This is a
632637
required parameter.
633638
634639
To retrieve a model, first select a ``model_id`` and ``model_version`` from
635-
the Available Models.
640+
the :doc:`available models <./doc_utils/jumpstart>`.
636641

637642
.. code:: python
638643
@@ -652,7 +657,7 @@ Then use those values to retrieve the model as follows.
652657
JumpStart scripts
653658
-----------------
654659

655-
To adapt JumpStart models for the SageMaker Python SDK, a custom
660+
To adapt JumpStart models for SageMaker, a custom
656661
script is needed to perform training or inference. JumpStart
657662
maintains a suite of scripts used for each of the models in the
658663
JumpStart S3 bucket, which can be accessed using the SageMaker Python
@@ -769,7 +774,7 @@ Deployment may take about 5 minutes.
769774
   predictor_cls=Predictor
770775
)
771776
772-
Because ``catboost`` relies on the PyTorch Deep Learning Containers
777+
Because ``catboost`` and ``lightgbm`` rely on the PyTorch Deep Learning Containers
773778
image, the corresponding Models and Endpoints display the “pytorch”
774779
prefix when viewed in the AWS console. To verify that these models
775780
were created successfully with your desired base model, refer to
@@ -780,7 +785,7 @@ Perform Inference
780785
781786
Finally, use the ``predictor`` instance to query your endpoint. For
782787
``catboost-classification-model``, for example, the predictor accepts
783-
a string. For more information about how to use the predictor, see
788+
a CSV. For more information about how to use the predictor, see
784789
the
785790
`Appendix <https://sagemaker.readthedocs.io/en/stable/overview.html#appendix>`__.
786791

@@ -807,9 +812,8 @@ using “training” as the model scope. Use the utility functions to
807812
retrieve the URI of each of the three components you need to
808813
continue. The HuggingFace model in this example requires a GPU
809814
instance, so use the ``ml.p3.2xlarge`` instance type. For a complete
810-
list of available SageMaker instance types , see `Available SageMaker
811-
Studio Instance
812-
Types <https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-instance-types.html>`__.
815+
list of available SageMaker instance types, see the `SageMaker On-Demand Pricing
816+
Table <https://aws.amazon.com/sagemaker/pricing/#On-Demand_Pricing>`__ and select 'Training'.
813817

814818
.. code:: python
815819
@@ -970,45 +974,45 @@ ContentType of ``application/list-text``.
970974

971975
.. container::
972976

973-
+-----------------------+-----------------------+-----------------------+
974-
| Task | Identifier | ContentType |
975-
+-----------------------+-----------------------+-----------------------+
976-
| Image Classification | ic | "application/x-image" |
977-
+-----------------------+-----------------------+-----------------------+
978-
| Object Detection | od, od1 | "application/x-image" |
979-
+-----------------------+-----------------------+-----------------------+
980-
| Semantic Segmentation | semseg | "application/x-image" |
981-
+-----------------------+-----------------------+-----------------------+
982-
| Instance Segmentation | is | "application/x-image" |
983-
+-----------------------+-----------------------+-----------------------+
984-
| Text Classification | tc | "application/x-text" |
985-
+-----------------------+-----------------------+-----------------------+
986-
| Sentence Pair | spc | "a |
987-
| Classification | | pplication/list-text" |
988-
+-----------------------+-----------------------+-----------------------+
989-
| Extractive Question | eqa | "a |
990-
| Answering | | pplication/list-text" |
991-
+-----------------------+-----------------------+-----------------------+
992-
| Text Generation | textgeneration | "application/x-text" |
993-
+-----------------------+-----------------------+-----------------------+
994-
| Image Classification | icembedding | "application/x-image" |
995-
| Embedding | | |
996-
+-----------------------+-----------------------+-----------------------+
997-
| Text Classification | tcembedding | "application/x-text" |
998-
| Embedding | | |
999-
+-----------------------+-----------------------+-----------------------+
1000-
| Named-entity | ner | "application/x-text" |
1001-
| Recognition | | |
1002-
+-----------------------+-----------------------+-----------------------+
1003-
| Text Summarization | summarization | "application/x-text" |
1004-
+-----------------------+-----------------------+-----------------------+
1005-
| Text Translation | translation | "application/x-text" |
1006-
+-----------------------+-----------------------+-----------------------+
1007-
| Tabular Regression | regression | "text/csv" |
1008-
+-----------------------+-----------------------+-----------------------+
1009-
| Tabular | classification | "text/csv" |
1010-
| Classification | | |
1011-
+-----------------------+-----------------------+-----------------------+
977+
+-----------------------+-----------------------+-------------------------+
978+
| Task | Identifier | ContentType |
979+
+-----------------------+-----------------------+-------------------------+
980+
| Image Classification | ic | "application/x-image" |
981+
+-----------------------+-----------------------+-------------------------+
982+
| Object Detection | od, od1 | "application/x-image" |
983+
+-----------------------+-----------------------+-------------------------+
984+
| Semantic Segmentation | semseg | "application/x-image" |
985+
+-----------------------+-----------------------+-------------------------+
986+
| Instance Segmentation | is | "application/x-image" |
987+
+-----------------------+-----------------------+-------------------------+
988+
| Text Classification | tc | "application/x-text" |
989+
+-----------------------+-----------------------+-------------------------+
990+
| Sentence Pair | spc | "application/list-text" |
991+
| Classification | | |
992+
+-----------------------+-----------------------+-------------------------+
993+
| Extractive Question | eqa | "application/list-text" |
994+
| Answering | | |
995+
+-----------------------+-----------------------+-------------------------+
996+
| Text Generation | textgeneration | "application/x-text" |
997+
+-----------------------+-----------------------+-------------------------+
998+
| Image Classification | icembedding | "application/x-image" |
999+
| Embedding | | |
1000+
+-----------------------+-----------------------+-------------------------+
1001+
| Text Classification | tcembedding | "application/x-text" |
1002+
| Embedding | | |
1003+
+-----------------------+-----------------------+-------------------------+
1004+
| Named-entity | ner | "application/x-text" |
1005+
| Recognition | | |
1006+
+-----------------------+-----------------------+-------------------------+
1007+
| Text Summarization | summarization | "application/x-text" |
1008+
+-----------------------+-----------------------+-------------------------+
1009+
| Text Translation | translation | "application/x-text" |
1010+
+-----------------------+-----------------------+-------------------------+
1011+
| Tabular Regression | regression | "text/csv" |
1012+
+-----------------------+-----------------------+-------------------------+
1013+
| Tabular | classification | "text/csv" |
1014+
| Classification | | |
1015+
+-----------------------+-----------------------+-------------------------+
10121016

10131017
********************************
10141018
SageMaker Automatic Model Tuning

doc/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
sphinx==3.1.1
22
sphinx-rtd-theme==0.5.0
33
docutils==0.15.2
4+
packaging==20.9

0 commit comments

Comments
 (0)