Skip to content

Commit eb6dc7e

Browse files
Merge branch 'master' into est_anno
2 parents d2cabee + 291f6e4 commit eb6dc7e

File tree

9 files changed

+180
-92
lines changed

9 files changed

+180
-92
lines changed

doc/doc_utils/jumpstart_doc_utils.py

+14-7
Original file line numberDiff line numberDiff line change
@@ -143,20 +143,26 @@ def create_jumpstart_model_table():
143143
file_content.append(".. |external-link| raw:: html\n\n")
144144
file_content.append(' <i class="fa fa-external-link"></i>\n\n')
145145

146-
file_content.append("==================================\n")
147-
file_content.append("JumpStart Available Model Table\n")
148-
file_content.append("==================================\n")
146+
file_content.append("================================================\n")
147+
file_content.append("Built-in Algorithms with pre-trained Model Table\n")
148+
file_content.append("================================================\n")
149149
file_content.append(
150150
"""
151-
JumpStart for the SageMaker Python SDK uses model IDs and model versions to access the necessary
152-
utilities. This table serves to provide the core material plus some extra information that can be useful
153-
in selecting the correct model ID and corresponding parameters.\n"""
151+
The SageMaker Python SDK uses model IDs and model versions to access the necessary
152+
utilities for pre-trained models. This table serves to provide the core material plus
153+
some extra information that can be useful in selecting the correct model ID and
154+
corresponding parameters.\n"""
154155
)
155156
file_content.append(
156157
"""
157158
If you want to automatically use the latest version of the model, use "*" for the `model_version` attribute.
158159
We highly suggest pinning an exact model version however.\n"""
159160
)
161+
file_content.append(
162+
"""
163+
These models are also available through the
164+
`JumpStart UI in SageMaker Studio <https://docs.aws.amazon.com/sagemaker/latest/dg/studio-jumpstart.html>`__\n"""
165+
)
160166
file_content.append("\n")
161167
file_content.append(".. list-table:: Available Models\n")
162168
file_content.append(" :widths: 50 20 20 20 30 20\n")
@@ -183,5 +189,6 @@ def create_jumpstart_model_table():
183189
" - `{} <{}>`__ |external-link|\n".format(model_source, model_spec["url"])
184190
)
185191

186-
f = open("doc_utils/jumpstart.rst", "w")
192+
f = open("doc_utils/pretrainedmodels.rst", "w")
187193
f.writelines(file_content)
194+
f.close()
File renamed without changes.

doc/overview.rst

+61-69
Original file line numberDiff line numberDiff line change
@@ -573,24 +573,31 @@ Here is an example:
573573
# When you are done using your endpoint
574574
model.sagemaker_session.delete_endpoint('my-endpoint')
575575
576-
*********************************************************
577-
Use SageMaker JumpStart Algorithms with Pretrained Models
578-
*********************************************************
576+
***********************************************************************
577+
Use Built-in Algorithms with Pre-trained Models in SageMaker Python SDK
578+
***********************************************************************
579+
580+
SageMaker Python SDK provides built-in algorithms with pre-trained models from popular open source model
581+
hubs, such as TensorFlow Hub, PyTorch Hub, and HuggingFace. Customers can deploy these pre-trained models
582+
as-is or first fine-tune them on a custom dataset and then deploy to a SageMaker endpoint for inference.
583+
584+
585+
SageMaker SDK built-in algorithms allow customers to access pre-trained models using model ids and model
586+
versions. The ‘pre-trained model’ table below provides a list of models with information useful in
587+
selecting the correct model id and corresponding parameters. These models are also available through
588+
the `JumpStart UI in SageMaker Studio <https://docs.aws.amazon.com/sagemaker/latest/dg/studio-jumpstart.html>`__.
579589

580-
JumpStart for the SageMaker Python SDK uses model ids and model versions to access the necessary
581-
utilities. This table serves to provide the core material plus some extra information that can be useful
582-
in selecting the correct model id and corresponding parameters.
583590

584591
.. toctree::
585592
:maxdepth: 2
586593

587-
doc_utils/jumpstart
594+
doc_utils/pretrainedmodels
588595

589596
Example notebooks
590597
=================
591598

592-
JumpStart supports 15 different machine learning problem types. Below is a list of all the supported
593-
problem types with a link to a Jupyter notebook that provides example usage.
599+
SageMaker built-in algorithms with pre-trained models support 15 different machine learning problem types.
600+
Below is a list of all the supported problem types with a link to a Jupyter notebook that provides example usage.
594601

595602
Vision
596603
- `Image Classification <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_image_classification/Amazon_JumpStart_Image_Classification.ipynb>`__
@@ -610,25 +617,15 @@ Text
610617
- `Text Embedding <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_text_embedding/Amazon_JumpStart_Text_Embedding.ipynb>`__
611618

612619
Tabular
613-
- `Tabular Classification (LightGBM & Catboost) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_classification/Amazon_JumpStart_Tabular_Classification_LightGBM_CatBoost.ipynb>`__
614-
- `Tabular Classification (XGBoost & Linear Learner) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_classification/Amazon_JumpStart_Tabular_Classification_XGBoost_LinearLearner.ipynb>`__
615-
- `Tabular Regression (LightGBM & Catboost) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_regression/Amazon_JumpStart_Tabular_Regression_LightGBM_CatBoost.ipynb>`__
616-
- `Tabular Regression (XGBoost & Linear Learner) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_regression/Amazon_JumpStart_Tabular_Regression_XGBoost_LinearLearner.ipynb>`__
617-
618-
619-
`Amazon SageMaker JumpStart <https://aws.amazon.com/sagemaker/getting-started/>`__ is a
620-
SageMaker feature that helps users bring machine learning (ML)
621-
applications to market using prebuilt solutions for common use cases,
622-
example notebooks, open source models from model zoos, and built-in
623-
algorithms.
624-
625-
A JumpStart model enables you to quickly start a machine learning
626-
workflow. JumpStart takes models from popular open source model hubs,
627-
such as TensorFlow and HuggingFace, and pre-trains them on an open
628-
source dataset. Using the SageMaker Python SDK, you can select a
629-
prebuilt model from the model zoo to train on custom data or deploy
630-
to a SageMaker endpoint for inference without signing up for
631-
SageMaker Studio.
620+
- `Tabular Classification (LightGBM & Catboost) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/lightgbm_catboost_tabular/Amazon_Tabular_Classification_LightGBM_CatBoost.ipynb>`__
621+
- `Tabular Classification (XGBoost & Scikit-learn Linear Learner) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/xgboost_linear_learner_tabular/Amazon_Tabular_Classification_XGBoost_LinearLearner.ipynb>`__
622+
- `Tabular Classification (AutoGluon) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/autogluon_tabular/Amazon_Tabular_Classification_AutoGluon.ipynb>`__
623+
- `Tabular Classification (TabTransformer) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/tabtransformer_tabular/Amazon_Tabular_Classification_TabTransformer.ipynb>`__
624+
- `Tabular Regression (LightGBM & Catboost) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/lightgbm_catboost_tabular/Amazon_Tabular_Regression_LightGBM_CatBoost.ipynb>`__
625+
- `Tabular Regression (XGBoost & Scikit-learn Linear Learner) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/xgboost_linear_learner_tabular/Amazon_Tabular_Regression_XGBoost_LinearLearner.ipynb>`__
626+
- `Tabular Regression (AutoGluon) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/autogluon_tabular/Amazon_Tabular_Regression_AutoGluon.ipynb>`__
627+
- `Tabular Regression (TabTransformer) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/tabtransformer_tabular/Amazon_Tabular_Regression_TabTransformer.ipynb>`__
628+
632629

633630
The following topics give you information about JumpStart components,
634631
as well as how to use the SageMaker Python SDK for these workflows.
@@ -644,24 +641,22 @@ Prerequisites
644641
Amazon S3. For more information about IAM role permissions,
645642
see `Policies and permissions in IAM <https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html>`__.
646643

647-
JumpStart Components
648-
====================
644+
Built-in Components
645+
===================
649646

650-
The following sections give information about the main JumpStart
647+
The following sections give information about the main built-in
651648
components and their function.
652649

653-
JumpStart models
654-
----------------
650+
Pre-trained models
651+
------------------
655652

656-
JumpStart maintains a model zoo of over 300 models pre-trained on
657-
open source datasets. You can use the SageMaker Python SDK
658-
to fine-tune a model on your own dataset or deploy it directly to a
659-
SageMaker endpoint for inference.
653+
SageMaker maintains a model zoo of over 300 models from popular open source model hubs, such as
654+
TensorFlow Hub, PyTorch Hub, and HuggingFace. You can use the SageMaker Python SDK to fine-tune
655+
a model on your own dataset or deploy it directly to a SageMaker endpoint for inference.
660656

661-
JumpStart model artifacts are stored as tarballs in the JumpStart S3
662-
bucket. Each model is versioned and contains a unique ID which can be
663-
used to retrieve the model URI. The following information describes
664-
the ``model_id`` and ``model_version`` needed to retrieve the URI.
657+
Model artifacts are stored as tarballs in a S3 bucket. Each model is versioned and contains a
658+
unique ID which can be used to retrieve the model URI. The following information describes the
659+
``model_id`` and ``model_version`` needed to retrieve the URI.
665660

666661
.. container::
667662

@@ -671,7 +666,7 @@ the ``model_id`` and ``model_version`` needed to retrieve the URI.
671666
required parameter.
672667
673668
To retrieve a model, first select a ``model ID`` and ``version`` from
674-
the :doc:`available models <./doc_utils/jumpstart>`.
669+
the :doc:`available models <./doc_utils/pretrainedmodels>`.
675670

676671
.. code:: python
677672
@@ -688,15 +683,13 @@ Then use those values to retrieve the model as follows.
688683
    model_id=model_id, model_version=model_version, model_scope=scope
689684
)
690685
691-
JumpStart scripts
692-
-----------------
686+
Model scripts
687+
-------------
693688

694-
To adapt JumpStart models for SageMaker, a custom
695-
script is needed to perform training or inference. JumpStart
696-
maintains a suite of scripts used for each of the models in the
697-
JumpStart S3 bucket, which can be accessed using the SageMaker Python
698-
SDK. Use the ``model_id`` and ``version`` of the corresponding model
699-
to retrieve the related script as follows.
689+
To adapt pre-trained models for SageMaker, a custom script is needed to perform training
690+
or inference. SageMaker maintains a suite of scripts used for each of the models in the
691+
S3 bucket, which can be accessed using the SageMaker Python SDK. Use the ``model_id`` and
692+
``version`` of the corresponding model to retrieve the related script as follows.
700693

701694
.. code:: python
702695
@@ -706,11 +699,11 @@ to retrieve the related script as follows.
706699
    model_id=model_id, model_version=model_version, script_scope=scope
707700
)
708701
709-
JumpStart images
710-
----------------
702+
Model images
703+
-------------
711704

712705
A Docker image is required to perform training or inference on all
713-
SageMaker models. JumpStart relies on Docker images from the
706+
SageMaker models. SageMaker relies on Docker images from the
714707
following repos https://github.com/aws/deep-learning-containers,
715708
https://github.com/aws/sagemaker-xgboost-container,
716709
and https://github.com/aws/sagemaker-scikit-learn-container. Use
@@ -733,16 +726,16 @@ retrieve the related image as follows.
733726
Deploy a Pre-Trained Model Directly to a SageMaker Endpoint
734727
============================================================
735728

736-
In this section, you learn how to take a pre-trained JumpStart model
737-
and deploy it directly to a SageMaker Endpoint. This is the fastest
738-
way to start machine learning with a JumpStart model. The following
729+
In this section, you learn how to take a pre-trained model and deploy
730+
it directly to a SageMaker Endpoint. This is the fastest way to start
731+
machine learning with a pre-trained model. The following
739732
assumes familiarity with `SageMaker
740733
models <https://sagemaker.readthedocs.io/en/stable/api/inference/model.html>`__
741734
and their deploy functions.
742735

743-
To begin, select a ``model_id`` and ``version`` from the JumpStart
736+
To begin, select a ``model_id`` and ``version`` from the pre-trained
744737
models table, as well as a model scope of either “inference” or
745-
“training”. For this example, you use a pre-trained JumpStart model,
738+
“training”. For this example, you use a pre-trained model,
746739
so select “inference”  for your model scope. Use the utility
747740
functions to retrieve the URI of each of the three components you
748741
need to continue.
@@ -772,7 +765,7 @@ need to continue.
772765
773766
Next, pass the URIs and other key parameters as part of a new
774767
SageMaker Model class. The ``entry_point`` is a JumpStart script
775-
named ``inference.py``. JumpStart handles the implementation of this
768+
named ``inference.py``. SageMaker handles the implementation of this
776769
script. You must use this value for model inference to be successful.
777770
For more information about the Model class and its parameters,
778771
see `Model <https://sagemaker.readthedocs.io/en/stable/api/inference/model.html>`__.
@@ -811,7 +804,7 @@ Deployment may take about 5 minutes.
811804
Because the model and script URIs are distributed by SageMaker JumpStart,
812805
the endpoint, endpoint config and model resources will be prefixed with
813806
``sagemaker-jumpstart``. Refer to the model ``Tags`` to inspect the
814-
JumpStart artifacts involved in the model creation.
807+
model artifacts involved in the model creation.
815808

816809
Perform Inference
817810
-----------------
@@ -829,17 +822,16 @@ the
829822
Fine-tune a Model and Deploy to a SageMaker Endpoint
830823
====================================================
831824

832-
In this section, you initiate a training job to further train one of
833-
the pretrained JumpStart models for your use case, then deploy it to
834-
a SageMaker Endpoint for inference. This lets you fine tune the model
835-
for your use case with your custom dataset. The following assumes
825+
In this section, you initiate a training job to further train one of the pre-trained models
826+
for your use case, then deploy it to a SageMaker Endpoint for inference. This lets you fine
827+
tune the model for your use case with your custom dataset. The following assumes
836828
familiarity with `SageMaker training jobs and their
837829
architecture <https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-training.html>`__.
838830

839-
Fine-tune a JumpStart Model on a Custom Dataset
840-
-----------------------------------------------
831+
Fine-tune a Pre-trained Model on a Custom Dataset
832+
-------------------------------------------------
841833

842-
To begin, select a ``model_id`` and ``version`` from the JumpStart
834+
To begin, select a ``model_id`` and ``version`` from the pre-trained
843835
models table, as well as a model scope. In this case, you begin by
844836
using “training” as the model scope. Use the utility functions to
845837
retrieve the URI of each of the three components you need to
@@ -875,10 +867,10 @@ Table <https://aws.amazon.com/sagemaker/pricing/#On-Demand_Pricing>`__ and selec
875867
    instance_type=training_instance_type,
876868
)
877869
878-
Next, use the JumpStart resource URIs to create an ``Estimator`` and
870+
Next, use the model resource URIs to create an ``Estimator`` and
879871
train it on a custom training dataset. You must specify the S3 path
880872
of your custom training dataset. The Estimator class requires
881-
an ``entry_point`` parameter. In this case, JumpStart uses
873+
an ``entry_point`` parameter. In this case, SageMaker uses
882874
“transfer_learning.py”. The training job fails to execute if this
883875
value is not set.
884876

requirements/extras/test_requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
tox==3.24.5
22
flake8==4.0.1
3-
pytest==6.0.2
3+
pytest==6.2.5
44
pytest-cov==3.0.0
55
pytest-rerunfailures==10.2
66
pytest-timeout==2.1.0

src/sagemaker/image_uris.py

+3
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,16 @@
2424
from sagemaker.spark import defaults
2525
from sagemaker.jumpstart import artifacts
2626
from sagemaker.workflow import is_pipeline_variable
27+
from sagemaker.workflow.utilities import override_pipeline_parameter_var
2728

2829
logger = logging.getLogger(__name__)
2930

3031
ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}"
3132
HUGGING_FACE_FRAMEWORK = "huggingface"
3233

3334

35+
# TODO: we should remove this decorator later
36+
@override_pipeline_parameter_var
3437
def retrieve(
3538
framework,
3639
region,

src/sagemaker/processing.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1199,7 +1199,9 @@ def __init__(
11991199
source (str): The source for the output.
12001200
destination (str): The destination of the output. If a destination
12011201
is not provided, one will be generated:
1202-
"s3://<default-bucket-name>/<job-name>/output/<output-name>".
1202+
"s3://<default-bucket-name>/<job-name>/output/<output-name>"
1203+
(Note: this does not apply when used with
1204+
:class:`~sagemaker.workflow.steps.ProcessingStep`).
12031205
output_name (str): The name of the output. If a name
12041206
is not provided, one will be generated (eg. "output-1").
12051207
s3_upload_mode (str): Valid options are "EndOfJob" or "Continuous".

0 commit comments

Comments
 (0)