Merge branch 'master' into master

SifeiLi · web-flow · commit bf3502a18f7d · 2024-02-05T16:23:56.000-08:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## v2.207.0 (2024-02-05)
+
+### Features
+
+ * Introduce HF Transformers to ModelBuilder
+ * Retrieve JumpStart estimator and predictor without specifying model ID (inferred from tags)
+
+### Bug Fixes and Other Changes
+
+ * Upgrade SMP PyTorch support to 2.1
+ * Fetch HF metadata only when explicit type is not selected
+ * Relax upper bound for urllib dependency
+
 ## v2.206.0 (2024-01-31)
 
 ### Features
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -227,6 +227,12 @@ For example, see the [Processing API reference](https://github.com/aws/sagemaker
 
 To build the Sphinx docs, run the following command in the `doc/` directory:
 
+```shell
+# Initial setup, only required for the first run
+pip install -r requirements.txt
+pip install -e ../
+```
+
 ```shell
 make html
 ```
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.206.1.dev0
+2.207.1.dev0
diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py
@@ -139,6 +139,8 @@
         "1.13.1",
         "2.0.0",
         "2.0.1",
+        "2.1.0",
+        "2.1.2",
     ],
 }
 
@@ -158,7 +160,7 @@
 ]
 
 
-TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0"]
+TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.2"]
 
 TRAINIUM_SUPPORTED_DISTRIBUTION_STRATEGIES = ["torch_distributed"]
 TRAINIUM_SUPPORTED_TORCH_DISTRIBUTED_FRAMEWORK_VERSIONS = [
diff --git a/src/sagemaker/image_uri_config/huggingface.json b/src/sagemaker/image_uri_config/huggingface.json
@@ -12,7 +12,8 @@
             "4.12": "4.12.3",
             "4.17": "4.17.0",
             "4.26": "4.26.0",
-            "4.28": "4.28.1"
+            "4.28": "4.28.1",
+            "4.36": "4.36.0"
         },
         "versions": {
             "4.4.2": {
@@ -970,6 +971,53 @@
                         "gpu": "cu118-ubuntu20.04"
                     }
                 }
+            },
+            "4.36.0": {
+                "version_aliases": {
+                    "pytorch2.1": "pytorch2.1.0"
+                },
+                "pytorch2.1.0": {
+                    "py_versions": [
+                        "py310"
+                    ],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "il-central-1": "780543022126",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ap-southeast-3": "907027046896",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "me-central-1": "914824155844",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-east-1": "446045086412",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-isob-east-1": "094389454867",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884",
+                        "ca-west-1": "204538143572"
+                    },
+                    "repository": "huggingface-pytorch-training",
+                    "container_version": {
+                        "gpu": "cu121-ubuntu20.04"
+                    }
+                }
             }
         }
     },
@@ -985,7 +1033,8 @@
             "4.12": "4.12.3",
             "4.17": "4.17.0",
             "4.26": "4.26.0",
-            "4.28": "4.28.1"
+            "4.28": "4.28.1",
+            "4.37": "4.37.0"
         },
         "versions": {
             "4.6.1": {
@@ -1782,7 +1831,59 @@
                         "cpu": "ubuntu20.04"
                     }
                 }
+            },
+            "4.37.0": {
+                "version_aliases": {
+                    "pytorch2.1": "pytorch2.1.0"
+                },
+                "pytorch2.1.0": {
+                    "py_versions": [
+                        "py310"
+                    ],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "il-central-1": "780543022126",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-south-2": "772153158452",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ap-southeast-3": "907027046896",
+                        "ap-southeast-4": "457447274322",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-central-2": "380420809688",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-south-2": "503227376785",
+                        "me-south-1": "217643126080",
+                        "me-central-1": "914824155844",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-east-1": "446045086412",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-isob-east-1": "094389454867",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884",
+                        "ca-west-1": "204538143572"
+                    },
+                    "repository": "huggingface-pytorch-inference",
+                    "container_version": {
+                        "gpu": "cu118-ubuntu20.04",
+                        "cpu": "ubuntu22.04"
+                    }
+                }
             }
         }
     }
-}
+}
diff --git a/src/sagemaker/image_uri_config/pytorch-smp.json b/src/sagemaker/image_uri_config/pytorch-smp.json
@@ -4,7 +4,8 @@
             "gpu"
         ],
         "version_aliases": {
-            "2.0": "2.0.1"
+            "2.0": "2.0.1",
+            "2.1": "2.1.2"
         },
         "versions": {
             "2.0.1": {
@@ -31,7 +32,32 @@
                     "us-west-2": "658645717510"
                 },
                 "repository": "smdistributed-modelparallel"
+            },
+            "2.1.2": {
+                "py_versions": [
+                    "py310"
+                ],
+                "registries": {
+                    "ap-northeast-1": "658645717510",
+                    "ap-northeast-2": "658645717510",
+                    "ap-northeast-3": "658645717510",
+                    "ap-south-1": "658645717510",
+                    "ap-southeast-1": "658645717510",
+                    "ap-southeast-2": "658645717510",
+                    "ca-central-1": "658645717510",
+                    "eu-central-1": "658645717510",
+                    "eu-north-1": "658645717510",
+                    "eu-west-1": "658645717510",
+                    "eu-west-2": "658645717510",
+                    "eu-west-3": "658645717510",
+                    "sa-east-1": "658645717510",
+                    "us-east-1": "658645717510",
+                    "us-east-2": "658645717510",
+                    "us-west-1": "658645717510",
+                    "us-west-2": "658645717510"
+                },
+                "repository": "smdistributed-modelparallel"
             }
         }
     }
-}
+}
diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py
@@ -672,7 +672,7 @@ def get_training_image_uri(
             if "modelparallel" in distribution["smdistributed"]:
                 if distribution["smdistributed"]["modelparallel"].get("enabled", True):
                     framework = "pytorch-smp"
-                    if "p5" in instance_type:
+                    if "p5" in instance_type or "2.1" in framework_version:
                         container_version = "cu121"
                     else:
                         container_version = "cu118"
diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py
@@ -579,19 +579,19 @@ def build(
 
         self.serve_settings = self._get_serve_setting()
 
-        hf_model_md = get_huggingface_model_metadata(
-            self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN")
-        )
-
         if isinstance(self.model, str):
             if self._is_jumpstart_model_id():
                 return self._build_for_jumpstart()
-            if self._is_djl():
+            if self._is_djl():  # pylint: disable=R1705
                 return self._build_for_djl()
-            if hf_model_md.get("pipeline_tag") == "text-generation":  # pylint: disable=R1705
-                return self._build_for_tgi()
             else:
-                return self._build_for_transformers()
+                hf_model_md = get_huggingface_model_metadata(
+                    self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN")
+                )
+                if hf_model_md.get("pipeline_tag") == "text-generation":  # pylint: disable=R1705
+                    return self._build_for_tgi()
+                else:
+                    return self._build_for_transformers()
 
         self._build_validations()
 
diff --git a/tests/integ/sagemaker/serve/test_serve_js_happy.py b/tests/integ/sagemaker/serve/test_serve_js_happy.py
@@ -13,7 +13,6 @@
 from __future__ import absolute_import
 
 import pytest
-from unittest.mock import patch, Mock
 from sagemaker.serve.builder.model_builder import ModelBuilder
 from sagemaker.serve.builder.schema_builder import SchemaBuilder
 from tests.integ.sagemaker.serve.constants import (
@@ -33,7 +32,6 @@
 ]
 JS_MODEL_ID = "huggingface-textgeneration1-gpt-neo-125m-fp16"
 ROLE_NAME = "SageMakerRole"
-MOCK_HF_MODEL_METADATA_JSON = {"mock_key": "mock_value"}
 
 
 @pytest.fixture
@@ -47,23 +45,14 @@ def happy_model_builder(sagemaker_session):
     )
 
 
-@patch("sagemaker.huggingface.llm_utils.urllib")
-@patch("sagemaker.huggingface.llm_utils.json")
 @pytest.mark.skipif(
     PYTHON_VERSION_IS_NOT_310,
     reason="The goal of these test are to test the serving components of our feature",
 )
 @pytest.mark.slow_test
-def test_happy_tgi_sagemaker_endpoint(
-    mock_urllib, mock_json, happy_model_builder, gpu_instance_type
-):
+def test_happy_tgi_sagemaker_endpoint(happy_model_builder, gpu_instance_type):
     logger.info("Running in SAGEMAKER_ENDPOINT mode...")
     caught_ex = None
-
-    mock_json.load.return_value = MOCK_HF_MODEL_METADATA_JSON
-    mock_hf_model_metadata_url = Mock()
-    mock_urllib.request.Request.side_effect = mock_hf_model_metadata_url
-
     model = happy_model_builder.build()
 
     with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):
diff --git a/tests/integ/sagemaker/serve/test_serve_pt_happy.py b/tests/integ/sagemaker/serve/test_serve_pt_happy.py
@@ -19,7 +19,6 @@
 import io
 import numpy as np
 
-from unittest.mock import patch, Mock
 from sagemaker.serve.builder.model_builder import ModelBuilder, Mode
 from sagemaker.serve.builder.schema_builder import SchemaBuilder, CustomPayloadTranslator
 from sagemaker.serve.spec.inference_spec import InferenceSpec
@@ -38,7 +37,6 @@
 logger = logging.getLogger(__name__)
 
 ROLE_NAME = "SageMakerRole"
-MOCK_HF_MODEL_METADATA_JSON = {"mock_key": "mock_value"}
 
 
 @pytest.fixture
@@ -183,8 +181,6 @@ def model_builder(request):
 #                 ), f"{caught_ex} was thrown when running pytorch squeezenet local container test"
 
 
-@patch("sagemaker.huggingface.llm_utils.urllib")
-@patch("sagemaker.huggingface.llm_utils.json")
 @pytest.mark.skipif(
     PYTHON_VERSION_IS_NOT_310,  # or NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE,
     reason="The goal of these test are to test the serving components of our feature",
@@ -194,17 +190,12 @@ def model_builder(request):
 )
 @pytest.mark.slow_test
 def test_happy_pytorch_sagemaker_endpoint(
-    mock_urllib,
-    mock_json,
     sagemaker_session,
     model_builder,
     cpu_instance_type,
     test_image,
 ):
     logger.info("Running in SAGEMAKER_ENDPOINT mode...")
-    mock_json.load.return_value = MOCK_HF_MODEL_METADATA_JSON
-    mock_hf_model_metadata_url = Mock()
-    mock_urllib.request.Request.side_effect = mock_hf_model_metadata_url
     caught_ex = None
 
     iam_client = sagemaker_session.boto_session.client("iam")
diff --git a/tests/unit/sagemaker/image_uris/test_smp_v2.py b/tests/unit/sagemaker/image_uris/test_smp_v2.py
@@ -16,7 +16,7 @@
 from sagemaker import image_uris
 from tests.unit.sagemaker.image_uris import expected_uris
 
-CONTAINER_VERSIONS = {"ml.p4d.24xlarge": "cu118", "ml.p5d.24xlarge": "cu121"}
+CONTAINER_VERSIONS = {"ml.p4d.24xlarge": "cu118", "ml.p5.24xlarge": "cu121"}
 
 
 @pytest.mark.parametrize("load_config", ["pytorch-smp.json"], indirect=True)
@@ -34,6 +34,10 @@ def test_smp_v2(load_config):
             for py_version in PY_VERSIONS:
                 for region in ACCOUNTS.keys():
                     for instance_type in CONTAINER_VERSIONS.keys():
+                        cuda_vers = CONTAINER_VERSIONS[instance_type]
+                        if "2.1" in version:
+                            cuda_vers = "cu121"
+
                         uri = image_uris.get_training_image_uri(
                             region,
                             framework="pytorch",
@@ -45,7 +49,7 @@ def test_smp_v2(load_config):
                         expected = expected_uris.framework_uri(
                             repo="smdistributed-modelparallel",
                             fw_version=version,
-                            py_version=f"{py_version}-{CONTAINER_VERSIONS[instance_type]}",
+                            py_version=f"{py_version}-{cuda_vers}",
                             processor=processor,
                             region=region,
                             account=ACCOUNTS[region],
diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py

Original file line number	Diff line number	Diff line change
`@@ -139,6 +139,8 @@`
`139`	`139`	`"1.13.1",`
`140`	`140`	`"2.0.0",`
`141`	`141`	`"2.0.1",`
	`142`	`+ "2.1.0",`
	`143`	`+ "2.1.2",`
`142`	`144`	`],`
`143`	`145`	`}`
`144`	`146`
`@@ -158,7 +160,7 @@`
`158`	`160`	`]`
`159`	`161`
`160`	`162`
`161`		`-TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0"]`
	`163`	`+TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.2"]`
`162`	`164`
`163`	`165`	`TRAINIUM_SUPPORTED_DISTRIBUTION_STRATEGIES = ["torch_distributed"]`
`164`	`166`	`TRAINIUM_SUPPORTED_TORCH_DISTRIBUTED_FRAMEWORK_VERSIONS = [`