Skip to content

Commit bf3502a

Browse files
authored
Merge branch 'master' into master
2 parents 8a1ca20 + 2fff890 commit bf3502a

File tree

12 files changed

+176
-148
lines changed

12 files changed

+176
-148
lines changed

CHANGELOG.md

+13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# Changelog
22

3+
## v2.207.0 (2024-02-05)
4+
5+
### Features
6+
7+
* Introduce HF Transformers to ModelBuilder
8+
* Retrieve JumpStart estimator and predictor without specifying a model ID (inferred from tags)
9+
10+
### Bug Fixes and Other Changes
11+
12+
* SMP PT upgrade to 2.1
13+
* Fetch HF metadata only when explicit type is not selected
14+
* Relax upper bound for urllib dependency
15+
316
## v2.206.0 (2024-01-31)
417

518
### Features

CONTRIBUTING.md

+6
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,12 @@ For example, see the [Processing API reference](https://github.com/aws/sagemaker
227227

228228
To build the Sphinx docs, run the following commands in the `doc/` directory:
229229

230+
```shell
231+
# Initial setup, only required for the first run
232+
pip install -r requirements.txt
233+
pip install -e ../
234+
```
235+
230236
```shell
231237
make html
232238
```

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.206.1.dev0
1+
2.207.1.dev0

src/sagemaker/fw_utils.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@
139139
"1.13.1",
140140
"2.0.0",
141141
"2.0.1",
142+
"2.1.0",
143+
"2.1.2",
142144
],
143145
}
144146

@@ -158,7 +160,7 @@
158160
]
159161

160162

161-
TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0"]
163+
TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.2"]
162164

163165
TRAINIUM_SUPPORTED_DISTRIBUTION_STRATEGIES = ["torch_distributed"]
164166
TRAINIUM_SUPPORTED_TORCH_DISTRIBUTED_FRAMEWORK_VERSIONS = [

src/sagemaker/image_uri_config/huggingface.json

+104-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"4.12": "4.12.3",
1313
"4.17": "4.17.0",
1414
"4.26": "4.26.0",
15-
"4.28": "4.28.1"
15+
"4.28": "4.28.1",
16+
"4.36": "4.36.0"
1617
},
1718
"versions": {
1819
"4.4.2": {
@@ -970,6 +971,53 @@
970971
"gpu": "cu118-ubuntu20.04"
971972
}
972973
}
974+
},
975+
"4.36.0": {
976+
"version_aliases": {
977+
"pytorch2.1": "pytorch2.1.0"
978+
},
979+
"pytorch2.1.0": {
980+
"py_versions": [
981+
"py310"
982+
],
983+
"registries": {
984+
"af-south-1": "626614931356",
985+
"il-central-1": "780543022126",
986+
"ap-east-1": "871362719292",
987+
"ap-northeast-1": "763104351884",
988+
"ap-northeast-2": "763104351884",
989+
"ap-northeast-3": "364406365360",
990+
"ap-south-1": "763104351884",
991+
"ap-southeast-1": "763104351884",
992+
"ap-southeast-2": "763104351884",
993+
"ap-southeast-3": "907027046896",
994+
"ca-central-1": "763104351884",
995+
"cn-north-1": "727897471807",
996+
"cn-northwest-1": "727897471807",
997+
"eu-central-1": "763104351884",
998+
"eu-north-1": "763104351884",
999+
"eu-west-1": "763104351884",
1000+
"eu-west-2": "763104351884",
1001+
"eu-west-3": "763104351884",
1002+
"eu-south-1": "692866216735",
1003+
"me-south-1": "217643126080",
1004+
"me-central-1": "914824155844",
1005+
"sa-east-1": "763104351884",
1006+
"us-east-1": "763104351884",
1007+
"us-east-2": "763104351884",
1008+
"us-gov-east-1": "446045086412",
1009+
"us-gov-west-1": "442386744353",
1010+
"us-iso-east-1": "886529160074",
1011+
"us-isob-east-1": "094389454867",
1012+
"us-west-1": "763104351884",
1013+
"us-west-2": "763104351884",
1014+
"ca-west-1": "204538143572"
1015+
},
1016+
"repository": "huggingface-pytorch-training",
1017+
"container_version": {
1018+
"gpu": "cu121-ubuntu20.04"
1019+
}
1020+
}
9731021
}
9741022
}
9751023
},
@@ -985,7 +1033,8 @@
9851033
"4.12": "4.12.3",
9861034
"4.17": "4.17.0",
9871035
"4.26": "4.26.0",
988-
"4.28": "4.28.1"
1036+
"4.28": "4.28.1",
1037+
"4.37": "4.37.0"
9891038
},
9901039
"versions": {
9911040
"4.6.1": {
@@ -1782,7 +1831,59 @@
17821831
"cpu": "ubuntu20.04"
17831832
}
17841833
}
1834+
},
1835+
"4.37.0": {
1836+
"version_aliases": {
1837+
"pytorch2.1": "pytorch2.1.0"
1838+
},
1839+
"pytorch2.1.0": {
1840+
"py_versions": [
1841+
"py310"
1842+
],
1843+
"registries": {
1844+
"af-south-1": "626614931356",
1845+
"il-central-1": "780543022126",
1846+
"ap-east-1": "871362719292",
1847+
"ap-northeast-1": "763104351884",
1848+
"ap-northeast-2": "763104351884",
1849+
"ap-northeast-3": "364406365360",
1850+
"ap-south-1": "763104351884",
1851+
"ap-south-2": "772153158452",
1852+
"ap-southeast-1": "763104351884",
1853+
"ap-southeast-2": "763104351884",
1854+
"ap-southeast-3": "907027046896",
1855+
"ap-southeast-4": "457447274322",
1856+
"ca-central-1": "763104351884",
1857+
"cn-north-1": "727897471807",
1858+
"cn-northwest-1": "727897471807",
1859+
"eu-central-1": "763104351884",
1860+
"eu-central-2": "380420809688",
1861+
"eu-north-1": "763104351884",
1862+
"eu-west-1": "763104351884",
1863+
"eu-west-2": "763104351884",
1864+
"eu-west-3": "763104351884",
1865+
"eu-south-1": "692866216735",
1866+
"eu-south-2": "503227376785",
1867+
"me-south-1": "217643126080",
1868+
"me-central-1": "914824155844",
1869+
"sa-east-1": "763104351884",
1870+
"us-east-1": "763104351884",
1871+
"us-east-2": "763104351884",
1872+
"us-gov-east-1": "446045086412",
1873+
"us-gov-west-1": "442386744353",
1874+
"us-iso-east-1": "886529160074",
1875+
"us-isob-east-1": "094389454867",
1876+
"us-west-1": "763104351884",
1877+
"us-west-2": "763104351884",
1878+
"ca-west-1": "204538143572"
1879+
},
1880+
"repository": "huggingface-pytorch-inference",
1881+
"container_version": {
1882+
"gpu": "cu118-ubuntu20.04",
1883+
"cpu": "ubuntu22.04"
1884+
}
1885+
}
17851886
}
17861887
}
17871888
}
1788-
}
1889+
}

src/sagemaker/image_uri_config/pytorch-smp.json

+28-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"gpu"
55
],
66
"version_aliases": {
7-
"2.0": "2.0.1"
7+
"2.0": "2.0.1",
8+
"2.1": "2.1.2"
89
},
910
"versions": {
1011
"2.0.1": {
@@ -31,7 +32,32 @@
3132
"us-west-2": "658645717510"
3233
},
3334
"repository": "smdistributed-modelparallel"
35+
},
36+
"2.1.2": {
37+
"py_versions": [
38+
"py310"
39+
],
40+
"registries": {
41+
"ap-northeast-1": "658645717510",
42+
"ap-northeast-2": "658645717510",
43+
"ap-northeast-3": "658645717510",
44+
"ap-south-1": "658645717510",
45+
"ap-southeast-1": "658645717510",
46+
"ap-southeast-2": "658645717510",
47+
"ca-central-1": "658645717510",
48+
"eu-central-1": "658645717510",
49+
"eu-north-1": "658645717510",
50+
"eu-west-1": "658645717510",
51+
"eu-west-2": "658645717510",
52+
"eu-west-3": "658645717510",
53+
"sa-east-1": "658645717510",
54+
"us-east-1": "658645717510",
55+
"us-east-2": "658645717510",
56+
"us-west-1": "658645717510",
57+
"us-west-2": "658645717510"
58+
},
59+
"repository": "smdistributed-modelparallel"
3460
}
3561
}
3662
}
37-
}
63+
}

src/sagemaker/image_uris.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def get_training_image_uri(
672672
if "modelparallel" in distribution["smdistributed"]:
673673
if distribution["smdistributed"]["modelparallel"].get("enabled", True):
674674
framework = "pytorch-smp"
675-
if "p5" in instance_type:
675+
if "p5" in instance_type or "2.1" in framework_version:
676676
container_version = "cu121"
677677
else:
678678
container_version = "cu118"

src/sagemaker/serve/builder/model_builder.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -579,19 +579,19 @@ def build(
579579

580580
self.serve_settings = self._get_serve_setting()
581581

582-
hf_model_md = get_huggingface_model_metadata(
583-
self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN")
584-
)
585-
586582
if isinstance(self.model, str):
587583
if self._is_jumpstart_model_id():
588584
return self._build_for_jumpstart()
589-
if self._is_djl():
585+
if self._is_djl(): # pylint: disable=R1705
590586
return self._build_for_djl()
591-
if hf_model_md.get("pipeline_tag") == "text-generation": # pylint: disable=R1705
592-
return self._build_for_tgi()
593587
else:
594-
return self._build_for_transformers()
588+
hf_model_md = get_huggingface_model_metadata(
589+
self.model, self.env_vars.get("HUGGING_FACE_HUB_TOKEN")
590+
)
591+
if hf_model_md.get("pipeline_tag") == "text-generation": # pylint: disable=R1705
592+
return self._build_for_tgi()
593+
else:
594+
return self._build_for_transformers()
595595

596596
self._build_validations()
597597

tests/integ/sagemaker/serve/test_serve_js_happy.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from __future__ import absolute_import
1414

1515
import pytest
16-
from unittest.mock import patch, Mock
1716
from sagemaker.serve.builder.model_builder import ModelBuilder
1817
from sagemaker.serve.builder.schema_builder import SchemaBuilder
1918
from tests.integ.sagemaker.serve.constants import (
@@ -33,7 +32,6 @@
3332
]
3433
JS_MODEL_ID = "huggingface-textgeneration1-gpt-neo-125m-fp16"
3534
ROLE_NAME = "SageMakerRole"
36-
MOCK_HF_MODEL_METADATA_JSON = {"mock_key": "mock_value"}
3735

3836

3937
@pytest.fixture
@@ -47,23 +45,14 @@ def happy_model_builder(sagemaker_session):
4745
)
4846

4947

50-
@patch("sagemaker.huggingface.llm_utils.urllib")
51-
@patch("sagemaker.huggingface.llm_utils.json")
5248
@pytest.mark.skipif(
5349
PYTHON_VERSION_IS_NOT_310,
5450
reason="The goal of these test are to test the serving components of our feature",
5551
)
5652
@pytest.mark.slow_test
57-
def test_happy_tgi_sagemaker_endpoint(
58-
mock_urllib, mock_json, happy_model_builder, gpu_instance_type
59-
):
53+
def test_happy_tgi_sagemaker_endpoint(happy_model_builder, gpu_instance_type):
6054
logger.info("Running in SAGEMAKER_ENDPOINT mode...")
6155
caught_ex = None
62-
63-
mock_json.load.return_value = MOCK_HF_MODEL_METADATA_JSON
64-
mock_hf_model_metadata_url = Mock()
65-
mock_urllib.request.Request.side_effect = mock_hf_model_metadata_url
66-
6756
model = happy_model_builder.build()
6857

6958
with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):

tests/integ/sagemaker/serve/test_serve_pt_happy.py

-9
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import io
2020
import numpy as np
2121

22-
from unittest.mock import patch, Mock
2322
from sagemaker.serve.builder.model_builder import ModelBuilder, Mode
2423
from sagemaker.serve.builder.schema_builder import SchemaBuilder, CustomPayloadTranslator
2524
from sagemaker.serve.spec.inference_spec import InferenceSpec
@@ -38,7 +37,6 @@
3837
logger = logging.getLogger(__name__)
3938

4039
ROLE_NAME = "SageMakerRole"
41-
MOCK_HF_MODEL_METADATA_JSON = {"mock_key": "mock_value"}
4240

4341

4442
@pytest.fixture
@@ -183,8 +181,6 @@ def model_builder(request):
183181
# ), f"{caught_ex} was thrown when running pytorch squeezenet local container test"
184182

185183

186-
@patch("sagemaker.huggingface.llm_utils.urllib")
187-
@patch("sagemaker.huggingface.llm_utils.json")
188184
@pytest.mark.skipif(
189185
PYTHON_VERSION_IS_NOT_310, # or NOT_RUNNING_ON_INF_EXP_DEV_PIPELINE,
190186
reason="The goal of these test are to test the serving components of our feature",
@@ -194,17 +190,12 @@ def model_builder(request):
194190
)
195191
@pytest.mark.slow_test
196192
def test_happy_pytorch_sagemaker_endpoint(
197-
mock_urllib,
198-
mock_json,
199193
sagemaker_session,
200194
model_builder,
201195
cpu_instance_type,
202196
test_image,
203197
):
204198
logger.info("Running in SAGEMAKER_ENDPOINT mode...")
205-
mock_json.load.return_value = MOCK_HF_MODEL_METADATA_JSON
206-
mock_hf_model_metadata_url = Mock()
207-
mock_urllib.request.Request.side_effect = mock_hf_model_metadata_url
208199
caught_ex = None
209200

210201
iam_client = sagemaker_session.boto_session.client("iam")

tests/unit/sagemaker/image_uris/test_smp_v2.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from sagemaker import image_uris
1717
from tests.unit.sagemaker.image_uris import expected_uris
1818

19-
CONTAINER_VERSIONS = {"ml.p4d.24xlarge": "cu118", "ml.p5d.24xlarge": "cu121"}
19+
CONTAINER_VERSIONS = {"ml.p4d.24xlarge": "cu118", "ml.p5.24xlarge": "cu121"}
2020

2121

2222
@pytest.mark.parametrize("load_config", ["pytorch-smp.json"], indirect=True)
@@ -34,6 +34,10 @@ def test_smp_v2(load_config):
3434
for py_version in PY_VERSIONS:
3535
for region in ACCOUNTS.keys():
3636
for instance_type in CONTAINER_VERSIONS.keys():
37+
cuda_vers = CONTAINER_VERSIONS[instance_type]
38+
if "2.1" in version:
39+
cuda_vers = "cu121"
40+
3741
uri = image_uris.get_training_image_uri(
3842
region,
3943
framework="pytorch",
@@ -45,7 +49,7 @@ def test_smp_v2(load_config):
4549
expected = expected_uris.framework_uri(
4650
repo="smdistributed-modelparallel",
4751
fw_version=version,
48-
py_version=f"{py_version}-{CONTAINER_VERSIONS[instance_type]}",
52+
py_version=f"{py_version}-{cuda_vers}",
4953
processor=processor,
5054
region=region,
5155
account=ACCOUNTS[region],

0 commit comments

Comments
 (0)