Skip to content

Commit 0254538

Browse files
authored
Merge branch 'aws:master' into master
2 parents b9e2517 + 9ead9c8 commit 0254538

File tree

84 files changed

+2308
-527
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+2308
-527
lines changed

CHANGELOG.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,59 @@
11
# Changelog
22

3+
## v2.242.0 (2025-03-14)
4+
5+
### Features
6+
7+
* add integ tests for training JumpStart models in private hub
8+
9+
### Bug Fixes and Other Changes
10+
11+
* Torch upgrade
12+
* Prevent RunContext overlap between test_run tests
13+
* remove s3 output location requirement from hub class init
14+
* Fixing Pytorch training python version in tests
15+
* update image_uri_configs 03-11-2025 07:18:09 PST
16+
* resolve infinite loop in _find_config on Windows systems
17+
* pipeline definition function doc update
18+
19+
## v2.241.0 (2025-03-06)
20+
21+
### Features
22+
23+
* Make DistributedConfig Extensible
24+
* support training for JumpStart model references as part of Curated Hub Phase 2
25+
* Allow ModelTrainer to accept hyperparameters file
26+
27+
### Bug Fixes and Other Changes
28+
29+
* Skip tests with deprecated instance type
30+
* Ensure Model.is_repack() returns a boolean
31+
* Fix error when there is no session to call _create_model_request()
32+
* Use sagemaker session's s3_resource in download_folder
33+
* Added check for the presence of model package group before creating one
34+
* Fix key error in _send_metrics()
35+
36+
## v2.240.0 (2025-02-25)
37+
38+
### Features
39+
40+
* Add support for TGI Neuronx 0.0.27 and HF PT 2.3.0 image in PySDK
41+
42+
### Bug Fixes and Other Changes
43+
44+
* Remove main function entrypoint in ModelBuilder dependency manager.
45+
* forbid extras in Configs
46+
* altconfig hubcontent and reenable integ test
47+
* Merge branch 'master-rba' into local_merge
48+
* py_version doc fixes
49+
* Add backward compatibility for RecordSerializer and RecordDeserializer
50+
* update image_uri_configs 02-21-2025 06:18:10 PST
51+
* update image_uri_configs 02-20-2025 06:18:08 PST
52+
53+
### Documentation Changes
54+
55+
* Removed a line about python version requirements of training script which can misguide users.
56+
357
## v2.239.3 (2025-02-19)
458

559
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.239.4.dev0
1+
2.242.1.dev0

doc/frameworks/pytorch/using_pytorch.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ To train a PyTorch model by using the SageMaker Python SDK:
2828
Prepare a PyTorch Training Script
2929
=================================
3030

31-
Your PyTorch training script must be a Python 3.6 compatible source file.
32-
3331
Prepare your script in a source file separate from the notebook, terminal session, or source file you're
3432
using to submit the script to SageMaker via a ``PyTorch`` Estimator. This will be discussed in further detail below.
3533

doc/overview.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ To train a model by using the SageMaker Python SDK, you:
3030

3131
After you train a model, you can save it, and then serve the model as an endpoint to get real-time inferences or get inferences for an entire dataset by using batch transform.
3232

33+
34+
Important Note:
35+
36+
* When using torch to load models, it is recommended to use torch>=2.6.0 and torchvision>=0.17.0.
37+
3338
Prepare a Training script
3439
=========================
3540

src/sagemaker/_studio.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ def _find_config(working_dir=None):
6565
wd = Path(working_dir) if working_dir else Path.cwd()
6666

6767
path = None
68-
while path is None and not wd.match("/"):
68+
69+
# Get the root of the current working directory for both Windows and Unix-like systems
70+
root = Path(wd.anchor)
71+
while path is None and wd != root:
6972
candidate = wd / STUDIO_PROJECT_CONFIG
7073
if Path.exists(candidate):
7174
path = candidate

src/sagemaker/estimator.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2550,7 +2550,6 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
25502550
raise ValueError(
25512551
"File URIs are supported in local mode only. Please use a S3 URI instead."
25522552
)
2553-
25542553
config = _Job._load_config(inputs, estimator)
25552554

25562555
current_hyperparameters = estimator.hyperparameters()

src/sagemaker/experiments/_metrics.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,8 @@ def _send_metrics(self, metrics):
197197
response = self._metrics_client.batch_put_metrics(**request)
198198
errors = response["Errors"] if "Errors" in response else None
199199
if errors:
200-
message = errors[0]["Message"]
201-
raise Exception(f'{len(errors)} errors with message "{message}"')
200+
error_code = errors[0]["Code"]
201+
raise Exception(f'{len(errors)} errors with error code "{error_code}"')
202202

203203
def _construct_batch_put_metrics_request(self, batch):
204204
"""Creates dictionary object used as request to metrics service."""

src/sagemaker/huggingface/model.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def deploy(
218218
container_startup_health_check_timeout=None,
219219
inference_recommendation_id=None,
220220
explainer_config=None,
221+
update_endpoint: Optional[bool] = False,
221222
**kwargs,
222223
):
223224
"""Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
@@ -296,6 +297,11 @@ def deploy(
296297
would like to deploy the model and endpoint with recommended parameters.
297298
explainer_config (sagemaker.explainer.ExplainerConfig): Specifies online explainability
298299
configuration for use with Amazon SageMaker Clarify. (default: None)
300+
update_endpoint (Optional[bool]):
301+
Flag to update the model in an existing Amazon SageMaker endpoint.
302+
If True, this will deploy a new EndpointConfig to an already existing endpoint
303+
and delete resources corresponding to the previous EndpointConfig. Default: False
304+
Note: Currently this is supported for single model endpoints
299305
Raises:
300306
ValueError: If arguments combination check failed in these circumstances:
301307
- If no role is specified or
@@ -335,6 +341,7 @@ def deploy(
335341
container_startup_health_check_timeout=container_startup_health_check_timeout,
336342
inference_recommendation_id=inference_recommendation_id,
337343
explainer_config=explainer_config,
344+
update_endpoint=update_endpoint,
338345
**kwargs,
339346
)
340347

src/sagemaker/image_uri_config/pytorch.json

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@
8585
"2.2": "2.2.0",
8686
"2.3": "2.3.0",
8787
"2.4": "2.4.0",
88-
"2.5": "2.5.1"
88+
"2.5": "2.5.1",
89+
"2.6": "2.6.0"
8990
},
9091
"versions": {
9192
"0.4.0": {
@@ -1253,6 +1254,50 @@
12531254
"us-west-2": "763104351884"
12541255
},
12551256
"repository": "pytorch-inference"
1257+
},
1258+
"2.6.0": {
1259+
"py_versions": [
1260+
"py312"
1261+
],
1262+
"registries": {
1263+
"af-south-1": "626614931356",
1264+
"ap-east-1": "871362719292",
1265+
"ap-northeast-1": "763104351884",
1266+
"ap-northeast-2": "763104351884",
1267+
"ap-northeast-3": "364406365360",
1268+
"ap-south-1": "763104351884",
1269+
"ap-south-2": "772153158452",
1270+
"ap-southeast-1": "763104351884",
1271+
"ap-southeast-2": "763104351884",
1272+
"ap-southeast-3": "907027046896",
1273+
"ap-southeast-4": "457447274322",
1274+
"ap-southeast-5": "550225433462",
1275+
"ap-southeast-7": "590183813437",
1276+
"ca-central-1": "763104351884",
1277+
"ca-west-1": "204538143572",
1278+
"cn-north-1": "727897471807",
1279+
"cn-northwest-1": "727897471807",
1280+
"eu-central-1": "763104351884",
1281+
"eu-central-2": "380420809688",
1282+
"eu-north-1": "763104351884",
1283+
"eu-south-1": "692866216735",
1284+
"eu-south-2": "503227376785",
1285+
"eu-west-1": "763104351884",
1286+
"eu-west-2": "763104351884",
1287+
"eu-west-3": "763104351884",
1288+
"il-central-1": "780543022126",
1289+
"me-central-1": "914824155844",
1290+
"me-south-1": "217643126080",
1291+
"mx-central-1": "637423239942",
1292+
"sa-east-1": "763104351884",
1293+
"us-east-1": "763104351884",
1294+
"us-east-2": "763104351884",
1295+
"us-gov-east-1": "446045086412",
1296+
"us-gov-west-1": "442386744353",
1297+
"us-west-1": "763104351884",
1298+
"us-west-2": "763104351884"
1299+
},
1300+
"repository": "pytorch-inference"
12561301
}
12571302
}
12581303
},
@@ -1628,7 +1673,8 @@
16281673
"2.2": "2.2.0",
16291674
"2.3": "2.3.0",
16301675
"2.4": "2.4.0",
1631-
"2.5": "2.5.1"
1676+
"2.5": "2.5.1",
1677+
"2.6": "2.6.0"
16321678
},
16331679
"versions": {
16341680
"0.4.0": {
@@ -2801,6 +2847,50 @@
28012847
"us-west-2": "763104351884"
28022848
},
28032849
"repository": "pytorch-training"
2850+
},
2851+
"2.6.0": {
2852+
"py_versions": [
2853+
"py312"
2854+
],
2855+
"registries": {
2856+
"af-south-1": "626614931356",
2857+
"ap-east-1": "871362719292",
2858+
"ap-northeast-1": "763104351884",
2859+
"ap-northeast-2": "763104351884",
2860+
"ap-northeast-3": "364406365360",
2861+
"ap-south-1": "763104351884",
2862+
"ap-south-2": "772153158452",
2863+
"ap-southeast-1": "763104351884",
2864+
"ap-southeast-2": "763104351884",
2865+
"ap-southeast-3": "907027046896",
2866+
"ap-southeast-4": "457447274322",
2867+
"ap-southeast-5": "550225433462",
2868+
"ap-southeast-7": "590183813437",
2869+
"ca-central-1": "763104351884",
2870+
"ca-west-1": "204538143572",
2871+
"cn-north-1": "727897471807",
2872+
"cn-northwest-1": "727897471807",
2873+
"eu-central-1": "763104351884",
2874+
"eu-central-2": "380420809688",
2875+
"eu-north-1": "763104351884",
2876+
"eu-south-1": "692866216735",
2877+
"eu-south-2": "503227376785",
2878+
"eu-west-1": "763104351884",
2879+
"eu-west-2": "763104351884",
2880+
"eu-west-3": "763104351884",
2881+
"il-central-1": "780543022126",
2882+
"me-central-1": "914824155844",
2883+
"me-south-1": "217643126080",
2884+
"mx-central-1": "637423239942",
2885+
"sa-east-1": "763104351884",
2886+
"us-east-1": "763104351884",
2887+
"us-east-2": "763104351884",
2888+
"us-gov-east-1": "446045086412",
2889+
"us-gov-west-1": "442386744353",
2890+
"us-west-1": "763104351884",
2891+
"us-west-2": "763104351884"
2892+
},
2893+
"repository": "pytorch-training"
28042894
}
28052895
}
28062896
}

src/sagemaker/inputs.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def __init__(
4343
attribute_names: Optional[List[Union[str, PipelineVariable]]] = None,
4444
target_attribute_name: Optional[Union[str, PipelineVariable]] = None,
4545
shuffle_config: Optional["ShuffleConfig"] = None,
46+
hub_access_config: Optional[dict] = None,
47+
model_access_config: Optional[dict] = None,
4648
):
4749
r"""Create a definition for input data used by a SageMaker training job.
4850
@@ -102,6 +104,13 @@ def __init__(
102104
shuffle_config (sagemaker.inputs.ShuffleConfig): If specified this configuration enables
103105
shuffling on this channel. See the SageMaker API documentation for more info:
104106
https://docs.aws.amazon.com/sagemaker/latest/dg/API_ShuffleConfig.html
107+
hub_access_config (dict): Specify the HubAccessConfig of a
108+
Model Reference for which a training job is being created.
109+
model_access_config (dict): For models that require a Model Access Config, specify True
110+
or False to indicate whether model terms of use have been accepted.
111+
The `accept_eula` value must be explicitly defined as `True` in order to
112+
accept the end-user license agreement (EULA) that some
113+
models require. (Default: None).
105114
"""
106115
self.config = {
107116
"DataSource": {"S3DataSource": {"S3DataType": s3_data_type, "S3Uri": s3_data}}
@@ -129,6 +138,27 @@ def __init__(
129138
self.config["TargetAttributeName"] = target_attribute_name
130139
if shuffle_config is not None:
131140
self.config["ShuffleConfig"] = {"Seed": shuffle_config.seed}
141+
self.add_hub_access_config(hub_access_config)
142+
self.add_model_access_config(model_access_config)
143+
144+
def add_hub_access_config(self, hub_access_config=None):
145+
"""Add Hub Access Config to the channel's configuration.
146+
147+
Args:
148+
hub_access_config (dict): The HubAccessConfig to be added to the
149+
channel's configuration.
150+
"""
151+
if hub_access_config is not None:
152+
self.config["DataSource"]["S3DataSource"]["HubAccessConfig"] = hub_access_config
153+
154+
def add_model_access_config(self, model_access_config=None):
155+
"""Add Model Access Config to the channel's configuration.
156+
157+
Args:
158+
model_access_config (dict): Whether model terms of use have been accepted.
159+
"""
160+
if model_access_config is not None:
161+
self.config["DataSource"]["S3DataSource"]["ModelAccessConfig"] = model_access_config
132162

133163

134164
class ShuffleConfig(object):

0 commit comments

Comments
 (0)