Skip to content

Commit ed3ed4a

Browse files
authored
Merge branch 'master' into patch-1
2 parents de08c76 + 8ac6ca8 commit ed3ed4a

File tree

11 files changed

+376
-15
lines changed

11 files changed

+376
-15
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# Changelog
22

3+
## v2.150.0 (2023-04-26)
4+
5+
### Features
6+
7+
* Introduce TensorBoard app class
8+
9+
### Bug Fixes and Other Changes
10+
11+
* Update data wrangler images
12+
313
## v2.149.0 (2023-04-25)
414

515
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.149.1.dev0
1+
2.150.1.dev0

src/sagemaker/image_uri_config/data-wrangler.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,35 @@
77
"ap-east-1": "707077482487",
88
"ap-northeast-1": "649008135260",
99
"ap-northeast-2": "131546521161",
10+
"ap-northeast-3": "913387583493",
11+
"ap-south-1": "089933028263",
12+
"ap-southeast-1": "119527597002",
13+
"ap-southeast-2": "422173101802",
14+
"ca-central-1": "557239378090",
15+
"eu-central-1": "024640144536",
16+
"eu-north-1": "054986407534",
17+
"eu-south-1": "488287956546",
18+
"eu-west-1": "245179582081",
19+
"eu-west-2": "894491911112",
20+
"eu-west-3": "807237891255",
21+
"me-south-1": "376037874950",
22+
"sa-east-1": "424196993095",
23+
"us-east-1": "663277389841",
24+
"us-east-2": "415577184552",
25+
"us-west-1": "926135532090",
26+
"us-west-2": "174368400705",
27+
"cn-north-1": "245909111842",
28+
"cn-northwest-1": "249157047649"
29+
},
30+
"repository": "sagemaker-data-wrangler-container"
31+
},
32+
"2.x": {
33+
"registries": {
34+
"af-south-1": "143210264188",
35+
"ap-east-1": "707077482487",
36+
"ap-northeast-1": "649008135260",
37+
"ap-northeast-2": "131546521161",
38+
"ap-northeast-3": "913387583493",
1039
"ap-south-1": "089933028263",
1140
"ap-southeast-1": "119527597002",
1241
"ap-southeast-2": "422173101802",

src/sagemaker/image_uris.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
SKLEARN_FRAMEWORK = "sklearn"
3737
TRAINIUM_ALLOWED_FRAMEWORKS = "pytorch"
3838
INFERENCE_GRAVITON = "inference_graviton"
39+
DATA_WRANGLER_FRAMEWORK = "data-wrangler"
3940

4041

4142
@override_pipeline_parameter_var
@@ -461,6 +462,9 @@ def _validate_version_and_set_if_needed(version, config, framework):
461462

462463
return available_versions[0]
463464

465+
if version is None and framework in [DATA_WRANGLER_FRAMEWORK]:
466+
version = _get_latest_versions(available_versions)
467+
464468
_validate_arg(version, available_versions + aliased_versions, "{} version".format(framework))
465469
return version
466470

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""This module contains methods for starting up and accessing TensorBoard apps hosted on SageMaker"""
14+
from __future__ import absolute_import
15+
16+
import json
17+
import logging
18+
import os
19+
import re
20+
21+
from typing import Optional
22+
from sagemaker.session import Session, NOTEBOOK_METADATA_FILE
23+
24+
logger = logging.getLogger(__name__)
25+
26+
27+
class TensorBoardApp(object):
    """Build URLs for a TensorBoard application hosted on SageMaker.

    On construction the instance resolves an AWS Region and, if
    ``NOTEBOOK_METADATA_FILE`` exists, tries to read a Studio domain id and user
    profile name from it. ``get_app_url`` then returns a Studio URL when both
    values validated, and a SageMaker console URL otherwise.
    """

    # Shared by training job names and user profile names: one alphanumeric
    # character followed by up to 62 more, each optionally preceded by hyphens.
    _NAME_REGEX = "^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}"
    _MAX_DOMAIN_ID_LENGTH = 63

    def __init__(self, region: Optional[str] = None):
        """Initialize a TensorBoardApp object.

        Args:
            region (str): The AWS Region, e.g. us-east-1. If not specified, the
                Region of a default ``Session`` is used.

        Raises:
            ValueError: If no Region is passed and ``Session`` raises
                ``ValueError`` while resolving one.
        """
        if region:
            self.region = region
        else:
            try:
                self.region = Session().boto_region_name
            except ValueError as err:
                # Chain explicitly so the underlying configuration error stays visible.
                raise ValueError(
                    "Failed to get the Region information from the default config. Please either "
                    "pass your Region manually as an input argument or set up the local AWS configuration."
                ) from err

        self._domain_id = None
        self._user_profile_name = None
        self._valid_domain_and_user = False
        self._get_domain_and_user()

    def __str__(self) -> str:
        """Return str(self)."""
        return f"TensorBoardApp(region={self.region})"

    def __repr__(self) -> str:
        """Return repr(self)."""
        return self.__str__()

    def get_app_url(self, training_job_name: Optional[str] = None) -> str:
        """Generate an unsigned URL to access the TensorBoard application hosted in SageMaker.

        If a valid domain id and user profile name were read from the Studio
        metadata file, the URL points at the TensorBoard application of that
        Studio domain. Otherwise it points at the TensorBoard landing page in
        the SageMaker console.

        Args:
            training_job_name (str): Optional. The name of the training job to pre-load
                in TensorBoard. If omitted, the returned URL carries no training job.
                Default: ``None``

        Returns:
            str: An unsigned URL for TensorBoard hosted on SageMaker.

        Raises:
            ValueError: If ``training_job_name`` is given and is not a valid job name.
        """
        # Validate once up front; both URL flavours embed the name verbatim.
        if training_job_name is not None:
            self._validate_job_name(training_job_name)

        if self._valid_domain_and_user:
            url = "https://{}.studio.{}.sagemaker.aws/tensorboard/default".format(
                self._domain_id, self.region
            )
            if training_job_name is not None:
                url += "/data/plugin/sagemaker_data_manager/add_folder_or_job?Redirect=True&Name={}".format(
                    training_job_name
                )
            else:
                url += "/#sagemaker_data_manager"
            return url

        url = "https://{region}.console.aws.amazon.com/sagemaker/home?region={region}#/tensor-board-landing".format(
            region=self.region
        )
        if training_job_name is not None:
            url += "/{}".format(training_job_name)
        return url

    def _get_domain_and_user(self) -> None:
        """Read and validate the Studio domain id and user profile from NOTEBOOK_METADATA_FILE.

        Sets ``_valid_domain_and_user`` to True if validation succeeded. Does
        nothing when the file does not exist. A file that exists but cannot be
        read, is not valid JSON, or does not hold valid values only triggers a
        warning, so construction still succeeds and the console URL is used.
        """
        if not os.path.isfile(NOTEBOOK_METADATA_FILE):
            return

        try:
            with open(NOTEBOOK_METADATA_FILE, "rb") as f:
                metadata = json.loads(f.read())
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            metadata = None

        # Only a JSON object can carry the two keys; anything else leaves both as None.
        if isinstance(metadata, dict):
            self._domain_id = metadata.get("DomainId")
            self._user_profile_name = metadata.get("UserProfileName")

        if self._validate_domain_id() and self._validate_user_profile_name():
            self._valid_domain_and_user = True
        else:
            logger.warning(
                "NOTEBOOK_METADATA_FILE detected but failed to get valid domain and user from it."
            )

    def _validate_job_name(self, job_name: str) -> None:
        """Validate training job name format.

        Raises:
            ValueError: If ``job_name`` is not a string or does not fully match
                the job name regular expression.
        """
        job_name_regex = self._NAME_REGEX
        # The isinstance guard reports non-string input as an invalid name
        # instead of letting re.fullmatch raise TypeError.
        if not isinstance(job_name, str) or not re.fullmatch(job_name_regex, job_name):
            raise ValueError(
                "Invalid job name. Job name must match regular expression {}".format(job_name_regex)
            )

    def _validate_domain_id(self) -> bool:
        """Return True if the domain id is a non-empty string of at most 63 characters."""
        domain_id = self._domain_id
        # Reject empty and non-string values: either would yield a broken Studio host name.
        if not isinstance(domain_id, str) or not domain_id:
            return False
        return len(domain_id) <= self._MAX_DOMAIN_ID_LENGTH

    def _validate_user_profile_name(self) -> bool:
        """Return True if the user profile name is a string fully matching the name regex."""
        user_profile_name = self._user_profile_name
        if not isinstance(user_profile_name, str):
            return False
        return re.fullmatch(self._NAME_REGEX, user_profile_name) is not None

tests/conftest.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,6 @@
5252
"me-south-1",
5353
"sa-east-1",
5454
"us-west-1",
55-
"ap-northeast-1", # it has p3, but not enough
56-
"ap-south-1",
57-
"ap-northeast-2", # it has p3, but not enough
58-
"us-east-2", # it has p3, but not enough
5955
]
6056

6157
NO_T2_REGIONS = ["eu-north-1", "ap-east-1", "me-south-1"]

tests/integ/test_horovod.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
from sagemaker.tensorflow import TensorFlow
2525
from tests.integ import timeout
2626

27+
from packaging.version import Version
28+
2729
horovod_dir = os.path.join(os.path.dirname(__file__), "..", "data", "horovod")
2830

2931

@@ -58,6 +60,12 @@ def test_hvd_gpu(
5860
tmpdir,
5961
**kwargs,
6062
):
63+
if (
64+
Version(tensorflow_training_latest_version) >= Version("2.12")
65+
and kwargs["instance_type"] == "ml.p2.xlarge"
66+
):
67+
pytest.skip("P2 instances have been deprecated for sagemaker jobs starting TensorFlow 2.12")
68+
6169
_create_and_fit_estimator(
6270
sagemaker_session,
6371
tensorflow_training_latest_version,

tests/integ/test_tf.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from tests.integ.utils import gpu_list, retry_with_instance_list
2929
from tests.integ.s3_utils import assert_s3_file_patterns_exist
3030

31+
from packaging.version import Version
3132

3233
ROLE = "SageMakerRole"
3334

@@ -56,6 +57,12 @@ def test_framework_processing_job_with_deps(
5657
tensorflow_training_latest_py_version,
5758
**kwargs,
5859
):
60+
if (
61+
Version(tensorflow_training_latest_version) >= Version("2.12")
62+
and kwargs["instance_type"] == "ml.p2.xlarge"
63+
):
64+
pytest.skip("P2 instances have been deprecated for sagemaker jobs starting TensorFlow 2.12")
65+
5966
with timeout.timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
6067
code_path = os.path.join(DATA_DIR, "dummy_code_bundle_with_reqs")
6168
entry_point = "main_script.py"
@@ -187,6 +194,12 @@ def test_mwms_gpu(
187194
capsys,
188195
**kwargs,
189196
):
197+
if (
198+
Version(tensorflow_training_latest_version) >= Version("2.12")
199+
and kwargs["instance_type"] == "ml.p2.xlarge"
200+
):
201+
pytest.skip("P2 instances have been deprecated for sagemaker jobs starting TensorFlow 2.12")
202+
190203
instance_count = 2
191204
estimator = TensorFlow(
192205
source_dir=os.path.join(RESOURCE_PATH, "tensorflow_mnist"),
@@ -243,6 +256,12 @@ def test_mnist_distributed_gpu(
243256
tensorflow_training_latest_py_version,
244257
**kwargs,
245258
):
259+
if (
260+
Version(tensorflow_training_latest_version) >= Version("2.12")
261+
and kwargs["instance_type"] == "ml.p2.xlarge"
262+
):
263+
pytest.skip("P2 instances have been deprecated for sagemaker jobs starting TensorFlow 2.12")
264+
246265
_create_and_fit_estimator(
247266
sagemaker_session,
248267
tensorflow_training_latest_version,

tests/unit/sagemaker/image_uris/test_data_wrangler.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"ap-east-1": "707077482487",
2121
"ap-northeast-1": "649008135260",
2222
"ap-northeast-2": "131546521161",
23+
"ap-northeast-3": "913387583493",
2324
"ap-south-1": "089933028263",
2425
"ap-southeast-1": "119527597002",
2526
"ap-southeast-2": "422173101802",
@@ -39,15 +40,29 @@
3940
"cn-north-1": "245909111842",
4041
"cn-northwest-1": "249157047649",
4142
}
43+
VERSIONS = ["1.x", "2.x"]
4244

4345

4446
def test_data_wrangler_ecr_uri():
    """Check the data-wrangler image URI for every entry in VERSIONS and every Region."""
    for version in VERSIONS:
        for region, account in DATA_WRANGLER_ACCOUNTS.items():
            # Pass the loop's version on both sides; a hard-coded "1.x" would
            # leave every other entry in VERSIONS untested.
            actual_uri = image_uris.retrieve("data-wrangler", region=region, version=version)
            expected_uri = expected_uris.algo_uri(
                "sagemaker-data-wrangler-container",
                account,
                region,
                version=version,
            )
            assert expected_uri == actual_uri
57+
58+
59+
def test_data_wrangler_ecr_uri_none():
    """Check that omitting ``version`` yields the URI for the last entry of VERSIONS."""
    target_region = "us-west-2"
    got = image_uris.retrieve("data-wrangler", region=target_region)

    # The expectation uses VERSIONS[-1], so it depends on the list's ordering.
    want = expected_uris.algo_uri(
        "sagemaker-data-wrangler-container",
        DATA_WRANGLER_ACCOUNTS[target_region],
        target_region,
        version=VERSIONS[-1],
    )
    assert want == got

tests/unit/sagemaker/wrangler/test_processing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
REGION = "us-west-2"
2424
DATA_WRANGLER_RECIPE_SOURCE = "s3://data_wrangler_flows/flow-26-18-43-16-0b48ac2e.flow"
2525
DATA_WRANGLER_CONTAINER_URI = (
26-
"174368400705.dkr.ecr.us-west-2.amazonaws.com/sagemaker-data-wrangler-container:1.x"
26+
"174368400705.dkr.ecr.us-west-2.amazonaws.com/sagemaker-data-wrangler-container:2.x"
2727
)
2828
MOCK_S3_URI = "s3://mock_data/mock.csv"
2929

0 commit comments

Comments
 (0)