Skip to content

Commit 2c37ea2

Browse files
authored
Merge branch 'aws:master' into add-gpu-capability-to-local
2 parents 6bf4402 + 6d7bfc4 commit 2c37ea2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1489
-402
lines changed

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
11
# Changelog
22

3+
## v2.92.1 (2022-05-26)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* pin protobuf to < 4.0 to fix breaking change
8+
9+
## v2.92.0 (2022-05-26)
10+
11+
### Features
12+
13+
* add 'Domain' property to RegisterModel step
14+
15+
### Bug Fixes and Other Changes
16+
17+
* support estimator output path parameterization
18+
* Add back: prevent passing PipelineVariable objects into image_uris.retrieve
19+
* JumpStart AMT (Automatic Model Tuning) tracking
20+
* fix missing register method params for framework models
21+
* fix docstring for decorated functions
22+
* Documentation: add SageMaker Model Building Pipelines to Read the Docs
23+
324
## v2.91.1 (2022-05-19)
425

526
### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.91.2.dev0
1+
2.92.2.dev0

setup.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,13 @@ def read_requirements(filename):
4848
# Declare minimal set for installation
4949
required_packages = [
5050
"attrs==20.3.0",
51-
"boto3>=1.20.21",
51+
"boto3>=1.20.21,<2.0",
5252
"google-pasta",
53-
"numpy>=1.9.0",
54-
"protobuf>=3.1",
55-
"protobuf3-to-dict>=0.1.5",
53+
"numpy>=1.9.0,<2.0",
54+
"protobuf>=3.1,<4.0",
55+
"protobuf3-to-dict>=0.1.5,<1.0",
5656
"smdebug_rulesconfig==1.0.1",
57-
"importlib-metadata>=1.4.0",
57+
"importlib-metadata>=1.4.0,<2.0",
5858
"packaging>=20.0",
5959
"pandas",
6060
"pathos",

src/sagemaker/clarify.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -827,7 +827,7 @@ def run_pre_training_bias(
827827
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
828828
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
829829
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
830-
Defaults to computing all.
830+
Defaults to the string "all", which runs all metrics.
831831
wait (bool): Whether the call should wait until the job completes (default: True).
832832
logs (bool): Whether to show the logs produced by the job.
833833
Only meaningful when ``wait`` is True (default: True).
@@ -906,7 +906,7 @@ def run_post_training_bias(
906906
"`CDDPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cddpl.html>`_
907907
", "`TE <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-te.html>`_",
908908
"`FT <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ft.html>`_"].
909-
Defaults to computing all.
909+
Defaults to the string "all", which runs all metrics.
910910
wait (bool): Whether the call should wait until the job completes (default: True).
911911
logs (bool): Whether to show the logs produced by the job.
912912
Only meaningful when ``wait`` is True (default: True).
@@ -989,7 +989,7 @@ def run_bias(
989989
"`TVD <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-total-variation-distance.html>`_",
990990
"`KS <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-kolmogorov-smirnov.html>`_",
991991
"`CDDL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-data-bias-metric-cddl.html>`_"].
992-
Defaults to computing all.
992+
Defaults to the string "all", which runs all metrics.
993993
post_training_methods (str or list[str]): Selector of a subset of potential metrics:
994994
["`DPPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-dppl.html>`_"
995995
, "`DI <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-di.html>`_",
@@ -1002,7 +1002,7 @@ def run_bias(
10021002
"`CDDPL <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-cddpl.html>`_
10031003
", "`TE <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-te.html>`_",
10041004
"`FT <https://docs.aws.amazon.com/sagemaker/latest/dg/clarify-post-training-bias-metric-ft.html>`_"].
1005-
Defaults to computing all.
1005+
Defaults to the string "all", which runs all metrics.
10061006
wait (bool): Whether the call should wait until the job completes (default: True).
10071007
logs (bool): Whether to show the logs produced by the job.
10081008
Only meaningful when ``wait`` is True (default: True).

src/sagemaker/estimator.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ def __init__(
483483
if (
484484
not self.sagemaker_session.local_mode
485485
and output_path
486+
and not is_pipeline_variable(output_path)
486487
and output_path.startswith("file://")
487488
):
488489
raise RuntimeError("file:// output paths are only supported in Local Mode")
@@ -695,26 +696,45 @@ def _stage_user_code_in_s3(self) -> str:
695696
696697
Returns: S3 URI
697698
"""
698-
local_mode = self.output_path.startswith("file://")
699-
700-
if self.code_location is None and local_mode:
701-
code_bucket = self.sagemaker_session.default_bucket()
702-
code_s3_prefix = "{}/{}".format(self._current_job_name, "source")
703-
kms_key = None
704-
elif self.code_location is None:
705-
code_bucket, _ = parse_s3_url(self.output_path)
706-
code_s3_prefix = "{}/{}".format(self._current_job_name, "source")
707-
kms_key = self.output_kms_key
708-
elif local_mode:
709-
code_bucket, key_prefix = parse_s3_url(self.code_location)
710-
code_s3_prefix = "/".join(filter(None, [key_prefix, self._current_job_name, "source"]))
711-
kms_key = None
699+
if is_pipeline_variable(self.output_path):
700+
if self.code_location is None:
701+
code_bucket = self.sagemaker_session.default_bucket()
702+
code_s3_prefix = "{}/{}".format(self._current_job_name, "source")
703+
kms_key = None
704+
else:
705+
code_bucket, key_prefix = parse_s3_url(self.code_location)
706+
code_s3_prefix = "/".join(
707+
filter(None, [key_prefix, self._current_job_name, "source"])
708+
)
709+
710+
output_bucket = self.sagemaker_session.default_bucket()
711+
kms_key = self.output_kms_key if code_bucket == output_bucket else None
712712
else:
713-
code_bucket, key_prefix = parse_s3_url(self.code_location)
714-
code_s3_prefix = "/".join(filter(None, [key_prefix, self._current_job_name, "source"]))
713+
local_mode = self.output_path.startswith("file://")
714+
if local_mode:
715+
if self.code_location is None:
716+
code_bucket = self.sagemaker_session.default_bucket()
717+
code_s3_prefix = "{}/{}".format(self._current_job_name, "source")
718+
kms_key = None
719+
else:
720+
code_bucket, key_prefix = parse_s3_url(self.code_location)
721+
code_s3_prefix = "/".join(
722+
filter(None, [key_prefix, self._current_job_name, "source"])
723+
)
724+
kms_key = None
725+
else:
726+
if self.code_location is None:
727+
code_bucket, _ = parse_s3_url(self.output_path)
728+
code_s3_prefix = "{}/{}".format(self._current_job_name, "source")
729+
kms_key = self.output_kms_key
730+
else:
731+
code_bucket, key_prefix = parse_s3_url(self.code_location)
732+
code_s3_prefix = "/".join(
733+
filter(None, [key_prefix, self._current_job_name, "source"])
734+
)
715735

716-
output_bucket, _ = parse_s3_url(self.output_path)
717-
kms_key = self.output_kms_key if code_bucket == output_bucket else None
736+
output_bucket, _ = parse_s3_url(self.output_path)
737+
kms_key = self.output_kms_key if code_bucket == output_bucket else None
718738

719739
return tar_and_upload_dir(
720740
session=self.sagemaker_session.boto_session,
@@ -1321,6 +1341,11 @@ def register(
13211341
Returns:
13221342
str: A string of SageMaker Model Package ARN.
13231343
"""
1344+
if isinstance(self.sagemaker_session, PipelineSession):
1345+
raise TypeError(
1346+
"estimator.register does not support PipelineSession at this moment. "
1347+
"Please use model.register with PipelineSession if you're using the ModelStep."
1348+
)
13241349
default_name = name_from_base(self.base_job_name)
13251350
model_name = model_name or default_name
13261351
if compile_model_family is not None:

src/sagemaker/fw_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,10 @@ def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution
428428
"""
429429
if training_instance_type == "local" or distribution is None:
430430
return
431+
if is_pipeline_variable(training_instance_type):
432+
# The training_instance_type is not available at compile time.
433+
# Rather, it is supplied at pipeline execution time.
434+
return
431435

432436
is_multi_gpu_instance = (
433437
training_instance_type == "local_gpu"
@@ -485,6 +489,10 @@ def validate_smdistributed(
485489
if "smdistributed" not in distribution:
486490
# Distribution strategy other than smdistributed is selected
487491
return
492+
if is_pipeline_variable(instance_type):
493+
# The instance_type is not available at compile time.
494+
# Rather, it is supplied at pipeline execution time.
495+
return
488496

489497
# distribution contains smdistributed
490498
smdistributed = distribution["smdistributed"]

src/sagemaker/image_uris.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from sagemaker.jumpstart.utils import is_jumpstart_model_input
2424
from sagemaker.spark import defaults
2525
from sagemaker.jumpstart import artifacts
26+
from sagemaker.workflow import is_pipeline_variable
2627

2728
logger = logging.getLogger(__name__)
2829

@@ -104,11 +105,17 @@ def retrieve(
104105
105106
Raises:
106107
NotImplementedError: If the scope is not supported.
107-
ValueError: If the combination of arguments specified is not supported.
108+
ValueError: If the combination of arguments specified is not supported or
109+
any PipelineVariable object is passed in.
108110
VulnerableJumpStartModelError: If any of the dependencies required by the script have
109111
known security vulnerabilities.
110112
DeprecatedJumpStartModelError: If the version of the model is deprecated.
111113
"""
114+
args = dict(locals())
115+
for name, val in args.items():
116+
if is_pipeline_variable(val):
117+
raise ValueError("%s should not be a pipeline variable (%s)" % (name, type(val)))
118+
112119
if is_jumpstart_model_input(model_id, model_version):
113120
return artifacts._retrieve_image_uri(
114121
model_id,

src/sagemaker/session.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,7 @@ def submit(request):
591591
LOGGER.debug("train request: %s", json.dumps(request, indent=4))
592592
self.sagemaker_client.create_training_job(**request)
593593

594-
self._intercept_create_request(train_request, submit)
594+
self._intercept_create_request(train_request, submit, self.train.__name__)
595595

596596
def _get_train_request( # noqa: C901
597597
self,
@@ -922,7 +922,7 @@ def submit(request):
922922
LOGGER.debug("process request: %s", json.dumps(request, indent=4))
923923
self.sagemaker_client.create_processing_job(**request)
924924

925-
self._intercept_create_request(process_request, submit)
925+
self._intercept_create_request(process_request, submit, self.process.__name__)
926926

927927
def _get_process_request(
928928
self,
@@ -2099,7 +2099,7 @@ def submit(request):
20992099
LOGGER.debug("tune request: %s", json.dumps(request, indent=4))
21002100
self.sagemaker_client.create_hyper_parameter_tuning_job(**request)
21012101

2102-
self._intercept_create_request(tune_request, submit)
2102+
self._intercept_create_request(tune_request, submit, self.create_tuning_job.__name__)
21032103

21042104
def _get_tuning_request(
21052105
self,
@@ -2569,7 +2569,7 @@ def submit(request):
25692569
LOGGER.debug("Transform request: %s", json.dumps(request, indent=4))
25702570
self.sagemaker_client.create_transform_job(**request)
25712571

2572-
self._intercept_create_request(transform_request, submit)
2572+
self._intercept_create_request(transform_request, submit, self.transform.__name__)
25732573

25742574
def _create_model_request(
25752575
self,

src/sagemaker/tuner.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from sagemaker.estimator import Framework
3333
from sagemaker.inputs import TrainingInput
3434
from sagemaker.job import _Job
35+
from sagemaker.jumpstart.utils import add_jumpstart_tags, get_jumpstart_base_name_if_jumpstart_model
3536
from sagemaker.parameter import (
3637
CategoricalParameter,
3738
ContinuousParameter,
@@ -319,6 +320,42 @@ def _prepare_for_tuning(self, job_name=None, include_cls_metadata=False):
319320
"""Prepare the tuner instance for tuning (fit)."""
320321
self._prepare_job_name_for_tuning(job_name=job_name)
321322
self._prepare_static_hyperparameters_for_tuning(include_cls_metadata=include_cls_metadata)
323+
self._prepare_tags_for_tuning()
324+
325+
def _get_model_uri(
326+
self,
327+
estimator,
328+
):
329+
"""Return the model artifact URI used by the Estimator instance.
330+
331+
This attribute can live in multiple places, and accessing the attribute can
332+
raise a TypeError, which needs to be handled.
333+
"""
334+
try:
335+
return getattr(estimator, "model_data", None)
336+
except TypeError:
337+
return getattr(estimator, "model_uri", None)
338+
339+
def _prepare_tags_for_tuning(self):
340+
"""Add tags to tuning job (from Estimator and JumpStart tags)."""
341+
342+
# Add tags from Estimator class
343+
estimator = self.estimator or self.estimator_dict[sorted(self.estimator_dict.keys())[0]]
344+
345+
estimator_tags = getattr(estimator, "tags", []) or []
346+
347+
if self.tags is None and len(estimator_tags) > 0:
348+
self.tags = []
349+
350+
for tag in estimator_tags:
351+
if tag not in self.tags:
352+
self.tags.append(tag)
353+
354+
self.tags = add_jumpstart_tags(
355+
tags=self.tags,
356+
training_script_uri=getattr(estimator, "source_dir", None),
357+
training_model_uri=self._get_model_uri(estimator),
358+
)
322359

323360
def _prepare_job_name_for_tuning(self, job_name=None):
324361
"""Set current job name before starting tuning."""
@@ -331,6 +368,12 @@ def _prepare_job_name_for_tuning(self, job_name=None):
331368
self.estimator or self.estimator_dict[sorted(self.estimator_dict.keys())[0]]
332369
)
333370
base_name = base_name_from_image(estimator.training_image_uri())
371+
372+
jumpstart_base_name = get_jumpstart_base_name_if_jumpstart_model(
373+
getattr(estimator, "source_dir", None),
374+
self._get_model_uri(estimator),
375+
)
376+
base_name = jumpstart_base_name or base_name
334377
self._current_job_name = name_from_base(
335378
base_name, max_length=self.TUNING_JOB_NAME_MAX_LENGTH, short=True
336379
)

src/sagemaker/workflow/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,8 @@
1212
# language governing permissions and limitations under the License.
1313
"""Defines Types etc. used in workflow."""
1414
from __future__ import absolute_import
15-
from typing import Union
1615

1716
from sagemaker.workflow.entities import Expression
18-
from sagemaker.workflow.execution_variables import ExecutionVariable
19-
from sagemaker.workflow.parameters import Parameter
20-
from sagemaker.workflow.properties import Properties
21-
22-
PipelineNonPrimitiveInputTypes = Union[ExecutionVariable, Expression, Parameter, Properties]
2317

2418

2519
def is_pipeline_variable(var: object) -> bool:

0 commit comments

Comments
 (0)