Skip to content

Commit 74607d5

Browse files
nmadanBalaji SankarRuban Hussainbalajisankar15Ivy Bazan
committed
Pathways/Intelligent defaults integration (aws#894)
Co-authored-by: Balaji Sankar <[email protected]>
Co-authored-by: Ruban Hussain <[email protected]>
Co-authored-by: Balaji Sankar <[email protected]>
Co-authored-by: Ivy Bazan <[email protected]>
Co-authored-by: rubanh <[email protected]>
Co-authored-by: Namrata Madan <[email protected]>
fixes (aws#845)
fix: Make 'role' Optional for ModelQualityMonitor and DefaultModelMonitor, and fix PROCESSING_CONFIG_PATH (aws#849)
Fix: Certain unit tests aren't passing sagemaker_session. Modify the logic to accommodate that case (aws#850)
1 parent 0393d7e commit 74607d5

File tree

133 files changed

+6895
-1463
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

133 files changed

+6895
-1463
lines changed

doc/api/utility/config.rst

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Config
2+
-------
3+
4+
.. automodule:: sagemaker.config.config
5+
:members:
6+
:undoc-members:
7+
:show-inheritance:

doc/overview.rst

+572
Large diffs are not rendered by default.

setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ def read_requirements(filename):
6060
"pathos",
6161
"schema",
6262
"PyYAML==5.4.1",
63+
"jsonschema",
64+
"platformdirs",
6365
]
6466

6567
# Specific use case dependencies

src/sagemaker/algorithm.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class AlgorithmEstimator(EstimatorBase):
4646
def __init__(
4747
self,
4848
algorithm_arn: str,
49-
role: str,
49+
role: str = None,
5050
instance_count: Optional[Union[int, PipelineVariable]] = None,
5151
instance_type: Optional[Union[str, PipelineVariable]] = None,
5252
volume_size: Union[int, PipelineVariable] = 30,

src/sagemaker/amazon/amazon_estimator.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase):
5050

5151
def __init__(
5252
self,
53-
role: str,
53+
role: Optional[Union[str, PipelineVariable]] = None,
5454
instance_count: Optional[Union[int, PipelineVariable]] = None,
5555
instance_type: Optional[Union[str, PipelineVariable]] = None,
5656
data_location: Optional[str] = None,

src/sagemaker/amazon/factorization_machines.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class FactorizationMachines(AmazonAlgorithmEstimatorBase):
8787

8888
def __init__(
8989
self,
90-
role: str,
90+
role: Optional[Union[str, PipelineVariable]] = None,
9191
instance_count: Optional[Union[int, PipelineVariable]] = None,
9292
instance_type: Optional[Union[str, PipelineVariable]] = None,
9393
num_factors: Optional[int] = None,
@@ -326,7 +326,7 @@ class FactorizationMachinesModel(Model):
326326
def __init__(
327327
self,
328328
model_data: Union[str, PipelineVariable],
329-
role: str,
329+
role: Optional[str] = None,
330330
sagemaker_session: Optional[Session] = None,
331331
**kwargs
332332
):

src/sagemaker/amazon/ipinsights.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class IPInsights(AmazonAlgorithmEstimatorBase):
6363

6464
def __init__(
6565
self,
66-
role: str,
66+
role: Optional[Union[str, PipelineVariable]] = None,
6767
instance_count: Optional[Union[int, PipelineVariable]] = None,
6868
instance_type: Optional[Union[str, PipelineVariable]] = None,
6969
num_entity_vectors: Optional[int] = None,
@@ -229,7 +229,7 @@ class IPInsightsModel(Model):
229229
def __init__(
230230
self,
231231
model_data: Union[str, PipelineVariable],
232-
role: str,
232+
role: Optional[str] = None,
233233
sagemaker_session: Optional[Session] = None,
234234
**kwargs
235235
):

src/sagemaker/amazon/kmeans.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class KMeans(AmazonAlgorithmEstimatorBase):
6262

6363
def __init__(
6464
self,
65-
role: str,
65+
role: Optional[Union[str, PipelineVariable]] = None,
6666
instance_count: Optional[Union[int, PipelineVariable]] = None,
6767
instance_type: Optional[Union[str, PipelineVariable]] = None,
6868
k: Optional[int] = None,
@@ -255,7 +255,7 @@ class KMeansModel(Model):
255255
def __init__(
256256
self,
257257
model_data: Union[str, PipelineVariable],
258-
role: str,
258+
role: Optional[str] = None,
259259
sagemaker_session: Optional[Session] = None,
260260
**kwargs
261261
):

src/sagemaker/amazon/knn.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class KNN(AmazonAlgorithmEstimatorBase):
7373

7474
def __init__(
7575
self,
76-
role: str,
76+
role: Optional[Union[str, PipelineVariable]] = None,
7777
instance_count: Optional[Union[int, PipelineVariable]] = None,
7878
instance_type: Optional[Union[str, PipelineVariable]] = None,
7979
k: Optional[int] = None,
@@ -246,7 +246,7 @@ class KNNModel(Model):
246246
def __init__(
247247
self,
248248
model_data: Union[str, PipelineVariable],
249-
role: str,
249+
role: Optional[str] = None,
250250
sagemaker_session: Optional[Session] = None,
251251
**kwargs
252252
):

src/sagemaker/amazon/lda.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class LDA(AmazonAlgorithmEstimatorBase):
5252

5353
def __init__(
5454
self,
55-
role: str,
55+
role: Optional[Union[str, PipelineVariable]] = None,
5656
instance_type: Optional[Union[str, PipelineVariable]] = None,
5757
num_topics: Optional[int] = None,
5858
alpha0: Optional[float] = None,
@@ -230,7 +230,7 @@ class LDAModel(Model):
230230
def __init__(
231231
self,
232232
model_data: Union[str, PipelineVariable],
233-
role: str,
233+
role: Optional[str] = None,
234234
sagemaker_session: Optional[Session] = None,
235235
**kwargs
236236
):

src/sagemaker/amazon/linear_learner.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
145145

146146
def __init__(
147147
self,
148-
role: str,
148+
role: Optional[Union[str, PipelineVariable]] = None,
149149
instance_count: Optional[Union[int, PipelineVariable]] = None,
150150
instance_type: Optional[Union[str, PipelineVariable]] = None,
151151
predictor_type: Optional[str] = None,
@@ -499,7 +499,7 @@ class LinearLearnerModel(Model):
499499
def __init__(
500500
self,
501501
model_data: Union[str, PipelineVariable],
502-
role: str,
502+
role: Optional[str] = None,
503503
sagemaker_session: Optional[Session] = None,
504504
**kwargs
505505
):

src/sagemaker/amazon/ntm.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class NTM(AmazonAlgorithmEstimatorBase):
7474

7575
def __init__(
7676
self,
77-
role: str,
77+
role: Optional[Union[str, PipelineVariable]] = None,
7878
instance_count: Optional[Union[int, PipelineVariable]] = None,
7979
instance_type: Optional[Union[str, PipelineVariable]] = None,
8080
num_topics: Optional[int] = None,
@@ -263,7 +263,7 @@ class NTMModel(Model):
263263
def __init__(
264264
self,
265265
model_data: Union[str, PipelineVariable],
266-
role: str,
266+
role: Optional[str] = None,
267267
sagemaker_session: Optional[Session] = None,
268268
**kwargs
269269
):

src/sagemaker/amazon/object2vec.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ class Object2Vec(AmazonAlgorithmEstimatorBase):
153153

154154
def __init__(
155155
self,
156-
role: str,
156+
role: Optional[Union[str, PipelineVariable]] = None,
157157
instance_count: Optional[Union[int, PipelineVariable]] = None,
158158
instance_type: Optional[Union[str, PipelineVariable]] = None,
159159
epochs: Optional[int] = None,
@@ -361,7 +361,7 @@ class Object2VecModel(Model):
361361
def __init__(
362362
self,
363363
model_data: Union[str, PipelineVariable],
364-
role: str,
364+
role: Optional[str] = None,
365365
sagemaker_session: Optional[Session] = None,
366366
**kwargs
367367
):

src/sagemaker/amazon/pca.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class PCA(AmazonAlgorithmEstimatorBase):
6060

6161
def __init__(
6262
self,
63-
role: str,
63+
role: Optional[Union[str, PipelineVariable]] = None,
6464
instance_count: Optional[Union[int, PipelineVariable]] = None,
6565
instance_type: Optional[Union[str, PipelineVariable]] = None,
6666
num_components: Optional[int] = None,
@@ -243,7 +243,7 @@ class PCAModel(Model):
243243
def __init__(
244244
self,
245245
model_data: Union[str, PipelineVariable],
246-
role: str,
246+
role: Optional[str] = None,
247247
sagemaker_session: Optional[Session] = None,
248248
**kwargs
249249
):

src/sagemaker/amazon/randomcutforest.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class RandomCutForest(AmazonAlgorithmEstimatorBase):
5454

5555
def __init__(
5656
self,
57-
role: str,
57+
role: Optional[Union[str, PipelineVariable]] = None,
5858
instance_count: Optional[Union[int, PipelineVariable]] = None,
5959
instance_type: Optional[Union[str, PipelineVariable]] = None,
6060
num_samples_per_tree: Optional[int] = None,
@@ -216,7 +216,7 @@ class RandomCutForestModel(Model):
216216
def __init__(
217217
self,
218218
model_data: Union[str, PipelineVariable],
219-
role: str,
219+
role: Optional[str] = None,
220220
sagemaker_session: Optional[Session] = None,
221221
**kwargs
222222
):

src/sagemaker/automl/automl.py

+38-8
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,16 @@
1919

2020
from sagemaker import Model, PipelineModel
2121
from sagemaker.automl.candidate_estimator import CandidateEstimator
22+
from sagemaker.config import (
23+
AUTO_ML_ROLE_ARN_PATH,
24+
AUTO_ML_KMS_KEY_ID_PATH,
25+
AUTO_ML_VPC_CONFIG_PATH,
26+
AUTO_ML_VOLUME_KMS_KEY_ID_PATH,
27+
AUTO_ML_INTER_CONTAINER_ENCRYPTION_PATH,
28+
)
2229
from sagemaker.job import _Job
2330
from sagemaker.session import Session
24-
from sagemaker.utils import name_from_base
31+
from sagemaker.utils import name_from_base, resolve_value_from_config
2532
from sagemaker.workflow.entities import PipelineVariable
2633
from sagemaker.workflow.pipeline_context import runnable_by_pipeline
2734

@@ -98,15 +105,15 @@ class AutoML(object):
98105

99106
def __init__(
100107
self,
101-
role: str,
102-
target_attribute_name: str,
108+
role: Optional[str] = None,
109+
target_attribute_name: str = None,
103110
output_kms_key: Optional[str] = None,
104111
output_path: Optional[str] = None,
105112
base_job_name: Optional[str] = None,
106113
compression_type: Optional[str] = None,
107114
sagemaker_session: Optional[Session] = None,
108115
volume_kms_key: Optional[str] = None,
109-
encrypt_inter_container_traffic: Optional[bool] = False,
116+
encrypt_inter_container_traffic: Optional[bool] = None,
110117
vpc_config: Optional[Dict[str, List]] = None,
111118
problem_type: Optional[str] = None,
112119
max_candidates: Optional[int] = None,
@@ -176,14 +183,10 @@ def __init__(
176183
Returns:
177184
AutoML object.
178185
"""
179-
self.role = role
180-
self.output_kms_key = output_kms_key
181186
self.output_path = output_path
182187
self.base_job_name = base_job_name
183188
self.compression_type = compression_type
184-
self.volume_kms_key = volume_kms_key
185189
self.encrypt_inter_container_traffic = encrypt_inter_container_traffic
186-
self.vpc_config = vpc_config
187190
self.problem_type = problem_type
188191
self.max_candidate = max_candidates
189192
self.max_runtime_per_training_job_in_seconds = max_runtime_per_training_job_in_seconds
@@ -204,6 +207,31 @@ def __init__(
204207
self._auto_ml_job_desc = None
205208
self._best_candidate = None
206209
self.sagemaker_session = sagemaker_session or Session()
210+
self.vpc_config = resolve_value_from_config(
211+
vpc_config, AUTO_ML_VPC_CONFIG_PATH, sagemaker_session=self.sagemaker_session
212+
)
213+
self.volume_kms_key = resolve_value_from_config(
214+
volume_kms_key, AUTO_ML_VOLUME_KMS_KEY_ID_PATH, sagemaker_session=self.sagemaker_session
215+
)
216+
self.output_kms_key = resolve_value_from_config(
217+
output_kms_key, AUTO_ML_KMS_KEY_ID_PATH, sagemaker_session=self.sagemaker_session
218+
)
219+
self.role = resolve_value_from_config(
220+
role, AUTO_ML_ROLE_ARN_PATH, sagemaker_session=self.sagemaker_session
221+
)
222+
if not self.role:
223+
# Originally the IAM role was a required parameter.
224+
# It is now Optional because it can be fetched from SageMakerConfig.
225+
# Since the parameter is optional, we must still verify that a role was resolved, even
226+
# after fetching the config.
227+
raise ValueError("An AWS IAM role is required to create an AutoML job.")
228+
229+
self.encrypt_inter_container_traffic = resolve_value_from_config(
230+
direct_input=encrypt_inter_container_traffic,
231+
config_path=AUTO_ML_INTER_CONTAINER_ENCRYPTION_PATH,
232+
default_value=False,
233+
sagemaker_session=self.sagemaker_session,
234+
)
207235

208236
self._check_problem_type_and_job_objective(self.problem_type, self.job_objective)
209237

@@ -276,6 +304,8 @@ def attach(cls, auto_ml_job_name, sagemaker_session=None):
276304
volume_kms_key=auto_ml_job_desc.get("AutoMLJobConfig", {})
277305
.get("SecurityConfig", {})
278306
.get("VolumeKmsKeyId"),
307+
# Do not override encrypt_inter_container_traffic from config because this info
308+
# is pulled from an existing automl job
279309
encrypt_inter_container_traffic=auto_ml_job_desc.get("AutoMLJobConfig", {})
280310
.get("SecurityConfig", {})
281311
.get("EnableInterContainerTrafficEncryption", False),

src/sagemaker/automl/candidate_estimator.py

+30-7
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,14 @@
1414
from __future__ import absolute_import
1515

1616
from six import string_types
17-
18-
from sagemaker import Session
17+
from sagemaker.config import (
18+
TRAINING_JOB_VPC_CONFIG_PATH,
19+
TRAINING_JOB_VOLUME_KMS_KEY_ID_PATH,
20+
TRAINING_JOB_INTER_CONTAINER_ENCRYPTION_PATH,
21+
)
22+
from sagemaker.session import Session
1923
from sagemaker.job import _Job
20-
from sagemaker.utils import name_from_base
24+
from sagemaker.utils import name_from_base, resolve_value_from_config
2125

2226

2327
class CandidateEstimator(object):
@@ -72,7 +76,8 @@ def fit(
7276
inputs,
7377
candidate_name=None,
7478
volume_kms_key=None,
75-
encrypt_inter_container_traffic=False,
79+
# default of False for training job, checked inside function
80+
encrypt_inter_container_traffic=None,
7681
vpc_config=None,
7782
wait=True,
7883
logs=True,
@@ -87,7 +92,8 @@ def fit(
8792
volume_kms_key (str): The KMS key id to encrypt data on the storage volume attached to
8893
the ML compute instance(s).
8994
encrypt_inter_container_traffic (bool): To encrypt all communications between ML compute
90-
instances in distributed training. Default: False.
95+
instances in distributed training. If not passed, will be fetched from
96+
sagemaker_config if a value is defined there. Default: False.
9197
vpc_config (dict): Specifies a VPC that jobs and hosted models have access to.
9298
Control access to and from training and model containers by configuring the VPC
9399
wait (bool): Whether the call should wait until all jobs complete (default: True).
@@ -99,7 +105,14 @@ def fit(
99105
"""Logs can only be shown if wait is set to True.
100106
Please either set wait to True or set logs to False."""
101107
)
102-
108+
vpc_config = resolve_value_from_config(
109+
vpc_config, TRAINING_JOB_VPC_CONFIG_PATH, sagemaker_session=self.sagemaker_session
110+
)
111+
volume_kms_key = resolve_value_from_config(
112+
volume_kms_key,
113+
TRAINING_JOB_VOLUME_KMS_KEY_ID_PATH,
114+
sagemaker_session=self.sagemaker_session,
115+
)
103116
self.name = candidate_name or self.name
104117
running_jobs = {}
105118

@@ -131,12 +144,22 @@ def fit(
131144
base_name = "sagemaker-automl-training-rerun"
132145
step_name = name_from_base(base_name)
133146
step["name"] = step_name
147+
148+
# Check training_job config not auto_ml_job config because this function calls
149+
# training job API
150+
_encrypt_inter_container_traffic = resolve_value_from_config(
151+
direct_input=encrypt_inter_container_traffic,
152+
config_path=TRAINING_JOB_INTER_CONTAINER_ENCRYPTION_PATH,
153+
default_value=False,
154+
sagemaker_session=self.sagemaker_session,
155+
)
156+
134157
train_args = self._get_train_args(
135158
desc,
136159
channels,
137160
step_name,
138161
volume_kms_key,
139-
encrypt_inter_container_traffic,
162+
_encrypt_inter_container_traffic,
140163
vpc_config,
141164
)
142165
self.sagemaker_session.train(**train_args)

src/sagemaker/chainer/model.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ class ChainerModel(FrameworkModel):
8282
def __init__(
8383
self,
8484
model_data: Union[str, PipelineVariable],
85-
role: str,
86-
entry_point: str,
85+
role: Optional[str] = None,
86+
entry_point: Optional[str] = None,
8787
image_uri: Optional[Union[str, PipelineVariable]] = None,
8888
framework_version: Optional[str] = None,
8989
py_version: Optional[str] = None,

0 commit comments

Comments
 (0)