Skip to content

Commit 2dedfd3

Browse files
authored
Merge branch 'master' into fix-repack-model-pipelines
2 parents 4bc1550 + aa54685 commit 2dedfd3

File tree

8 files changed

+225
-3
lines changed

8 files changed

+225
-3
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## v2.59.4 (2021-09-27)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* add checks for ExecutionRole in UserSettings and add more unit tests
8+
* add pytorch 1.8.1 for huggingface
9+
310
## v2.59.3.post0 (2021-09-22)
411

512
### Documentation Changes

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.59.4.dev0
1+
2.59.5.dev0

src/sagemaker/session.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3554,9 +3554,12 @@ def get_caller_identity_arn(self):
35543554
user_profile_desc = self.sagemaker_client.describe_user_profile(
35553555
DomainId=domain_id, UserProfileName=user_profile_name
35563556
)
3557-
if user_profile_desc.get("UserSettings") is not None:
3557+
3558+
# First, try to find role in userSettings
3559+
if user_profile_desc.get("UserSettings", {}).get("ExecutionRole"):
35583560
return user_profile_desc["UserSettings"]["ExecutionRole"]
35593561

3562+
# If not found, fallback to the domain
35603563
domain_desc = self.sagemaker_client.describe_domain(DomainId=domain_id)
35613564
return domain_desc["DefaultUserSettings"]["ExecutionRole"]
35623565
except ClientError:

src/sagemaker/workflow/properties.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ def __init__(
6767
for key, info in members.items():
6868
if Properties._shapes.get(info["shape"], {}).get("type") == "list":
6969
self.__dict__[key] = PropertiesList(f"{path}.{key}", info["shape"])
70+
elif Properties._shapes.get(info["shape"], {}).get("type") == "map":
71+
self.__dict__[key] = PropertiesMap(f"{path}.{key}", info["shape"])
7072
else:
7173
self.__dict__[key] = Properties(f"{path}.{key}", info["shape"])
7274

@@ -109,6 +111,38 @@ def __getitem__(self, item: Union[int, str]):
109111
return self._items.get(item)
110112

111113

114+
class PropertiesMap(Properties):
    """Properties of a map-typed shape, for use in workflow expressions.

    Subscripting an instance with a key yields a ``Properties`` object whose
    path is this map's path followed by the subscript, for example
    ``Steps.my-train.HyperParameters['batch-size']``.
    """

    def __init__(self, path: str, shape_name: str = None):
        """Create a PropertiesMap instance representing the given shape.

        Args:
            path (str): The parent path of the PropertiesMap instance.
            shape_name (str): The botocore sagemaker service model shape name.
        """
        super().__init__(path, shape_name)
        self.shape_name = shape_name
        # Entries are created lazily on first access and then reused, so that
        # repeated lookups of the same key return the same object.
        self._items: Dict[Union[int, str], Properties] = {}

    def __getitem__(self, item: Union[int, str]):
        """Return the ``Properties`` object for the map entry stored under ``item``.

        The entry is created on first access and cached for later lookups.

        Args:
            item (Union[int, str]): The key of the entry in the map. String keys
                are rendered quoted in the path (``path['key']``); any other key
                is rendered bare (``path[key]``).

        Returns:
            Properties: The property describing the map value at ``item``.
        """
        if item not in self._items:
            # Look the shape up tolerantly: ``shape_name`` defaults to None and
            # may not be a known shape, in which case the value shape is left
            # unspecified instead of failing with an opaque TypeError.
            # NOTE(review): assumes Properties accepts shape_name=None, as its
            # use via super().__init__ with the default above suggests.
            shape = Properties._shapes.get(self.shape_name) or {}
            member = shape.get("value", {}).get("shape")
            subscript = f"'{item}'" if isinstance(item, str) else f"{item}"
            self._items[item] = Properties(f"{self._path}[{subscript}]", member)

        return self._items[item]
144+
145+
112146
@attr.s
113147
class PropertyFile(Expression):
114148
"""Provides a property file struct.

tests/integ/test_workflow.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,148 @@ def test_one_step_callback_pipeline(sagemaker_session, role, pipeline_name, regi
855855
pass
856856

857857

858+
def test_steps_with_map_params_pipeline(
    sagemaker_session, role, script_dir, pipeline_name, region_name, athena_dataset_definition
):
    """Check that string-keyed step properties render correctly in a pipeline.

    Builds a processing -> training -> condition pipeline in which the training
    input and the condition's left value are obtained by subscripting step
    properties with string keys, asserts the resulting ``Get`` expressions in
    the pipeline definition, and finally creates the pipeline and checks the
    returned ARN. The pipeline is deleted again on a best-effort basis.
    """
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)
    framework_version = "0.20.0"
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    output_prefix = ParameterString(name="OutputPrefix", default_value="output")
    input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"

    sklearn_processor = SKLearnProcessor(
        framework_version=framework_version,
        instance_type=instance_type,
        instance_count=instance_count,
        base_job_name="test-sklearn",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    step_process = ProcessingStep(
        name="my-process",
        display_name="ProcessingStep",
        description="description for Processing step",
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
            ProcessingInput(dataset_definition=athena_dataset_definition),
        ],
        outputs=[
            ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"),
            ProcessingOutput(
                output_name="test_data",
                source="/opt/ml/processing/test",
                destination=Join(
                    on="/",
                    values=[
                        "s3:/",
                        sagemaker_session.default_bucket(),
                        "test-sklearn",
                        output_prefix,
                        ExecutionVariables.PIPELINE_EXECUTION_ID,
                    ],
                ),
            ),
        ],
        code=os.path.join(script_dir, "preprocessing.py"),
    )

    sklearn_train = SKLearn(
        framework_version=framework_version,
        entry_point=os.path.join(script_dir, "train.py"),
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        role=role,
        hyperparameters={
            "batch-size": 500,
            "epochs": 5,
        },
    )
    step_train = TrainingStep(
        name="my-train",
        display_name="TrainingStep",
        description="description for Training step",
        estimator=sklearn_train,
        inputs=TrainingInput(
            # String-keyed lookup of a processing output; asserted below.
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                "train_data"
            ].S3Output.S3Uri
        ),
    )

    model = Model(
        image_uri=sklearn_train.image_uri,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sagemaker_session,
        role=role,
    )
    model_inputs = CreateModelInput(
        instance_type="ml.m5.large",
        accelerator_type="ml.eia1.medium",
    )
    step_model = CreateModelStep(
        name="my-model",
        display_name="ModelStep",
        description="description for Model step",
        model=model,
        inputs=model_inputs,
    )

    # Condition whose left value is a map entry of the training step's
    # properties (HyperParameters["batch-size"]); asserted below.
    cond_gte = ConditionGreaterThanOrEqualTo(
        left=step_train.properties.HyperParameters["batch-size"],
        right=6.0,
    )

    step_cond = ConditionStep(
        name="CustomerChurnAccuracyCond",
        conditions=[cond_gte],
        if_steps=[],
        else_steps=[step_model],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_type, instance_count, output_prefix],
        steps=[step_process, step_train, step_cond],
        sagemaker_session=sagemaker_session,
    )

    definition = json.loads(pipeline.definition())
    assert definition["Version"] == "2020-12-01"

    steps = definition["Steps"]
    assert len(steps) == 3
    training_args = {}
    condition_args = {}
    for step in steps:
        if step["Type"] == "Training":
            training_args = step["Arguments"]
        if step["Type"] == "Condition":
            condition_args = step["Arguments"]

    assert training_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] == {
        "Get": "Steps.my-process.ProcessingOutputConfig.Outputs['train_data'].S3Output.S3Uri"
    }
    assert condition_args["Conditions"][0]["LeftValue"] == {
        "Get": "Steps.my-train.HyperParameters['batch-size']"
    }

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

    finally:
        # Best-effort cleanup: a failed delete must not mask the outcome of
        # the assertions above.
        try:
            pipeline.delete()
        except Exception:
            pass
998+
999+
8581000
def test_two_step_callback_pipeline_with_output_reference(
8591001
sagemaker_session, role, pipeline_name, region_name
8601002
):

tests/unit/sagemaker/workflow/test_properties.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def test_properties_describe_training_job_response():
2222
for name in some_prop_names:
2323
assert name in prop.__dict__.keys()
2424
assert prop.CreationTime.expr == {"Get": "Steps.MyStep.CreationTime"}
25+
assert prop.HyperParameters.expr == {"Get": "Steps.MyStep.HyperParameters"}
2526
assert prop.OutputDataConfig.S3OutputPath.expr == {
2627
"Get": "Steps.MyStep.OutputDataConfig.S3OutputPath"
2728
}

tests/unit/sagemaker/workflow/test_steps.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ def test_training_step_base_estimator(sagemaker_session):
226226
"CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
227227
}
228228
assert step.properties.TrainingJobName.expr == {"Get": "Steps.MyTrainingStep.TrainingJobName"}
229+
assert step.properties.HyperParameters.expr == {"Get": "Steps.MyTrainingStep.HyperParameters"}
229230

230231

231232
def test_training_step_tensorflow(sagemaker_session):

tests/unit/test_session.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ def test_get_caller_identity_arn_from_describe_user_profile(boto_session):
343343
),
344344
)
345345
@patch("os.path.exists", side_effect=mock_exists(NOTEBOOK_METADATA_FILE, True))
346-
def test_get_caller_identity_arn_from_describe_domain(boto_session):
346+
def test_get_caller_identity_arn_from_describe_domain_if_no_user_settings(boto_session):
347347
sess = Session(boto_session)
348348
expected_role = "arn:aws:iam::369233609183:role/service-role/SageMakerRole-20171129T072388"
349349
sess.sagemaker_client.describe_user_profile.return_value = {}
@@ -361,6 +361,40 @@ def test_get_caller_identity_arn_from_describe_domain(boto_session):
361361
sess.sagemaker_client.describe_domain.assert_called_once_with(DomainId="d-kbnw5yk6tg8j")
362362

363363

364+
@patch(
    "six.moves.builtins.open",
    mock_open(
        read_data='{"ResourceName": "SageMakerInstance", '
        '"DomainId": "d-kbnw5yk6tg8j", '
        '"UserProfileName": "default-1617915559064"}'
    ),
)
@patch("os.path.exists", side_effect=mock_exists(NOTEBOOK_METADATA_FILE, True))
def test_fallback_to_domain_if_role_unavailable_in_user_settings(boto_session):
    """The domain's role is returned when UserSettings has no ExecutionRole.

    The user profile response contains a ``UserSettings`` dict without an
    ``ExecutionRole`` key, so ``get_caller_identity_arn`` is expected to return
    the ``ExecutionRole`` from the domain's ``DefaultUserSettings`` instead.
    """
    domain_id = "d-kbnw5yk6tg8j"
    profile_name = "default-1617915559064"
    domain_role = "expected_role"

    session = Session(boto_session)
    client = session.sagemaker_client

    # User profile that has settings, but none of them is an execution role.
    client.describe_user_profile.return_value = {
        "DomainId": domain_id,
        "UserSettings": {
            "JupyterServerAppSettings": {},
            "KernelGatewayAppSettings": {},
        },
    }
    client.describe_domain.return_value = {
        "DefaultUserSettings": {"ExecutionRole": domain_role},
    }

    resolved_role = session.get_caller_identity_arn()

    assert resolved_role == domain_role
    client.describe_user_profile.assert_called_once_with(
        DomainId=domain_id,
        UserProfileName=profile_name,
    )
    client.describe_domain.assert_called_once_with(DomainId=domain_id)
396+
397+
364398
@patch("six.moves.builtins.open", mock_open(read_data='{"ResourceName": "SageMakerInstance"}'))
365399
@patch("os.path.exists", side_effect=mock_exists(NOTEBOOK_METADATA_FILE, True))
366400
@patch("sagemaker.session.sts_regional_endpoint", return_value=STS_ENDPOINT)

0 commit comments

Comments
 (0)