diff --git a/src/sagemaker/debugger/profiler_config.py b/src/sagemaker/debugger/profiler_config.py index 3d4a24e8d1..561de38b9f 100644 --- a/src/sagemaker/debugger/profiler_config.py +++ b/src/sagemaker/debugger/profiler_config.py @@ -32,6 +32,7 @@ def __init__( s3_output_path: Optional[Union[str, PipelineVariable]] = None, system_monitor_interval_millis: Optional[Union[int, PipelineVariable]] = None, framework_profile_params: Optional[FrameworkProfile] = None, + disable_profiler: Optional[Union[bool, PipelineVariable]] = False, ): """Initialize a ``ProfilerConfig`` instance. @@ -78,6 +79,7 @@ class and SageMaker Framework estimators. self.s3_output_path = s3_output_path self.system_monitor_interval_millis = system_monitor_interval_millis self.framework_profile_params = framework_profile_params + self.disable_profiler = disable_profiler def _to_request_dict(self): """Generate a request dictionary using the parameters provided when initializing the object. @@ -91,6 +93,8 @@ def _to_request_dict(self): if self.s3_output_path is not None: profiler_config_request["S3OutputPath"] = self.s3_output_path + profiler_config_request["DisableProfiler"] = self.disable_profiler + if self.system_monitor_interval_millis is not None: profiler_config_request[ "ProfilingIntervalInMilliseconds" diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index e3b06950aa..8ed9b724a5 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -938,26 +938,29 @@ def _prepare_collection_configs(self): def _prepare_profiler_for_training(self): """Set necessary values and do basic validations in profiler config and profiler rules. - When user explicitly set rules to an empty list, default profiler rule won't be enabled. - Default profiler rule will be enabled in supported regions when either: - 1. user doesn't specify any rules, i.e., rules=None; or - 2. user only specify debugger rules, i.e., rules=[Rule.sagemaker(...)] + No default profiler rule will be used. 
The user needs to specify rules explicitly """ if self.disable_profiler: - if self.profiler_config: - raise RuntimeError("profiler_config cannot be set when disable_profiler is True.") + if self.profiler_config and not self.profiler_config.disable_profiler: + raise RuntimeError( + "profiler_config.disable_profiler cannot be False" + + " when disable_profiler is True." + ) if self.profiler_rules: raise RuntimeError("ProfilerRule cannot be set when disable_profiler is True.") elif _region_supports_profiler(self.sagemaker_session.boto_region_name): if self.profiler_config is None: self.profiler_config = ProfilerConfig(s3_output_path=self.output_path) if self.rules is None or (self.rules and not self.profiler_rules): - self.profiler_rules = [get_default_profiler_rule()] + self.profiler_rules = [] if self.profiler_config and not self.profiler_config.s3_output_path: self.profiler_config.s3_output_path = self.output_path self.profiler_rule_configs = self._prepare_profiler_rules() + # if profiler_config is still None, it means the job has profiler disabled + if self.profiler_config is None: + self.profiler_config = ProfilerConfig(disable_profiler=True) def _prepare_profiler_rules(self): """Set any necessary values in profiler rules, if they are provided.""" @@ -1048,7 +1051,7 @@ def latest_job_profiler_artifacts_path(self): error_message="""Cannot get the profiling output artifacts path. 
The Estimator is not associated with a training job.""" ) - if self.profiler_config is not None: + if self.profiler_config is not None and not self.profiler_config.disable_profiler: return os.path.join( self.profiler_config.s3_output_path, self.latest_training_job.name, @@ -1895,8 +1898,8 @@ def enable_default_profiling(self): else: self.profiler_config = ProfilerConfig(s3_output_path=self.output_path) - self.profiler_rules = [get_default_profiler_rule()] - self.profiler_rule_configs = self._prepare_profiler_rules() + self.profiler_rules = [] + self.profiler_rule_configs = [] _TrainingJob.update( self, self.profiler_rule_configs, self.profiler_config._to_request_dict() diff --git a/tests/integ/sagemaker/workflow/test_workflow.py b/tests/integ/sagemaker/workflow/test_workflow.py index 44f4e2d26e..bd24b653ae 100644 --- a/tests/integ/sagemaker/workflow/test_workflow.py +++ b/tests/integ/sagemaker/workflow/test_workflow.py @@ -1269,8 +1269,6 @@ def test_caching_behavior( # create pipeline pipeline.create(role) definition = json.loads(pipeline.definition()) - # delete profiler config for assertions as it will contain a timestamp - del definition["Steps"][1]["Arguments"]["ProfilerRuleConfigurations"] # verify input path expected_abalone_input_path = f"{pipeline_name}/{step_process.name}" f"/input/abalone_data" @@ -1295,7 +1293,6 @@ def test_caching_behavior( # verify no changes definition2 = json.loads(pipeline.definition()) - del definition2["Steps"][1]["Arguments"]["ProfilerRuleConfigurations"] assert definition == definition2 # add dummy file to source_dir @@ -1306,7 +1303,6 @@ def test_caching_behavior( # verify changes definition3 = json.loads(pipeline.definition()) - del definition3["Steps"][1]["Arguments"]["ProfilerRuleConfigurations"] assert definition != definition3 finally: diff --git a/tests/integ/test_profiler.py b/tests/integ/test_profiler.py index bddd53e20c..7d3fdb2d7b 100644 --- a/tests/integ/test_profiler.py +++ b/tests/integ/test_profiler.py @@ -13,7 
+13,6 @@ from __future__ import absolute_import import os -import re import time import uuid @@ -22,7 +21,6 @@ from sagemaker.debugger import ( DebuggerHookConfig, FrameworkProfile, - get_rule_container_image_uri, ProfilerConfig, ProfilerRule, Rule, @@ -93,8 +91,6 @@ def test_mxnet_with_default_profiler_config_and_profiler_rule( ) job_description = mx.latest_training_job.describe() - if "DisableProfiler" in job_description["ProfilerConfig"]: - job_description["ProfilerConfig"].pop("DisableProfiler") assert ( job_description["ProfilerConfig"] == ProfilerConfig( @@ -103,13 +99,6 @@ def test_mxnet_with_default_profiler_config_and_profiler_rule( ) assert job_description.get("ProfilingStatus") == "Enabled" - profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_rule_configuration["RuleConfigurationName"]) - assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_rule_configuration["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - with pytest.raises(ValueError) as error: mx.enable_default_profiling() assert "Debugger monitoring is already enabled." 
in str(error) @@ -155,18 +144,9 @@ def test_mxnet_with_custom_profiler_config_then_update_rule_and_config( ) job_description = mx.latest_training_job.describe() - if "DisableProfiler" in job_description["ProfilerConfig"]: - job_description["ProfilerConfig"].pop("DisableProfiler") assert job_description.get("ProfilerConfig") == profiler_config._to_request_dict() assert job_description.get("ProfilingStatus") == "Enabled" - profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_rule_configuration["RuleConfigurationName"]) - assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_rule_configuration["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - _wait_until_training_can_be_updated(sagemaker_session.sagemaker_client, training_job_name) mx.update_profiler( @@ -178,13 +158,6 @@ def test_mxnet_with_custom_profiler_config_then_update_rule_and_config( assert job_description["ProfilerConfig"]["S3OutputPath"] == profiler_config.s3_output_path assert job_description["ProfilerConfig"]["ProfilingIntervalInMilliseconds"] == 500 - profiler_report_rule_config = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_report_rule_config["RuleConfigurationName"]) - assert profiler_report_rule_config["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_report_rule_config["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - def test_mxnet_with_built_in_profiler_rule_with_custom_parameters( sagemaker_session, @@ -225,8 +198,6 @@ def test_mxnet_with_built_in_profiler_rule_with_custom_parameters( ) job_description = mx.latest_training_job.describe() - if "DisableProfiler" in job_description["ProfilerConfig"]: - job_description["ProfilerConfig"].pop("DisableProfiler") assert 
job_description.get("ProfilingStatus") == "Enabled" assert ( job_description.get("ProfilerConfig") @@ -298,8 +269,6 @@ def test_mxnet_with_profiler_and_debugger_then_disable_framework_metrics( ) job_description = mx.latest_training_job.describe() - if "DisableProfiler" in job_description["ProfilerConfig"]: - job_description["ProfilerConfig"].pop("DisableProfiler") assert job_description["ProfilerConfig"] == profiler_config._to_request_dict() assert job_description["DebugHookConfig"] == debugger_hook_config._to_request_dict() assert job_description.get("ProfilingStatus") == "Enabled" @@ -387,13 +356,6 @@ def test_mxnet_with_enable_framework_metrics_then_update_framework_metrics( == updated_framework_profile.profiling_parameters ) - profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_rule_configuration["RuleConfigurationName"]) - assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_rule_configuration["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - def test_mxnet_with_disable_profiler_then_enable_default_profiling( sagemaker_session, @@ -431,12 +393,10 @@ def test_mxnet_with_disable_profiler_then_enable_default_profiling( ) job_description = mx.latest_training_job.describe() - assert job_description.get("ProfilerConfig") is None assert job_description.get("ProfilerRuleConfigurations") is None assert job_description.get("ProfilingStatus") == "Disabled" _wait_until_training_can_be_updated(sagemaker_session.sagemaker_client, training_job_name) - mx.enable_default_profiling() job_description = mx.latest_training_job.describe() diff --git a/tests/unit/sagemaker/huggingface/test_estimator.py b/tests/unit/sagemaker/huggingface/test_estimator.py index 0088e34c58..072eefeb83 100644 --- a/tests/unit/sagemaker/huggingface/test_estimator.py +++ 
b/tests/unit/sagemaker/huggingface/test_estimator.py @@ -143,14 +143,8 @@ def _create_train_job(version, base_framework_version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/tensorflow/test_estimator.py b/tests/unit/sagemaker/tensorflow/test_estimator.py index fea80b7ea9..771b18b35a 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator.py @@ -136,14 +136,8 @@ def _create_train_job(tf_version, horovod=False, ps=False, py_version="py2", smd "metric_definitions": None, "environment": None, "experiment_config": None, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py index d35c0a51dd..656730a47c 100644 --- a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py @@ -145,14 +145,8 @@ def _create_train_job( "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": 
"503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py index 7645c4fe23..c3684ac649 100644 --- a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py @@ -143,14 +143,8 @@ def _create_train_job( "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py index 0fe2402695..068bb4e4b9 100644 --- a/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_pytorch_compiler.py @@ -137,14 +137,10 @@ def _create_train_job(version, instance_type, training_compiler_config, instance "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], - "profiler_config": {"S3OutputPath": "s3://{}/".format(BUCKET_NAME)}, + "profiler_config": { + "DisableProfiler": 
False, + "S3OutputPath": "s3://{}/".format(BUCKET_NAME), + }, } diff --git a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py index 1ce58a19b4..a5c14b1626 100644 --- a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py @@ -145,14 +145,8 @@ def _create_train_job(framework_version, instance_type, training_compiler_config "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/workflow/test_step_collections.py b/tests/unit/sagemaker/workflow/test_step_collections.py index 2bf47a79d0..95738c99ca 100644 --- a/tests/unit/sagemaker/workflow/test_step_collections.py +++ b/tests/unit/sagemaker/workflow/test_step_collections.py @@ -796,6 +796,7 @@ def test_register_model_with_model_repack_with_estimator( "CollectionConfigurations": [], "S3OutputPath": f"s3://{BUCKET}/", }, + "ProfilerConfig": {"DisableProfiler": True}, "HyperParameters": { "inference_script": '"dummy_script.py"', "dependencies": f'"{dummy_requirements}"', @@ -923,6 +924,7 @@ def test_register_model_with_model_repack_with_model(model, model_metrics, drift "CollectionConfigurations": [], "S3OutputPath": f"s3://{BUCKET}/", }, + "ProfilerConfig": {"DisableProfiler": True}, "HyperParameters": { "inference_script": '"dummy_script.py"', "model_archive": '"s3://my-bucket/model.tar.gz"', @@ -1052,6 +1054,7 @@ def test_register_model_with_model_repack_with_pipeline_model( "CollectionConfigurations": [], "S3OutputPath": 
f"s3://{BUCKET}/", }, + "ProfilerConfig": {"DisableProfiler": True}, "HyperParameters": { "dependencies": "null", "inference_script": '"dummy_script.py"', @@ -1243,6 +1246,7 @@ def test_estimator_transformer_with_model_repack_with_estimator(estimator): "TrainingImage": "246618743249.dkr.ecr.us-west-2.amazonaws.com/" + "sagemaker-scikit-learn:0.23-1-cpu-py3", }, + "ProfilerConfig": {"DisableProfiler": True}, "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"}, "StoppingCondition": {"MaxRuntimeInSeconds": 86400}, "ResourceConfig": { diff --git a/tests/unit/sagemaker/workflow/test_steps.py b/tests/unit/sagemaker/workflow/test_steps.py index ba712d11d7..f2046cc00f 100644 --- a/tests/unit/sagemaker/workflow/test_steps.py +++ b/tests/unit/sagemaker/workflow/test_steps.py @@ -329,6 +329,7 @@ def test_training_step_base_estimator(sagemaker_session): "CollectionConfigurations": [], }, "ProfilerConfig": { + "DisableProfiler": False, "ProfilingIntervalInMilliseconds": 500, "S3OutputPath": {"Std:Join": {"On": "/", "Values": ["s3:/", "a", "b"]}}, }, @@ -438,7 +439,7 @@ def test_training_step_tensorflow(sagemaker_session): "sagemaker_instance_type": {"Get": "Parameters.InstanceType"}, "sagemaker_distributed_dataparallel_custom_mpi_options": '""', }, - "ProfilerConfig": {"S3OutputPath": "s3://my-bucket/"}, + "ProfilerConfig": {"DisableProfiler": False, "S3OutputPath": "s3://my-bucket/"}, }, "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"}, } diff --git a/tests/unit/sagemaker/workflow/test_training_step.py b/tests/unit/sagemaker/workflow/test_training_step.py index 3e8b57b069..7f8e6b0c62 100644 --- a/tests/unit/sagemaker/workflow/test_training_step.py +++ b/tests/unit/sagemaker/workflow/test_training_step.py @@ -401,10 +401,6 @@ def test_training_step_with_estimator( } step_definition = json.loads(pipeline.definition())["Steps"][0] - # delete profiler rule configurations because of timestamp collision - del step_definition["Arguments"]["ProfilerRuleConfigurations"] - 
del step_args["ProfilerRuleConfigurations"] - assert step_definition == { "Name": "MyTrainingStep", "Description": "TrainingStep description", @@ -428,7 +424,6 @@ def test_training_step_with_estimator( # test idempotency step_def2 = json.loads(pipeline.definition())["Steps"][0] - del step_def2["Arguments"]["ProfilerRuleConfigurations"] assert step_definition == step_def2 @@ -537,10 +532,6 @@ def test_training_step_with_framework_estimator( del expected_step_args["OutputDataConfig"]["S3OutputPath"] del step_def["Arguments"]["OutputDataConfig"]["S3OutputPath"] - # delete profiler rule configurations because of timestamp collision - del step_def["Arguments"]["ProfilerRuleConfigurations"] - del expected_step_args["ProfilerRuleConfigurations"] - if "sagemaker_s3_output" in step_args["HyperParameters"]: del expected_step_args["HyperParameters"]["sagemaker_s3_output"] del step_def["Arguments"]["HyperParameters"]["sagemaker_s3_output"] @@ -555,7 +546,6 @@ def test_training_step_with_framework_estimator( step_def2 = json.loads(pipeline.definition())["Steps"][0] del step_def2["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] del step_def2["Arguments"]["OutputDataConfig"]["S3OutputPath"] - del step_def2["Arguments"]["ProfilerRuleConfigurations"] if "sagemaker_s3_output" in step_def2["Arguments"]["HyperParameters"]: del step_def2["Arguments"]["HyperParameters"]["sagemaker_s3_output"] assert step_def == step_def2 @@ -608,10 +598,6 @@ def test_training_step_with_framework_estimator_local_code( del expected_step_args["OutputDataConfig"]["S3OutputPath"] del step_def["Arguments"]["OutputDataConfig"]["S3OutputPath"] - # delete profiler rule configurations because of timestamp collision - del step_def["Arguments"]["ProfilerRuleConfigurations"] - del expected_step_args["ProfilerRuleConfigurations"] - if "sagemaker_s3_output" in step_args["HyperParameters"]: del expected_step_args["HyperParameters"]["sagemaker_s3_output"] del 
step_def["Arguments"]["HyperParameters"]["sagemaker_s3_output"] @@ -626,7 +612,6 @@ def test_training_step_with_framework_estimator_local_code( step_def2 = json.loads(pipeline.definition())["Steps"][0] del step_def2["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] del step_def2["Arguments"]["OutputDataConfig"]["S3OutputPath"] - del step_def2["Arguments"]["ProfilerRuleConfigurations"] if "sagemaker_s3_output" in step_def2["Arguments"]["HyperParameters"]: del step_def2["Arguments"]["HyperParameters"]["sagemaker_s3_output"] assert step_def == step_def2 @@ -701,10 +686,6 @@ def test_training_step_with_algorithm_base(algo_estimator, training_input, pipel del step_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] del step_def["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] - # delete profiler rule configurations because of timestamp collision - del step_def["Arguments"]["ProfilerRuleConfigurations"] - del step_args["ProfilerRuleConfigurations"] - assert step_def == { "Name": "MyTrainingStep", "Type": "Training", @@ -714,7 +695,6 @@ def test_training_step_with_algorithm_base(algo_estimator, training_input, pipel # test idempotency step_def2 = json.loads(pipeline.definition())["Steps"][0] del step_def2["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] - del step_def2["Arguments"]["ProfilerRuleConfigurations"] assert step_def == step_def2 @@ -789,10 +769,6 @@ def test_training_step_with_algorithm_base_local_code( del step_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] del step_def["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] - # delete profiler rule configurations because of timestamp collision - del step_def["Arguments"]["ProfilerRuleConfigurations"] - del step_args["ProfilerRuleConfigurations"] - assert step_def == { "Name": "MyTrainingStep", "Type": "Training", @@ -802,7 +778,6 @@ def test_training_step_with_algorithm_base_local_code( 
# test idempotency step_def2 = json.loads(pipeline.definition())["Steps"][0] del step_def2["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] - del step_def2["Arguments"]["ProfilerRuleConfigurations"] assert step_def == step_def2 diff --git a/tests/unit/sagemaker/workflow/test_utils.py b/tests/unit/sagemaker/workflow/test_utils.py index c8d86c5866..d1b81f3148 100644 --- a/tests/unit/sagemaker/workflow/test_utils.py +++ b/tests/unit/sagemaker/workflow/test_utils.py @@ -107,6 +107,7 @@ def test_repack_model_step(estimator): } ], "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"}, + "ProfilerConfig": {"DisableProfiler": True}, "ResourceConfig": { "InstanceCount": 1, "InstanceType": "ml.m5.large", @@ -188,6 +189,7 @@ def test_repack_model_step_with_source_dir(estimator, source_dir): } ], "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"}, + "ProfilerConfig": {"DisableProfiler": True}, "ResourceConfig": { "InstanceCount": 1, "InstanceType": "ml.m5.large", diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index 7cc973440f..eca4a9bf80 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -150,14 +150,8 @@ def _create_train_job(version, py_version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index 868da88d78..8b771f9184 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -25,7 +25,10 @@ from botocore.exceptions import ClientError from mock import ANY, MagicMock, Mock, patch, PropertyMock from 
sagemaker.huggingface.estimator import HuggingFace -from sagemaker.jumpstart.constants import JUMPSTART_BUCKET_NAME_SET, JUMPSTART_RESOURCE_BASE_NAME +from sagemaker.jumpstart.constants import ( + JUMPSTART_BUCKET_NAME_SET, + JUMPSTART_RESOURCE_BASE_NAME, +) from sagemaker.jumpstart.enums import JumpStartTag import sagemaker.local @@ -106,7 +109,11 @@ "training_steps": "100", }, "RoleArn": "arn:aws:iam::366:role/SageMakerRole", - "ResourceConfig": {"VolumeSizeInGB": 30, "InstanceCount": 1, "InstanceType": "ml.c4.xlarge"}, + "ResourceConfig": { + "VolumeSizeInGB": 30, + "InstanceCount": 1, + "InstanceType": "ml.c4.xlarge", + }, "EnableNetworkIsolation": False, "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60}, "TrainingJobName": "neo", @@ -143,7 +150,10 @@ } MOCKED_S3_URI = "s3://mocked_s3_uri_from_source_dir" MOCKED_PIPELINE_CONFIG = _PipelineConfig( - "test-pipeline", "test-training-step", "code-hash-0123456789", "config-hash-0123456789" + "test-pipeline", + "test-training-step", + "code-hash-0123456789", + "config-hash-0123456789", ) @@ -247,7 +257,9 @@ def pipeline_session(): session_mock.resource.return_value = resource_mock session_mock.client.return_value = client_mock return PipelineSession( - boto_session=session_mock, sagemaker_client=client_mock, default_bucket=BUCKET_NAME + boto_session=session_mock, + sagemaker_client=client_mock, + default_bucket=BUCKET_NAME, ) @@ -322,7 +334,11 @@ def test_framework_all_init_args(sagemaker_session): }, "metric_definitions": [{"Name": "validation-rmse", "Regex": "validation-rmse=(\\d+)"}], "encrypt_inter_container_traffic": True, - "environment": {"env_key1": "env_val1", "env_key2": "env_val2", "env_key3": "env_val3"}, + "environment": { + "env_key1": "env_val1", + "env_key2": "env_val2", + "env_key3": "env_val3", + }, "experiment_config": None, "checkpoint_s3_uri": "s3://bucket/checkpoint", "checkpoint_local_path": "file://local/checkpoint", @@ -379,7 +395,8 @@ def 
test_framework_with_debugger_and_built_in_rule(sagemaker_session): rule_parameters={"threshold": "120", "stop_training_on_fire": "True"}, collections_to_save=[ CollectionConfig( - name="losses", parameters={"train.save_interval": "50", "eval.save_interval": "10"} + name="losses", + parameters={"train.save_interval": "50", "eval.save_interval": "10"}, ) ], ) @@ -405,18 +422,23 @@ def test_framework_with_debugger_and_built_in_rule(sagemaker_session): "CollectionConfigurations": [ { "CollectionName": "losses", - "CollectionParameters": {"train.save_interval": "50", "eval.save_interval": "10"}, + "CollectionParameters": { + "train.save_interval": "50", + "eval.save_interval": "10", + }, } ], } assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } def test_framework_with_debugger_and_custom_rule(sagemaker_session): hook_config = DebuggerHookConfig( - s3_output_path="s3://output", collection_configs=[CollectionConfig(name="weights")] + s3_output_path="s3://output", + collection_configs=[CollectionConfig(name="weights")], ) debugger_custom_rule = Rule.custom( name="CustomRule", @@ -536,7 +558,8 @@ def test_framework_with_debugger_rule_and_multiple_actions(sagemaker_session): def test_framework_with_only_debugger_hook_config(sagemaker_session): hook_config = DebuggerHookConfig( - s3_output_path="s3://output", collection_configs=[CollectionConfig(name="weights")] + s3_output_path="s3://output", + collection_configs=[CollectionConfig(name="weights")], ) f = DummyFramework( entry_point=SCRIPT_PATH, @@ -574,15 +597,9 @@ def test_framework_without_debugger_and_profiler(time, sagemaker_session): } assert "debugger_rule_configs" not in args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": 
"895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] def test_framework_with_debugger_and_profiler_rules(sagemaker_session): @@ -591,7 +608,8 @@ def test_framework_with_debugger_and_profiler_rules(sagemaker_session): rule_parameters={"threshold": "120", "stop_training_on_fire": "True"}, collections_to_save=[ CollectionConfig( - name="losses", parameters={"train.save_interval": "50", "eval.save_interval": "10"} + name="losses", + parameters={"train.save_interval": "50", "eval.save_interval": "10"}, ) ], ) @@ -639,18 +657,25 @@ def test_framework_with_debugger_and_profiler_rules(sagemaker_session): "CollectionConfigurations": [ { "CollectionName": "losses", - "CollectionParameters": {"train.save_interval": "50", "eval.save_interval": "10"}, + "CollectionParameters": { + "train.save_interval": "50", + "eval.save_interval": "10", + }, } ], } assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } assert args["profiler_rule_configs"] == [ { "RuleConfigurationName": "CustomProfilerReportRule", "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport", "CPUBottleneck_threshold": "90"}, + "RuleParameters": { + "rule_to_invoke": "ProfilerReport", + "CPUBottleneck_threshold": "90", + }, }, { "InstanceType": "c4.4xlarge", @@ -679,6 +704,7 @@ def test_framework_with_only_profiler_rule_specified(sagemaker_session): sagemaker_session.train.assert_called_once() _, args = sagemaker_session.train.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } assert args["profiler_rule_configs"] == [ @@ -711,16 +737,10 @@ def test_framework_with_profiler_config_without_s3_output_path(time, sagemaker_s sagemaker_session.train.assert_called_once() _, args = 
sagemaker_session.train.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), "ProfilingIntervalInMilliseconds": 1000, } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] @pytest.mark.parametrize("region", PROFILER_UNSUPPORTED_REGIONS) @@ -745,7 +765,9 @@ def test_framework_with_no_default_profiler_in_unsupported_region(region): f.fit("s3://mydata") sms.train.assert_called_once() _, args = sms.train.call_args - assert args.get("profiler_config") is None + # assert args.get("profiler_config") == {"DisableProfiler": True} + # temporarily check if "DisableProfiler" flag is true until s3_output is changed to optional in service + assert args.get("profiler_config")["DisableProfiler"] is True assert args.get("profiler_rule_configs") is None @@ -865,7 +887,10 @@ def test_framework_with_profiler_config_and_profiler_disabled(sagemaker_session) disable_profiler=True, ) f.fit("s3://mydata") - assert "profiler_config cannot be set when disable_profiler is True." in str(error) + # assert "profiler_config cannot be set when disable_profiler is True." in str(error) + assert "profiler_config.disable_profiler cannot be False when disable_profiler is True." 
in str( + error + ) def test_framework_with_profiler_rule_and_profiler_disabled(sagemaker_session): @@ -927,15 +952,9 @@ def test_framework_with_enabling_default_profiling( sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] @patch("time.time", return_value=TIME) @@ -960,15 +979,9 @@ def test_framework_with_enabling_default_profiling_with_existed_s3_output_path( sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://custom/", } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] def test_framework_with_disabling_profiling_when_profiler_is_already_disabled( @@ -1001,7 +1014,9 @@ def test_framework_with_disabling_profiling(sagemaker_session, training_job_desc f.disable_profiling() sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args - assert args["profiler_config"] == {"DisableProfiler": True} + # assert args["profiler_config"] == {"DisableProfiler": True} + # temporarily check if "DisableProfiler" flag is true until s3_output is changed to optional in service + assert args.get("profiler_config")["DisableProfiler"] is True def 
test_framework_with_update_profiler_when_no_training_job(sagemaker_session): @@ -1058,6 +1073,7 @@ def test_framework_with_update_profiler_config(sagemaker_session): sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "ProfilingIntervalInMilliseconds": 1000, } assert "profiler_rule_configs" not in args @@ -1086,7 +1102,7 @@ def test_framework_with_update_profiler_report_rule(sagemaker_session): "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, } ] - assert "profiler_config" not in args + assert args["profiler_config"]["DisableProfiler"] is False def test_framework_with_disable_framework_metrics(sagemaker_session): @@ -1101,11 +1117,16 @@ def test_framework_with_disable_framework_metrics(sagemaker_session): f.update_profiler(disable_framework_metrics=True) sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args - assert args["profiler_config"] == {"ProfilingParameters": {}} + assert args["profiler_config"] == { + "DisableProfiler": False, + "ProfilingParameters": {}, + } assert "profiler_rule_configs" not in args -def test_framework_with_disable_framework_metrics_and_update_system_metrics(sagemaker_session): +def test_framework_with_disable_framework_metrics_and_update_system_metrics( + sagemaker_session, +): f = DummyFramework( entry_point=SCRIPT_PATH, role=ROLE, @@ -1118,13 +1139,16 @@ def test_framework_with_disable_framework_metrics_and_update_system_metrics(sage sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "ProfilingIntervalInMilliseconds": 1000, "ProfilingParameters": {}, } assert "profiler_rule_configs" not in args -def test_framework_with_disable_framework_metrics_and_update_framework_params(sagemaker_session): +def 
test_framework_with_disable_framework_metrics_and_update_framework_params( + sagemaker_session, +): with pytest.raises(ValueError) as error: f = DummyFramework( entry_point=SCRIPT_PATH, @@ -1160,7 +1184,10 @@ def test_framework_with_update_profiler_config_and_profiler_rule(sagemaker_sessi f.update_profiler(rules=[profiler_custom_rule], system_monitor_interval_millis=1000) sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args - assert args["profiler_config"] == {"ProfilingIntervalInMilliseconds": 1000} + assert args["profiler_config"] == { + "DisableProfiler": False, + "ProfilingIntervalInMilliseconds": 1000, + } assert args["profiler_rule_configs"] == [ { "InstanceType": "c4.4xlarge", @@ -1659,7 +1686,10 @@ def test_start_new_wait_called(strftime, sagemaker_session): def test_attach_framework(sagemaker_session, training_job_description): - training_job_description["VpcConfig"] = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]} + training_job_description["VpcConfig"] = { + "Subnets": ["foo"], + "SecurityGroupIds": ["bar"], + } training_job_description["EnableNetworkIsolation"] = True framework_estimator = DummyFramework.attach( @@ -1753,7 +1783,8 @@ def test_attach_framework_with_inter_container_traffic_encryption_flag( def test_attach_framework_base_from_generated_name(sagemaker_session, training_job_description): base_job_name = "neo" framework_estimator = DummyFramework.attach( - training_job_name=utils.name_from_base("neo"), sagemaker_session=sagemaker_session + training_job_name=utils.name_from_base("neo"), + sagemaker_session=sagemaker_session, ) assert framework_estimator.base_job_name == base_job_name @@ -1948,7 +1979,8 @@ def test_git_support_bad_repo_url_format(sagemaker_session): @patch( "sagemaker.git_utils.git_clone_repo", side_effect=subprocess.CalledProcessError( - returncode=1, cmd="git clone https://github.com/aws/no-such-repo.git /tmp/repo_dir" + returncode=1, + cmd="git clone 
https://github.com/aws/no-such-repo.git /tmp/repo_dir", ), ) def test_git_support_git_clone_fail(git_clone_repo, sagemaker_session): @@ -1973,7 +2005,11 @@ def test_git_support_git_clone_fail(git_clone_repo, sagemaker_session): ), ) def test_git_support_branch_not_exist(git_clone_repo, sagemaker_session): - git_config = {"repo": GIT_REPO, "branch": "branch-that-does-not-exist", "commit": COMMIT} + git_config = { + "repo": GIT_REPO, + "branch": "branch-that-does-not-exist", + "commit": COMMIT, + } fw = DummyFramework( entry_point="entry_point", git_config=git_config, @@ -1994,7 +2030,11 @@ def test_git_support_branch_not_exist(git_clone_repo, sagemaker_session): ), ) def test_git_support_commit_not_exist(git_clone_repo, sagemaker_session): - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": "commit-sha-that-does-not-exist"} + git_config = { + "repo": GIT_REPO, + "branch": BRANCH, + "commit": "commit-sha-that-does-not-exist", + } fw = DummyFramework( entry_point="entry_point", git_config=git_config, @@ -2137,7 +2177,11 @@ def test_git_support_with_token_2fa(git_clone_repo, sagemaker_session): }, ) def test_git_support_ssh_no_passphrase_needed(git_clone_repo, sagemaker_session): - git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + git_config = { + "repo": PRIVATE_GIT_REPO_SSH, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + } entry_point = "entry_point" fw = DummyFramework( entry_point=entry_point, @@ -2159,7 +2203,11 @@ def test_git_support_ssh_no_passphrase_needed(git_clone_repo, sagemaker_session) ), ) def test_git_support_ssh_passphrase_required(git_clone_repo, sagemaker_session): - git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + git_config = { + "repo": PRIVATE_GIT_REPO_SSH, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + } entry_point = "entry_point" fw = DummyFramework( entry_point=entry_point, @@ -2457,7 +2505,9 @@ def 
test_estimator_transformer_creation_with_optional_params(create_model, sagem ) create_model.assert_called_with( - vpc_config_override=new_vpc_config, model_kms_key=kms_key, enable_network_isolation=True + vpc_config_override=new_vpc_config, + model_kms_key=kms_key, + enable_network_isolation=True, ) assert transformer.strategy == strategy @@ -2635,14 +2685,7 @@ def test_unsupported_type_in_dict(): "input_config": None, "input_mode": "File", "output_config": {"S3OutputPath": OUTPUT_PATH}, - "profiler_config": {"S3OutputPath": OUTPUT_PATH}, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], + "profiler_config": {"DisableProfiler": False, "S3OutputPath": OUTPUT_PATH}, "resource_config": { "InstanceCount": INSTANCE_COUNT, "InstanceType": INSTANCE_TYPE, @@ -2749,7 +2792,11 @@ def test_fit_deploy_tags_in_estimator(name_from_base, sagemaker_session): @patch("sagemaker.estimator.name_from_base") def test_fit_deploy_tags(name_from_base, sagemaker_session): estimator = Estimator( - IMAGE_URI, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, sagemaker_session=sagemaker_session + IMAGE_URI, + ROLE, + INSTANCE_COUNT, + INSTANCE_TYPE, + sagemaker_session=sagemaker_session, ) estimator.fit() @@ -3197,7 +3244,10 @@ def test_generic_training_job_analytics(sagemaker_session): "TrainingInputMode": "File", "MetricDefinitions": [ {"Name": "train:loss", "Regex": "train_loss=([0-9]+\\.[0-9]+)"}, - {"Name": "validation:loss", "Regex": "valid_loss=([0-9]+\\.[0-9]+)"}, + { + "Name": "validation:loss", + "Regex": "valid_loss=([0-9]+\\.[0-9]+)", + }, ], }, }, @@ -3228,7 +3278,11 @@ def test_generic_create_model_vpc_config_override(sagemaker_session): vpc_config_b = {"Subnets": ["foo", "bar"], "SecurityGroupIds": ["baz"]} e = Estimator( - IMAGE_URI, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, 
sagemaker_session=sagemaker_session + IMAGE_URI, + ROLE, + INSTANCE_COUNT, + INSTANCE_TYPE, + sagemaker_session=sagemaker_session, ) e.fit({"train": "s3://bucket/training-prefix"}) assert e.get_vpc_config() is None @@ -3254,7 +3308,11 @@ def test_generic_deploy_vpc_config_override(sagemaker_session): vpc_config_b = {"Subnets": ["foo", "bar"], "SecurityGroupIds": ["baz"]} e = Estimator( - IMAGE_URI, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, sagemaker_session=sagemaker_session + IMAGE_URI, + ROLE, + INSTANCE_COUNT, + INSTANCE_TYPE, + sagemaker_session=sagemaker_session, ) e.fit({"train": "s3://bucket/training-prefix"}) e.deploy(INSTANCE_COUNT, INSTANCE_TYPE) @@ -3274,7 +3332,11 @@ def test_generic_deploy_vpc_config_override(sagemaker_session): def test_generic_deploy_accelerator_type(sagemaker_session): e = Estimator( - IMAGE_URI, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, sagemaker_session=sagemaker_session + IMAGE_URI, + ROLE, + INSTANCE_COUNT, + INSTANCE_TYPE, + sagemaker_session=sagemaker_session, ) e.fit({"train": "s3://bucket/training-prefix"}) e.deploy(INSTANCE_COUNT, INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE) @@ -3617,7 +3679,13 @@ def test_file_output_path_not_supported_outside_local_mode(session_class): session_class.return_value = session with pytest.raises(RuntimeError): - Estimator(IMAGE_URI, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path="file:///tmp/model") + Estimator( + IMAGE_URI, + ROLE, + INSTANCE_COUNT, + INSTANCE_TYPE, + output_path="file:///tmp/model", + ) def test_prepare_init_params_from_job_description_with_image_training_job(): @@ -3726,7 +3794,10 @@ def test_prepare_for_training_with_name_based_on_image(sagemaker_session): @patch("sagemaker.algorithm.AlgorithmEstimator.validate_train_spec", Mock()) -@patch("sagemaker.algorithm.AlgorithmEstimator._parse_hyperparameters", Mock(return_value={})) +@patch( + "sagemaker.algorithm.AlgorithmEstimator._parse_hyperparameters", + Mock(return_value={}), +) def 
test_prepare_for_training_with_name_based_on_algorithm(sagemaker_session): estimator = AlgorithmEstimator( algorithm_arn="arn:aws:sagemaker:us-west-2:1234:algorithm/scikit-decision-trees-1542410022", @@ -3741,7 +3812,9 @@ def test_prepare_for_training_with_name_based_on_algorithm(sagemaker_session): @patch("sagemaker.workflow.utilities._pipeline_config", MOCKED_PIPELINE_CONFIG) -def test_prepare_for_training_with_pipeline_name_in_s3_path_no_source_dir(pipeline_session): +def test_prepare_for_training_with_pipeline_name_in_s3_path_no_source_dir( + pipeline_session, +): # script_uri is NOT provided -> use new cache key behavior that builds path using pipeline name + code_hash image_uri = "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.9.0-gpu-py38" model_uri = "s3://someprefix2/models/model.tar.gz" @@ -4211,7 +4284,10 @@ def test_script_mode_estimator_tags_jumpstart_models_with_no_estimator_js_tags( @patch("sagemaker.model.Model._upload_code") @patch("sagemaker.utils.repack_model") def test_all_framework_estimators_add_jumpstart_tags( - patched_repack_model, patched_upload_code, patched_tar_and_upload_dir, sagemaker_session + patched_repack_model, + patched_upload_code, + patched_tar_and_upload_dir, + sagemaker_session, ): sagemaker_session.boto_region_name = REGION @@ -4240,13 +4316,20 @@ def test_all_framework_estimators_add_jumpstart_tags( "transformers_version": "4.6.1", "instance_type": "ml.p2.xlarge", }, - MXNet: {"framework_version": "1.7.0", "py_version": "py3", "instance_type": "ml.p2.xlarge"}, + MXNet: { + "framework_version": "1.7.0", + "py_version": "py3", + "instance_type": "ml.p2.xlarge", + }, SKLearn: {"framework_version": "0.23-1", "instance_type": "ml.m2.xlarge"}, XGBoost: {"framework_version": "1.3-1", "instance_type": "ml.m2.xlarge"}, } jumpstart_model_uri = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[0]}/model_dirs/model.tar.gz" jumpstart_model_uri_2 = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[1]}/model_dirs/model.tar.gz" - for 
framework_estimator_class, kwargs in framework_estimator_classes_to_kwargs.items(): + for ( + framework_estimator_class, + kwargs, + ) in framework_estimator_classes_to_kwargs.items(): estimator = framework_estimator_class( entry_point=ENTRY_POINT, role=ROLE, @@ -4362,7 +4445,10 @@ def test_script_mode_estimator_uses_jumpstart_base_name_with_js_models( @patch("sagemaker.model.Model._upload_code") @patch("sagemaker.utils.repack_model") def test_all_framework_estimators_add_jumpstart_base_name( - patched_repack_model, patched_upload_code, patched_tar_and_upload_dir, sagemaker_session + patched_repack_model, + patched_upload_code, + patched_tar_and_upload_dir, + sagemaker_session, ): sagemaker_session.boto_region_name = REGION @@ -4391,13 +4477,20 @@ def test_all_framework_estimators_add_jumpstart_base_name( "transformers_version": "4.6.1", "instance_type": "ml.p2.xlarge", }, - MXNet: {"framework_version": "1.7.0", "py_version": "py3", "instance_type": "ml.p2.xlarge"}, + MXNet: { + "framework_version": "1.7.0", + "py_version": "py3", + "instance_type": "ml.p2.xlarge", + }, SKLearn: {"framework_version": "0.23-1", "instance_type": "ml.m2.xlarge"}, XGBoost: {"framework_version": "1.3-1", "instance_type": "ml.m2.xlarge"}, } jumpstart_model_uri = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[0]}/model_dirs/model.tar.gz" jumpstart_model_uri_2 = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[1]}/model_dirs/model.tar.gz" - for framework_estimator_class, kwargs in framework_estimator_classes_to_kwargs.items(): + for ( + framework_estimator_class, + kwargs, + ) in framework_estimator_classes_to_kwargs.items(): estimator = framework_estimator_class( entry_point=ENTRY_POINT, role=ROLE, diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 9ba3e17ff3..f12d8e160f 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -160,14 +160,8 @@ def _get_train_args(job_name): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - 
"profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.4.0-cpu-py3", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index c8aad13774..5691834c3a 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -158,14 +158,8 @@ def _create_train_job(version, py_version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_rl.py b/tests/unit/test_rl.py index 2035636e76..0c0a9c6d64 100644 --- a/tests/unit/test_rl.py +++ b/tests/unit/test_rl.py @@ -153,14 +153,8 @@ def _create_train_job(toolkit, toolkit_version, framework): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, "retry_strategy": None, diff --git a/tests/unit/test_sklearn.py b/tests/unit/test_sklearn.py index c3e984e0b7..430cb484b4 100644 --- a/tests/unit/test_sklearn.py +++ b/tests/unit/test_sklearn.py @@ -140,14 +140,8 @@ def _create_train_job(version): "CollectionConfigurations": [], 
"S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_xgboost.py b/tests/unit/test_xgboost.py index d58c4992cd..87a853d5d0 100644 --- a/tests/unit/test_xgboost.py +++ b/tests/unit/test_xgboost.py @@ -154,14 +154,8 @@ def _create_train_job(version, instance_count=1, instance_type="ml.c4.4xlarge"): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, }