diff --git a/src/sagemaker/debugger/profiler_config.py b/src/sagemaker/debugger/profiler_config.py
index 3d4a24e8d1..90b1565b9a 100644
--- a/src/sagemaker/debugger/profiler_config.py
+++ b/src/sagemaker/debugger/profiler_config.py
@@ -32,6 +32,7 @@ def __init__(
         s3_output_path: Optional[Union[str, PipelineVariable]] = None,
         system_monitor_interval_millis: Optional[Union[int, PipelineVariable]] = None,
         framework_profile_params: Optional[FrameworkProfile] = None,
+        disable_profiler: Optional[bool] = False,
     ):
         """Initialize a ``ProfilerConfig`` instance.
 
@@ -78,6 +79,7 @@ class and SageMaker Framework estimators.
         self.s3_output_path = s3_output_path
         self.system_monitor_interval_millis = system_monitor_interval_millis
         self.framework_profile_params = framework_profile_params
+        self.disable_profiler = disable_profiler
 
     def _to_request_dict(self):
         """Generate a request dictionary using the parameters provided when initializing the object.
@@ -91,6 +93,8 @@ def _to_request_dict(self):
         if self.s3_output_path is not None:
             profiler_config_request["S3OutputPath"] = self.s3_output_path
 
+        profiler_config_request["DisableProfiler"] = self.disable_profiler
+
         if self.system_monitor_interval_millis is not None:
             profiler_config_request[
                 "ProfilingIntervalInMilliseconds"
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 6f729267de..bad41ac535 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -937,26 +937,29 @@ def _prepare_collection_configs(self):
     def _prepare_profiler_for_training(self):
         """Set necessary values and do basic validations in profiler config and profiler rules.
 
-        When user explicitly set rules to an empty list, default profiler rule won't be enabled.
-        Default profiler rule will be enabled in supported regions when either:
-        1. user doesn't specify any rules, i.e., rules=None; or
-        2. user only specify debugger rules, i.e., rules=[Rule.sagemaker(...)]
+        No default profiler rule will be used. The user needs to specify rules explicitly.
         """
         if self.disable_profiler:
-            if self.profiler_config:
-                raise RuntimeError("profiler_config cannot be set when disable_profiler is True.")
+            if self.profiler_config and not self.profiler_config.disable_profiler:
+                raise RuntimeError(
+                    "profiler_config.disable_profiler cannot be False"
+                    + " when disable_profiler is True."
+                )
             if self.profiler_rules:
                 raise RuntimeError("ProfilerRule cannot be set when disable_profiler is True.")
         elif _region_supports_profiler(self.sagemaker_session.boto_region_name):
             if self.profiler_config is None:
                 self.profiler_config = ProfilerConfig(s3_output_path=self.output_path)
             if self.rules is None or (self.rules and not self.profiler_rules):
-                self.profiler_rules = [get_default_profiler_rule()]
+                self.profiler_rules = []
 
         if self.profiler_config and not self.profiler_config.s3_output_path:
             self.profiler_config.s3_output_path = self.output_path
 
         self.profiler_rule_configs = self._prepare_profiler_rules()
+        # if profiler_config is still None, it means the job has profiler disabled
+        if self.profiler_config is None:
+            self.profiler_config = ProfilerConfig(disable_profiler=True)
 
     def _prepare_profiler_rules(self):
         """Set any necessary values in profiler rules, if they are provided."""
@@ -1047,7 +1050,7 @@ def latest_job_profiler_artifacts_path(self):
             error_message="""Cannot get the profiling output artifacts path.
The Estimator is not associated with a training job.""" ) - if self.profiler_config is not None: + if self.profiler_config is not None and not self.profiler_config.disable_profiler: return os.path.join( self.profiler_config.s3_output_path, self.latest_training_job.name, @@ -1893,8 +1896,8 @@ def enable_default_profiling(self): else: self.profiler_config = ProfilerConfig(s3_output_path=self.output_path) - self.profiler_rules = [get_default_profiler_rule()] - self.profiler_rule_configs = self._prepare_profiler_rules() + self.profiler_rules = [] + self.profiler_rule_configs = [] _TrainingJob.update( self, self.profiler_rule_configs, self.profiler_config._to_request_dict() diff --git a/tests/integ/test_profiler.py b/tests/integ/test_profiler.py index bddd53e20c..61ac0c70e8 100644 --- a/tests/integ/test_profiler.py +++ b/tests/integ/test_profiler.py @@ -13,7 +13,6 @@ from __future__ import absolute_import import os -import re import time import uuid @@ -22,7 +21,6 @@ from sagemaker.debugger import ( DebuggerHookConfig, FrameworkProfile, - get_rule_container_image_uri, ProfilerConfig, ProfilerRule, Rule, @@ -103,13 +101,6 @@ def test_mxnet_with_default_profiler_config_and_profiler_rule( ) assert job_description.get("ProfilingStatus") == "Enabled" - profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_rule_configuration["RuleConfigurationName"]) - assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_rule_configuration["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - with pytest.raises(ValueError) as error: mx.enable_default_profiling() assert "Debugger monitoring is already enabled." 
in str(error) @@ -160,13 +151,6 @@ def test_mxnet_with_custom_profiler_config_then_update_rule_and_config( assert job_description.get("ProfilerConfig") == profiler_config._to_request_dict() assert job_description.get("ProfilingStatus") == "Enabled" - profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_rule_configuration["RuleConfigurationName"]) - assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_rule_configuration["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - _wait_until_training_can_be_updated(sagemaker_session.sagemaker_client, training_job_name) mx.update_profiler( @@ -178,13 +162,6 @@ def test_mxnet_with_custom_profiler_config_then_update_rule_and_config( assert job_description["ProfilerConfig"]["S3OutputPath"] == profiler_config.s3_output_path assert job_description["ProfilerConfig"]["ProfilingIntervalInMilliseconds"] == 500 - profiler_report_rule_config = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_report_rule_config["RuleConfigurationName"]) - assert profiler_report_rule_config["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name - ) - assert profiler_report_rule_config["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - def test_mxnet_with_built_in_profiler_rule_with_custom_parameters( sagemaker_session, @@ -387,13 +364,6 @@ def test_mxnet_with_enable_framework_metrics_then_update_framework_metrics( == updated_framework_profile.profiling_parameters ) - profiler_rule_configuration = job_description.get("ProfilerRuleConfigurations")[0] - assert re.match(r"ProfilerReport-\d*", profiler_rule_configuration["RuleConfigurationName"]) - assert profiler_rule_configuration["RuleEvaluatorImage"] == get_rule_container_image_uri( - mx.sagemaker_session.boto_region_name 
- ) - assert profiler_rule_configuration["RuleParameters"] == {"rule_to_invoke": "ProfilerReport"} - def test_mxnet_with_disable_profiler_then_enable_default_profiling( sagemaker_session, @@ -431,12 +401,10 @@ def test_mxnet_with_disable_profiler_then_enable_default_profiling( ) job_description = mx.latest_training_job.describe() - assert job_description.get("ProfilerConfig") is None assert job_description.get("ProfilerRuleConfigurations") is None assert job_description.get("ProfilingStatus") == "Disabled" _wait_until_training_can_be_updated(sagemaker_session.sagemaker_client, training_job_name) - mx.enable_default_profiling() job_description = mx.latest_training_job.describe() diff --git a/tests/unit/sagemaker/huggingface/test_estimator.py b/tests/unit/sagemaker/huggingface/test_estimator.py index c391d45382..4999bf56c9 100644 --- a/tests/unit/sagemaker/huggingface/test_estimator.py +++ b/tests/unit/sagemaker/huggingface/test_estimator.py @@ -142,14 +142,8 @@ def _create_train_job(version, base_framework_version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/tensorflow/test_estimator.py b/tests/unit/sagemaker/tensorflow/test_estimator.py index 2e7576421f..3650586063 100644 --- a/tests/unit/sagemaker/tensorflow/test_estimator.py +++ b/tests/unit/sagemaker/tensorflow/test_estimator.py @@ -135,14 +135,8 @@ def _create_train_job(tf_version, horovod=False, ps=False, py_version="py2", smd "metric_definitions": None, "environment": None, "experiment_config": None, - "profiler_rule_configs": [ - { - "RuleConfigurationName": 
"ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py index af46cf4360..f45bc58c4d 100644 --- a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py @@ -144,14 +144,8 @@ def _create_train_job( "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py index 5aef9316da..65a3cf20c9 100644 --- a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py @@ -142,14 +142,8 @@ def _create_train_job( "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": 
"s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py index 7517f3a641..9b4b180fe4 100644 --- a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py +++ b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py @@ -144,14 +144,8 @@ def _create_train_job(framework_version, instance_type, training_compiler_config "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/sagemaker/workflow/test_step_collections.py b/tests/unit/sagemaker/workflow/test_step_collections.py index 2bf47a79d0..95738c99ca 100644 --- a/tests/unit/sagemaker/workflow/test_step_collections.py +++ b/tests/unit/sagemaker/workflow/test_step_collections.py @@ -796,6 +796,7 @@ def test_register_model_with_model_repack_with_estimator( "CollectionConfigurations": [], "S3OutputPath": f"s3://{BUCKET}/", }, + "ProfilerConfig": {"DisableProfiler": True}, "HyperParameters": { "inference_script": '"dummy_script.py"', "dependencies": f'"{dummy_requirements}"', @@ -923,6 +924,7 @@ def test_register_model_with_model_repack_with_model(model, model_metrics, drift "CollectionConfigurations": [], "S3OutputPath": f"s3://{BUCKET}/", }, + "ProfilerConfig": {"DisableProfiler": True}, "HyperParameters": { "inference_script": '"dummy_script.py"', "model_archive": '"s3://my-bucket/model.tar.gz"', @@ -1052,6 +1054,7 @@ def test_register_model_with_model_repack_with_pipeline_model( "CollectionConfigurations": [], "S3OutputPath": f"s3://{BUCKET}/", }, + 
"ProfilerConfig": {"DisableProfiler": True}, "HyperParameters": { "dependencies": "null", "inference_script": '"dummy_script.py"', @@ -1243,6 +1246,7 @@ def test_estimator_transformer_with_model_repack_with_estimator(estimator): "TrainingImage": "246618743249.dkr.ecr.us-west-2.amazonaws.com/" + "sagemaker-scikit-learn:0.23-1-cpu-py3", }, + "ProfilerConfig": {"DisableProfiler": True}, "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"}, "StoppingCondition": {"MaxRuntimeInSeconds": 86400}, "ResourceConfig": { diff --git a/tests/unit/sagemaker/workflow/test_steps.py b/tests/unit/sagemaker/workflow/test_steps.py index 6161537220..d9ed713f46 100644 --- a/tests/unit/sagemaker/workflow/test_steps.py +++ b/tests/unit/sagemaker/workflow/test_steps.py @@ -374,6 +374,7 @@ def test_training_step_base_estimator(sagemaker_session): "CollectionConfigurations": [], }, "ProfilerConfig": { + "DisableProfiler": False, "ProfilingIntervalInMilliseconds": 500, "S3OutputPath": {"Std:Join": {"On": "/", "Values": ["s3:/", "a", "b"]}}, }, @@ -483,7 +484,7 @@ def test_training_step_tensorflow(sagemaker_session): "sagemaker_instance_type": {"Get": "Parameters.InstanceType"}, "sagemaker_distributed_dataparallel_custom_mpi_options": '""', }, - "ProfilerConfig": {"S3OutputPath": "s3://my-bucket/"}, + "ProfilerConfig": {"DisableProfiler": False, "S3OutputPath": "s3://my-bucket/"}, }, "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"}, } diff --git a/tests/unit/sagemaker/workflow/test_training_step.py b/tests/unit/sagemaker/workflow/test_training_step.py index 4133343c93..19b9a80224 100644 --- a/tests/unit/sagemaker/workflow/test_training_step.py +++ b/tests/unit/sagemaker/workflow/test_training_step.py @@ -307,10 +307,6 @@ def test_training_step_with_estimator( } step_definition = json.loads(pipeline.definition())["Steps"][0] - # delete profiler rule configurations because of timestamp collision - del step_definition["Arguments"]["ProfilerRuleConfigurations"] - del 
expected_step_arguments["ProfilerRuleConfigurations"] - assert step_definition == { "Name": "MyTrainingStep", "Description": "TrainingStep description", @@ -427,18 +423,6 @@ def test_training_step_with_framework_estimator( del step_args["OutputDataConfig"]["S3OutputPath"] del step_def["Arguments"]["OutputDataConfig"]["S3OutputPath"] - # trim timestamp so RuleConfigurationName will match - rule_config_name_step_args = step_args["ProfilerRuleConfigurations"][0]["RuleConfigurationName"] - step_args["ProfilerRuleConfigurations"][0][ - "RuleConfigurationName" - ] = rule_config_name_step_args[:-11] - rule_config_name_step_def = step_def["Arguments"]["ProfilerRuleConfigurations"][0][ - "RuleConfigurationName" - ] - step_def["Arguments"]["ProfilerRuleConfigurations"][0][ - "RuleConfigurationName" - ] = rule_config_name_step_def[:-11] - if "sagemaker_s3_output" in step_args["HyperParameters"]: del step_args["HyperParameters"]["sagemaker_s3_output"] del step_def["Arguments"]["HyperParameters"]["sagemaker_s3_output"] @@ -519,18 +503,6 @@ def test_training_step_with_algorithm_base(algo_estimator, training_input, pipel del step_args["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] del step_def["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3Uri"] - # trim timestamp so RuleConfigurationName will match - rule_config_name_step_args = step_args["ProfilerRuleConfigurations"][0]["RuleConfigurationName"] - step_args["ProfilerRuleConfigurations"][0][ - "RuleConfigurationName" - ] = rule_config_name_step_args[:-11] - rule_config_name_step_def = step_def["Arguments"]["ProfilerRuleConfigurations"][0][ - "RuleConfigurationName" - ] - step_def["Arguments"]["ProfilerRuleConfigurations"][0][ - "RuleConfigurationName" - ] = rule_config_name_step_def[:-11] - assert step_def == { "Name": "MyTrainingStep", "Type": "Training", diff --git a/tests/unit/sagemaker/workflow/test_utils.py b/tests/unit/sagemaker/workflow/test_utils.py index dcbf5a6421..a00c50cf3b 100644 
--- a/tests/unit/sagemaker/workflow/test_utils.py +++ b/tests/unit/sagemaker/workflow/test_utils.py @@ -157,6 +157,7 @@ def test_repack_model_step(estimator): } ], "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"}, + "ProfilerConfig": {"DisableProfiler": True}, "ResourceConfig": { "InstanceCount": 1, "InstanceType": "ml.m5.large", @@ -238,6 +239,7 @@ def test_repack_model_step_with_source_dir(estimator, source_dir): } ], "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"}, + "ProfilerConfig": {"DisableProfiler": True}, "ResourceConfig": { "InstanceCount": 1, "InstanceType": "ml.m5.large", diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index 7cc973440f..eca4a9bf80 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -150,14 +150,8 @@ def _create_train_job(version, py_version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index 34e6a43fcf..9e0eada95c 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -410,6 +410,7 @@ def test_framework_with_debugger_and_built_in_rule(sagemaker_session): ], } assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } @@ -574,15 +575,9 @@ def test_framework_without_debugger_and_profiler(time, sagemaker_session): } assert "debugger_rule_configs" not in args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } - assert args["profiler_rule_configs"] == [ - { - 
"RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] def test_framework_with_debugger_and_profiler_rules(sagemaker_session): @@ -644,6 +639,7 @@ def test_framework_with_debugger_and_profiler_rules(sagemaker_session): ], } assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } assert args["profiler_rule_configs"] == [ @@ -679,6 +675,7 @@ def test_framework_with_only_profiler_rule_specified(sagemaker_session): sagemaker_session.train.assert_called_once() _, args = sagemaker_session.train.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } assert args["profiler_rule_configs"] == [ @@ -711,16 +708,10 @@ def test_framework_with_profiler_config_without_s3_output_path(time, sagemaker_s sagemaker_session.train.assert_called_once() _, args = sagemaker_session.train.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), "ProfilingIntervalInMilliseconds": 1000, } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] @pytest.mark.parametrize("region", PROFILER_UNSUPPORTED_REGIONS) @@ -745,7 +736,9 @@ def test_framework_with_no_default_profiler_in_unsupported_region(region): f.fit("s3://mydata") sms.train.assert_called_once() _, args = sms.train.call_args - assert args.get("profiler_config") is None + # assert args.get("profiler_config") == {"DisableProfiler": True} + # temporarily check if "DisableProfiler" flag is true until s3_output is changed to optional in service + assert 
args.get("profiler_config")["DisableProfiler"] is True assert args.get("profiler_rule_configs") is None @@ -865,7 +858,9 @@ def test_framework_with_profiler_config_and_profiler_disabled(sagemaker_session) disable_profiler=True, ) f.fit("s3://mydata") - assert "profiler_config cannot be set when disable_profiler is True." in str(error) + assert "profiler_config.disable_profiler cannot be False when disable_profiler is True." in str( + error + ) def test_framework_with_profiler_rule_and_profiler_disabled(sagemaker_session): @@ -927,15 +922,9 @@ def test_framework_with_enabling_default_profiling( sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] @patch("time.time", return_value=TIME) @@ -960,15 +949,9 @@ def test_framework_with_enabling_default_profiling_with_existed_s3_output_path( sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "S3OutputPath": "s3://custom/", } - assert args["profiler_rule_configs"] == [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ] def test_framework_with_disabling_profiling_when_profiler_is_already_disabled( @@ -1001,7 +984,9 @@ def test_framework_with_disabling_profiling(sagemaker_session, training_job_desc f.disable_profiling() sagemaker_session.update_training_job.assert_called_once() _, 
args = sagemaker_session.update_training_job.call_args - assert args["profiler_config"] == {"DisableProfiler": True} + # assert args["profiler_config"] == {"DisableProfiler": True} + # temporarily check if "DisableProfiler" flag is true until s3_output is changed to optional in service + assert args.get("profiler_config")["DisableProfiler"] is True def test_framework_with_update_profiler_when_no_training_job(sagemaker_session): @@ -1058,6 +1043,7 @@ def test_framework_with_update_profiler_config(sagemaker_session): sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "ProfilingIntervalInMilliseconds": 1000, } assert "profiler_rule_configs" not in args @@ -1086,7 +1072,7 @@ def test_framework_with_update_profiler_report_rule(sagemaker_session): "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, } ] - assert "profiler_config" not in args + assert args["profiler_config"]["DisableProfiler"] is False def test_framework_with_disable_framework_metrics(sagemaker_session): @@ -1101,7 +1087,7 @@ def test_framework_with_disable_framework_metrics(sagemaker_session): f.update_profiler(disable_framework_metrics=True) sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args - assert args["profiler_config"] == {"ProfilingParameters": {}} + assert args["profiler_config"] == {"DisableProfiler": False, "ProfilingParameters": {}} assert "profiler_rule_configs" not in args @@ -1118,6 +1104,7 @@ def test_framework_with_disable_framework_metrics_and_update_system_metrics(sage sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args assert args["profiler_config"] == { + "DisableProfiler": False, "ProfilingIntervalInMilliseconds": 1000, "ProfilingParameters": {}, } @@ -1160,7 +1147,10 @@ def 
test_framework_with_update_profiler_config_and_profiler_rule(sagemaker_sessi f.update_profiler(rules=[profiler_custom_rule], system_monitor_interval_millis=1000) sagemaker_session.update_training_job.assert_called_once() _, args = sagemaker_session.update_training_job.call_args - assert args["profiler_config"] == {"ProfilingIntervalInMilliseconds": 1000} + assert args["profiler_config"] == { + "DisableProfiler": False, + "ProfilingIntervalInMilliseconds": 1000, + } assert args["profiler_rule_configs"] == [ { "InstanceType": "c4.4xlarge", @@ -2630,14 +2620,7 @@ def test_unsupported_type_in_dict(): "input_config": None, "input_mode": "File", "output_config": {"S3OutputPath": OUTPUT_PATH}, - "profiler_config": {"S3OutputPath": OUTPUT_PATH}, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], + "profiler_config": {"DisableProfiler": False, "S3OutputPath": OUTPUT_PATH}, "resource_config": { "InstanceCount": INSTANCE_COUNT, "InstanceType": INSTANCE_TYPE, diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 99b0e839b7..df0b44c71e 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -159,14 +159,8 @@ def _get_train_args(job_name): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.4.0-cpu-py3", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index 082f699d63..5f54198fce 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py 
@@ -157,14 +157,8 @@ def _create_train_job(version, py_version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_rl.py b/tests/unit/test_rl.py index 4efc2e5bf8..1e48ad86bc 100644 --- a/tests/unit/test_rl.py +++ b/tests/unit/test_rl.py @@ -152,14 +152,8 @@ def _create_train_job(toolkit, toolkit_version, framework): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, "retry_strategy": None, diff --git a/tests/unit/test_sklearn.py b/tests/unit/test_sklearn.py index 13cc755336..d2343c9faf 100644 --- a/tests/unit/test_sklearn.py +++ b/tests/unit/test_sklearn.py @@ -139,14 +139,8 @@ def _create_train_job(version): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, } diff --git a/tests/unit/test_xgboost.py b/tests/unit/test_xgboost.py index 82f27c19ae..3fbbc1c760 100644 --- 
a/tests/unit/test_xgboost.py +++ b/tests/unit/test_xgboost.py @@ -153,14 +153,8 @@ def _create_train_job(version, instance_count=1, instance_type="ml.c4.4xlarge"): "CollectionConfigurations": [], "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, - "profiler_rule_configs": [ - { - "RuleConfigurationName": "ProfilerReport-1510006209", - "RuleEvaluatorImage": "895741380848.dkr.ecr.us-west-2.amazonaws.com/sagemaker-debugger-rules:latest", - "RuleParameters": {"rule_to_invoke": "ProfilerReport"}, - } - ], "profiler_config": { + "DisableProfiler": False, "S3OutputPath": "s3://{}/".format(BUCKET_NAME), }, }