diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index cf9a999742..0d666397b1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,7 @@ CHANGELOG
 ========
 
 * bug-fix: Estimators: Fix serialization of single records
+* bug-fix: deprecate enable_cloudwatch_metrics from Framework Estimators.
 
 1.9.0
 =====
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
index 7959656535..691f1f35d2 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -15,6 +15,7 @@
 import json
 import logging
 import os
+import warnings
 from abc import ABCMeta
 from abc import abstractmethod
 from six import with_metaclass
@@ -550,8 +551,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
                 The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
                 For convenience, this accepts other types for keys and values, but ``str()`` will be called
                 to convert them before training.
-            enable_cloudwatch_metrics (bool): Whether training and hosting containers will
-                generate CloudWatch metrics under the AWS/SageMakerContainer namespace (default: False).
+            enable_cloudwatch_metrics (bool): [DEPRECATED] All SageMaker training jobs now emit CloudWatch
+                metrics. This parameter is ignored for now and will be removed in a future release.
             container_log_level (int): Log level to use within the container (default: logging.INFO).
                 Valid values are defined in the Python logging module.
             code_location (str): Name of the S3 bucket where custom code is uploaded (default: None).
@@ -564,7 +565,10 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
         super(Framework, self).__init__(**kwargs)
         self.source_dir = source_dir
         self.entry_point = entry_point
-        self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
+        if enable_cloudwatch_metrics:
+            warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.',
+                          DeprecationWarning)
+        self.enable_cloudwatch_metrics = False
         self.container_log_level = container_log_level
         self._hyperparameters = hyperparameters or {}
         self.code_location = code_location
diff --git a/src/sagemaker/mxnet/README.rst b/src/sagemaker/mxnet/README.rst
index bdb1916bd4..70434aadaa 100644
--- a/src/sagemaker/mxnet/README.rst
+++ b/src/sagemaker/mxnet/README.rst
@@ -543,9 +543,6 @@ The MXNetModel constructor takes the following arguments:
    directory with any other training source code dependencies including
    the entry point file. Structure within this directory will be
    preserved when training on SageMaker.
-- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training
-   and hosting containers will generate Cloudwatch metrics under the
-   AWS/SageMakerContainer namespace.
 - ``container_log_level (int):`` Log level to use within the container.
    Valid values are defined in the Python logging module.
 - ``code_location (str):`` Optional. Name of the S3 bucket where your
diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py
index e9a24ab78c..e6ad5b4a4f 100644
--- a/tests/unit/test_chainer.py
+++ b/tests/unit/test_chainer.py
@@ -66,7 +66,7 @@ def _get_full_gpu_image_uri(version):
 
 
 def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VERSION, train_instance_type=None,
-                       enable_cloudwatch_metrics=False, base_job_name=None, use_mpi=None, num_processes=None,
+                       base_job_name=None, use_mpi=None, num_processes=None,
                        process_slots_per_host=None, additional_mpi_options=None, **kwargs):
     return Chainer(entry_point=SCRIPT_PATH,
                    framework_version=framework_version,
@@ -74,7 +74,6 @@ def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VER
                    sagemaker_session=sagemaker_session,
                    train_instance_count=INSTANCE_COUNT,
                    train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
-                   enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                    base_job_name=base_job_name,
                    use_mpi=use_mpi,
                    num_processes=num_processes,
@@ -152,7 +151,6 @@ def _create_train_job_with_additional_hyperparameters(version):
         },
         'hyperparameters': {
             'sagemaker_program': json.dumps('dummy_script.py'),
-            'sagemaker_enable_cloudwatch_metrics': 'false',
             'sagemaker_container_log_level': str(logging.INFO),
             'sagemaker_job_name': json.dumps(JOB_NAME),
             'sagemaker_submit_directory':
@@ -225,12 +223,10 @@ def test_attach_with_additional_hyperparameters(sagemaker_session, chainer_versi
 def test_create_model(sagemaker_session, chainer_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       framework_version=chainer_version, container_log_level=container_log_level,
-                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
-                      enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     chainer.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -244,7 +240,6 @@ def test_create_model(sagemaker_session, chainer_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -269,13 +264,11 @@ def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     custom_image = 'ubuntu:latest'
     chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       image_name=custom_image, container_log_level=container_log_level,
-                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
-                      enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)
 
     chainer.fit(inputs='s3://mybucket/train', job_name='new_name')
     model = chainer.create_model()
diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py
index 16d7e49f25..1787fc00d3 100644
--- a/tests/unit/test_mxnet.py
+++ b/tests/unit/test_mxnet.py
@@ -101,11 +101,10 @@ def _create_train_job(version):
 def test_create_model(sagemaker_session, mxnet_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                framework_version=mxnet_version, container_log_level=container_log_level,
-               base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+               base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     mx.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -119,7 +118,6 @@ def test_create_model(sagemaker_session, mxnet_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -144,12 +142,11 @@ def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     custom_image = 'mxnet:2.0'
     mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                image_name=custom_image, container_log_level=container_log_level,
-               base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+               base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     mx.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -162,7 +159,6 @@ def test_create_model_with_custom_image(sagemaker_session):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 @patch('time.strftime', return_value=TIMESTAMP)
diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py
index 0b11dcee25..2cdafe2aef 100644
--- a/tests/unit/test_pytorch.py
+++ b/tests/unit/test_pytorch.py
@@ -64,7 +64,7 @@ def _get_full_gpu_image_uri(version, py_version=PYTHON_VERSION):
 
 
 def _pytorch_estimator(sagemaker_session, framework_version=defaults.PYTORCH_VERSION, train_instance_type=None,
-                       enable_cloudwatch_metrics=False, base_job_name=None, **kwargs):
+                       base_job_name=None, **kwargs):
     return PyTorch(entry_point=SCRIPT_PATH,
                    framework_version=framework_version,
                    py_version=PYTHON_VERSION,
@@ -72,7 +72,6 @@ def _pytorch_estimator(sagemaker_session, framework_version=defaults.PYTORCH_VER
                    sagemaker_session=sagemaker_session,
                    train_instance_count=INSTANCE_COUNT,
                    train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
-                   enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                    base_job_name=base_job_name,
                    **kwargs)
 
@@ -119,11 +118,10 @@ def _create_train_job(version):
 def test_create_model(sagemaker_session, pytorch_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       framework_version=pytorch_version, container_log_level=container_log_level,
-                      base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     pytorch.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -137,7 +135,6 @@ def test_create_model(sagemaker_session, pytorch_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -162,12 +159,11 @@ def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     image = 'pytorch:9000'
     pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                       train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                       container_log_level=container_log_level, image_name=image,
-                      base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                      base_job_name='job', source_dir=source_dir)
 
     job_name = 'new_name'
     pytorch.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -180,7 +176,6 @@ def test_create_model_with_custom_image(sagemaker_session):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 @patch('time.strftime', return_value=TIMESTAMP)
diff --git a/tests/unit/test_tf_estimator.py b/tests/unit/test_tf_estimator.py
index a6969b840b..4861843ca0 100644
--- a/tests/unit/test_tf_estimator.py
+++ b/tests/unit/test_tf_estimator.py
@@ -107,7 +107,7 @@ def _create_train_job(tf_version):
 
 
 def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_instance_type=None,
-              checkpoint_path=None, enable_cloudwatch_metrics=False, base_job_name=None,
+              checkpoint_path=None, base_job_name=None,
              training_steps=None, evaluation_steps=None, **kwargs):
     return TensorFlow(entry_point=SCRIPT_PATH,
                       training_steps=training_steps,
@@ -118,7 +118,6 @@ def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_in
                       train_instance_count=INSTANCE_COUNT,
                       train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
                       checkpoint_path=checkpoint_path,
-                      enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                       base_job_name=base_job_name,
                       **kwargs)
 
@@ -183,12 +182,11 @@ def test_tf_nonexistent_requirements_path(sagemaker_session):
 def test_create_model(sagemaker_session, tf_version):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                     training_steps=1000, evaluation_steps=10,
                     train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                     framework_version=tf_version, container_log_level=container_log_level, base_job_name='job',
-                    source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                    source_dir=source_dir)
 
     job_name = 'doing something'
     tf.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -202,7 +200,6 @@ def test_create_model(sagemaker_session, tf_version):
     assert model.name == job_name
     assert model.container_log_level == container_log_level
     assert model.source_dir == source_dir
-    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
 
 
 def test_create_model_with_optional_params(sagemaker_session):
@@ -228,13 +225,12 @@ def test_create_model_with_custom_image(sagemaker_session):
     container_log_level = '"logging.INFO"'
     source_dir = 's3://mybucket/source'
-    enable_cloudwatch_metrics = 'true'
     custom_image = 'tensorflow:1.0'
     tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
                     training_steps=1000, evaluation_steps=10,
                     train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
                     image_name=custom_image, container_log_level=container_log_level, base_job_name='job',
-                    source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
+                    source_dir=source_dir)
 
     job_name = 'doing something'
     tf.fit(inputs='s3://mybucket/train', job_name=job_name)
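
The snippet below is a minimal usage sketch, not part of the diff, showing how the deprecation surfaces to callers after this change. MXNet stands in for any Framework subclass; the entry point, role string, and mocked session are illustrative placeholders in the style of the unit tests above.

    import warnings

    from mock import Mock
    from sagemaker.mxnet import MXNet

    # DeprecationWarning is silenced by default, so opt in to recording it.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        estimator = MXNet(entry_point='train.py',        # placeholder script
                          role='SageMakerRole',          # placeholder IAM role
                          sagemaker_session=Mock(),      # avoids real AWS calls
                          train_instance_count=1,
                          train_instance_type='ml.c4.xlarge',
                          enable_cloudwatch_metrics=True)  # deprecated flag

    # Passing the flag triggers the warning added in estimator.py ...
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
    # ... and the value itself is ignored: the attribute is now always False.
    assert estimator.enable_cloudwatch_metrics is False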