@pytest.fixture(scope="session")
def sagemaker_metrics_config(request):
    """Return the decoded ``--sagemaker-metrics-config`` command-line option.

    Args:
        request: The pytest ``request`` fixture; only ``request.config.getoption``
            is used.

    Returns:
        The JSON-decoded option value, or ``None`` when the option was not
        supplied (its registered default is ``None``) or is empty.
    """
    raw_option = request.config.getoption("--sagemaker-metrics-config")
    if not raw_option:
        return None
    return json.loads(raw_option)
Session( boto_session=boto_session, sagemaker_client=sagemaker_client, sagemaker_runtime_client=runtime_client, + sagemaker_metrics_client=metrics_client, ) @@ -168,6 +183,11 @@ def local_pipeline_session(boto_session): return LocalPipelineSession(boto_session=boto_session) +@pytest.fixture(scope="session") +def execution_role(sagemaker_session): + return get_execution_role(sagemaker_session) + + @pytest.fixture(scope="module") def custom_bucket_name(boto_session): region = boto_session.region_name diff --git a/tests/data/experiment/inference.py b/tests/data/experiment/inference.py index cdb9a7b8c6..43371badc5 100644 --- a/tests/data/experiment/inference.py +++ b/tests/data/experiment/inference.py @@ -10,6 +10,7 @@ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing # permissions and limitations under the License. +import json import logging import os import pickle as pkl @@ -24,11 +25,32 @@ sdk_file = f"{code_dir}/{sdk_name}" os.system(f"pip install {sdk_file}") + +def _get_client_config_in_dict(cfg_in_str) -> dict: + return json.loads(cfg_in_str) if cfg_in_str else None + + from sagemaker.session import Session from sagemaker.experiments import load_run boto_session = boto3.Session(region_name=os.environ["AWS_REGION"]) -sagemaker_session = Session(boto_session=boto_session) + +sagemaker_client_config = _get_client_config_in_dict(os.environ.get("SM_CLIENT_CONFIG", None)) +sagemaker_metrics_config = _get_client_config_in_dict(os.environ.get("SM_METRICS_CONFIG", None)) +sagemaker_client = ( + boto_session.client("sagemaker", **sagemaker_client_config) if sagemaker_client_config else None +) +metrics_client = ( + boto_session.client("sagemaker-metrics", **sagemaker_metrics_config) + if sagemaker_metrics_config + else None +) + +sagemaker_session = Session( + boto_session=boto_session, + sagemaker_client=sagemaker_client, + sagemaker_metrics_client=metrics_client, +) def 
model_fn(model_dir): diff --git a/tests/data/experiment/process_job_script_for_run_clz.py b/tests/data/experiment/process_job_script_for_run_clz.py index 32fd0ab4f6..694586d1d8 100644 --- a/tests/data/experiment/process_job_script_for_run_clz.py +++ b/tests/data/experiment/process_job_script_for_run_clz.py @@ -13,6 +13,7 @@ """This script file runs on SageMaker processing job""" from __future__ import absolute_import +import json import logging import os import boto3 @@ -25,8 +26,28 @@ from sagemaker.experiments import load_run +def _get_client_config_in_dict(cfg_in_str) -> dict: + return json.loads(cfg_in_str) if cfg_in_str else None + + boto_session = boto3.Session(region_name=os.environ["AWS_REGION"]) -sagemaker_session = Session(boto_session=boto_session) + +sagemaker_client_config = _get_client_config_in_dict(os.environ.get("SM_CLIENT_CONFIG", None)) +sagemaker_metrics_config = _get_client_config_in_dict(os.environ.get("SM_METRICS_CONFIG", None)) +sagemaker_client = ( + boto_session.client("sagemaker", **sagemaker_client_config) if sagemaker_client_config else None +) +metrics_client = ( + boto_session.client("sagemaker-metrics", **sagemaker_metrics_config) + if sagemaker_metrics_config + else None +) + +sagemaker_session = Session( + boto_session=boto_session, + sagemaker_client=sagemaker_client, + sagemaker_metrics_client=metrics_client, +) with load_run(sagemaker_session=sagemaker_session) as run: diff --git a/tests/data/experiment/train_job_script_for_run_clz.py b/tests/data/experiment/train_job_script_for_run_clz.py index 34c86e0993..93a4e08a6c 100644 --- a/tests/data/experiment/train_job_script_for_run_clz.py +++ b/tests/data/experiment/train_job_script_for_run_clz.py @@ -13,6 +13,7 @@ """This script file runs on SageMaker training job""" from __future__ import absolute_import +import json import logging import time import os @@ -24,8 +25,29 @@ from sagemaker import Session from sagemaker.experiments import load_run, Run + +def 
_get_client_config_in_dict(cfg_in_str) -> dict: + return json.loads(cfg_in_str) if cfg_in_str else None + + boto_session = boto3.Session(region_name=os.environ["AWS_REGION"]) -sagemaker_session = Session(boto_session=boto_session) + +sagemaker_client_config = _get_client_config_in_dict(os.environ.get("SM_CLIENT_CONFIG", None)) +sagemaker_metrics_config = _get_client_config_in_dict(os.environ.get("SM_METRICS_CONFIG", None)) +sagemaker_client = ( + boto_session.client("sagemaker", **sagemaker_client_config) if sagemaker_client_config else None +) +metrics_client = ( + boto_session.client("sagemaker-metrics", **sagemaker_metrics_config) + if sagemaker_metrics_config + else None +) + +sagemaker_session = Session( + boto_session=boto_session, + sagemaker_client=sagemaker_client, + sagemaker_metrics_client=metrics_client, +) if os.environ["RUN_OPERATION"] == "init": logging.info("Initializing a Run") diff --git a/tests/integ/sagemaker/experiments/test_run.py b/tests/integ/sagemaker/experiments/test_run.py index 96aad30dc0..e9b742ee87 100644 --- a/tests/integ/sagemaker/experiments/test_run.py +++ b/tests/integ/sagemaker/experiments/test_run.py @@ -13,6 +13,7 @@ from __future__ import absolute_import import datetime +import json import os import pytest @@ -38,10 +39,6 @@ from sagemaker.experiments._helper import _DEFAULT_ARTIFACT_PREFIX -# when running integration tests locally modify this to your test account's execution role -EXECUTION_ROLE = "SageMakerRole" - - @pytest.fixture def artifact_file_path(tempdir): file_contents = "test artifact file" @@ -168,7 +165,13 @@ def test_run_name_vs_trial_component_name_edge_cases(sagemaker_session, input_na _RUN_LOAD = "load" -def test_run_from_local_and_train_job_and_all_exp_cfg_match(sagemaker_session, dev_sdk_tar): +def test_run_from_local_and_train_job_and_all_exp_cfg_match( + sagemaker_session, + dev_sdk_tar, + execution_role, + sagemaker_client_config, + sagemaker_metrics_config, +): # Notes: # 1. 
The 1st Run created locally and its exp config was auto passed to the job # 2. In training job, the same exp and run names are given in the Run constructor @@ -177,7 +180,12 @@ def test_run_from_local_and_train_job_and_all_exp_cfg_match(sagemaker_session, d # 3. In a different training job, load the same Run and log more parameters there. exp_name = unique_name_from_base(_EXP_NAME_BASE_IN_SCRIPT) estimator = _generate_estimator( - sdk_tar=dev_sdk_tar, sagemaker_session=sagemaker_session, exp_name=exp_name + sdk_tar=dev_sdk_tar, + sagemaker_session=sagemaker_session, + exp_name=exp_name, + execution_role=execution_role, + sagemaker_client_config=sagemaker_client_config, + sagemaker_metrics_config=sagemaker_metrics_config, ) tc_name = Run._generate_trial_component_name( experiment_name=exp_name, run_name=_RUN_NAME_IN_SCRIPT @@ -251,7 +259,13 @@ def test_run_from_local_and_train_job_and_all_exp_cfg_match(sagemaker_session, d ) -def test_run_from_local_and_train_job_and_exp_cfg_not_match(sagemaker_session, dev_sdk_tar): +def test_run_from_local_and_train_job_and_exp_cfg_not_match( + sagemaker_session, + dev_sdk_tar, + execution_role, + sagemaker_client_config, + sagemaker_metrics_config, +): # Notes: # 1. The 1st Run created locally and its exp config was auto passed to the job # 2. In training job, different exp and run names (i.e. 
2nd Run) are given @@ -262,7 +276,12 @@ def test_run_from_local_and_train_job_and_exp_cfg_not_match(sagemaker_session, d exp_name = unique_name_from_base(_EXP_NAME_BASE_IN_SCRIPT) exp_name2 = unique_name_from_base(_EXP_NAME_BASE_IN_SCRIPT) estimator = _generate_estimator( - sdk_tar=dev_sdk_tar, sagemaker_session=sagemaker_session, exp_name=exp_name + sdk_tar=dev_sdk_tar, + sagemaker_session=sagemaker_session, + exp_name=exp_name, + execution_role=execution_role, + sagemaker_client_config=sagemaker_client_config, + sagemaker_metrics_config=sagemaker_metrics_config, ) tc_name = Run._generate_trial_component_name( experiment_name=exp_name, run_name=_RUN_NAME_IN_SCRIPT @@ -326,7 +345,13 @@ def test_run_from_local_and_train_job_and_exp_cfg_not_match(sagemaker_session, d ) -def test_run_from_train_job_only(sagemaker_session, dev_sdk_tar): +def test_run_from_train_job_only( + sagemaker_session, + dev_sdk_tar, + execution_role, + sagemaker_client_config, + sagemaker_metrics_config, +): # Notes: # 1. No Run created locally or specified in experiment config # 2. In training job, Run is initialized @@ -338,6 +363,9 @@ def test_run_from_train_job_only(sagemaker_session, dev_sdk_tar): sdk_tar=dev_sdk_tar, sagemaker_session=sagemaker_session, exp_name=exp_name, + execution_role=execution_role, + sagemaker_client_config=sagemaker_client_config, + sagemaker_metrics_config=sagemaker_metrics_config, ) tc_name = Run._generate_trial_component_name( experiment_name=exp_name, run_name=_RUN_NAME_IN_SCRIPT @@ -367,7 +395,12 @@ def test_run_from_train_job_only(sagemaker_session, dev_sdk_tar): # dev_sdk_tar is required to trigger generating the dev SDK tar def test_run_from_processing_job_and_override_default_exp_config( - sagemaker_session, dev_sdk_tar, run_obj + sagemaker_session, + dev_sdk_tar, + run_obj, + execution_role, + sagemaker_client_config, + sagemaker_metrics_config, ): # Notes: # 1. 
The 1st Run (run) created locally @@ -378,14 +411,12 @@ def test_run_from_processing_job_and_override_default_exp_config( # fetched from the job env # 4. All data are logged in the Run either locally or in the processing job exp_name = unique_name_from_base(_EXP_NAME_BASE_IN_SCRIPT) - processor = FrameworkProcessor( - estimator_cls=PyTorch, - framework_version="1.10", - py_version="py38", - instance_count=1, - instance_type="ml.m5.xlarge", - role=EXECUTION_ROLE, + processor = _generate_processor( + exp_name=exp_name, sagemaker_session=sagemaker_session, + execution_role=execution_role, + sagemaker_client_config=sagemaker_client_config, + sagemaker_metrics_config=sagemaker_metrics_config, ) with cleanup_exp_resources(exp_names=[exp_name], sagemaker_session=sagemaker_session): @@ -441,7 +472,14 @@ def test_run_from_processing_job_and_override_default_exp_config( # dev_sdk_tar is required to trigger generating the dev SDK tar -def test_run_from_transform_job(sagemaker_session, dev_sdk_tar, xgboost_latest_version): +def test_run_from_transform_job( + sagemaker_session, + dev_sdk_tar, + xgboost_latest_version, + execution_role, + sagemaker_client_config, + sagemaker_metrics_config, +): # Notes: # 1. The 1st Run (run) created locally # 2. 
In the inference script running in a transform job, load the 1st Run @@ -454,17 +492,22 @@ def test_run_from_transform_job(sagemaker_session, dev_sdk_tar, xgboost_latest_v path=os.path.join(_TRANSFORM_MATERIALS, "xgb_model.tar.gz"), key_prefix="integ-test-data/xgboost/model", ) + env = _update_env_with_client_config( + env={ + "EXPERIMENT_NAME": exp_name, + "RUN_NAME": _RUN_NAME_IN_SCRIPT, + }, + sagemaker_metrics_config=sagemaker_metrics_config, + sagemaker_client_config=sagemaker_client_config, + ) xgboost_model = XGBoostModel( sagemaker_session=sagemaker_session, model_data=xgb_model_data_s3, - role=EXECUTION_ROLE, + role=execution_role, entry_point="inference.py", source_dir=_EXP_DIR, framework_version=xgboost_latest_version, - env={ - "EXPERIMENT_NAME": exp_name, - "RUN_NAME": _RUN_NAME_IN_SCRIPT, - }, + env=env, ) transformer = xgboost_model.transformer( instance_count=1, @@ -511,20 +554,24 @@ def test_run_from_transform_job(sagemaker_session, dev_sdk_tar, xgboost_latest_v # dev_sdk_tar is required to trigger generating the dev SDK tar -def test_load_run_auto_pass_in_exp_config_to_job(sagemaker_session, dev_sdk_tar): +def test_load_run_auto_pass_in_exp_config_to_job( + sagemaker_session, + dev_sdk_tar, + execution_role, + sagemaker_client_config, + sagemaker_metrics_config, +): # Notes: # 1. In local side, load the Run created previously and invoke a job under the load context # 2. In the job script, load the 1st Run via exp config auto-passed to the job env # 3. 
def _generate_estimator(
    exp_name,
    sdk_tar,
    sagemaker_session,
    execution_role,
    sagemaker_client_config,
    sagemaker_metrics_config,
):
    """Build the SKLearn estimator used by the training-job Run tests.

    Args:
        exp_name: Experiment name exported to the job as ``EXPERIMENT_NAME``.
        sdk_tar: Path of the dev SDK tarball, passed as the only dependency.
        sagemaker_session: Session the estimator is bound to.
        execution_role: Role passed to the estimator as ``role``.
        sagemaker_client_config: Client config forwarded to
            ``_update_env_with_client_config``.
        sagemaker_metrics_config: Metrics client config forwarded to
            ``_update_env_with_client_config``.

    Returns:
        A configured ``SKLearn`` estimator whose environment starts the job
        script with ``RUN_OPERATION`` set to ``_RUN_INIT``.
    """
    job_env = {
        "EXPERIMENT_NAME": exp_name,
        "RUN_NAME": _RUN_NAME_IN_SCRIPT,
        "RUN_OPERATION": _RUN_INIT,
    }
    job_env = _update_env_with_client_config(
        env=job_env,
        sagemaker_metrics_config=sagemaker_metrics_config,
        sagemaker_client_config=sagemaker_client_config,
    )
    estimator_kwargs = dict(
        framework_version="0.23-1",
        entry_point=_ENTRY_POINT_PATH,
        dependencies=[sdk_tar],
        role=execution_role,
        instance_type="ml.m5.large",
        instance_count=1,
        volume_size=10,
        max_run=900,
        enable_sagemaker_metrics=True,
        environment=job_env,
        sagemaker_session=sagemaker_session,
    )
    return SKLearn(**estimator_kwargs)
framework_version="1.10", + py_version="py38", + instance_count=1, + instance_type="ml.m5.xlarge", + role=execution_role, sagemaker_session=sagemaker_session, + env=env, ) @@ -719,3 +801,15 @@ def _check_tc_status_intermediate( return assert isinstance(tc_load.end_time, datetime.datetime) assert tc_load.end_time == old_end_time + + +def _update_env_with_client_config(env, sagemaker_client_config, sagemaker_metrics_config): + if sagemaker_client_config and sagemaker_client_config.get("endpoint_url", None): + env["SM_CLIENT_CONFIG"] = json.dumps( + {"endpoint_url": sagemaker_client_config["endpoint_url"]} + ) + if sagemaker_metrics_config and sagemaker_metrics_config.get("endpoint_url", None): + env["SM_METRICS_CONFIG"] = json.dumps( + {"endpoint_url": sagemaker_metrics_config["endpoint_url"]} + ) + return env