# Dockerfile for the client-side image used by the auto_capture integ test.
# It installs Mambaforge under /opt/conda, creates the ``auto_capture_client`` env,
# pip-installs the SDK archive together with awscli and boto3, and runs
# test_auto_capture.py as the container command.
# Placeholders: {py_version} (base image tag and env python version) and
# {source_archive} (SDK tarball copied into the build context).
# Multi-command RUN lines are assembled with implicit string concatenation so that
# no source-code indentation leaks into the rendered Dockerfile.
AUTO_CAPTURE_CLIENT_DOCKER_TEMPLATE = (
    "FROM public.ecr.aws/docker/library/python:{py_version}-slim\n\n"
    'SHELL ["/bin/bash", "-c"]\n'
    "RUN apt-get update -y"
    " && apt-get install -y unzip curl\n\n"
    "RUN curl -L -O 'https://github.com/conda-forge/miniforge/releases/latest/download/"
    "Mambaforge-Linux-x86_64.sh'"
    " && bash Mambaforge-Linux-x86_64.sh -b -p '/opt/conda'"
    " && /opt/conda/bin/conda init bash\n\n"
    "ENV PATH $PATH:/opt/conda/bin\n"
    "COPY {source_archive} ./\n"
    "RUN mamba create -n auto_capture_client python={py_version} -y"
    " && mamba run -n auto_capture_client pip install '{source_archive}' awscli boto3\n"
    "COPY test_auto_capture.py .\n"
    'CMD ["mamba", "run", "-n", "auto_capture_client", "python", "test_auto_capture.py"]\n'
)

# Conda environment file template; {sagemaker_version} is filled in by the caller.
# The pip requirement must be nested one level below the ``- pip:`` entry, otherwise
# it is parsed as a sibling dependency instead of a pip requirement.
CONDA_YML_FILE_TEMPLATE = (
    "name: integ_test_env\n"
    "channels:\n"
    "  - defaults\n"
    "dependencies:\n"
    "  - scipy=1.7.3\n"
    "  - pip:\n"
    "    - /sagemaker-{sagemaker_version}.tar.gz\n"
    "prefix: /opt/conda/bin/conda\n"
)


@pytest.fixture(scope="package")
def auto_capture_test_container(sagemaker_session):
    """Build the auto_capture client image once per package.

    ``sagemaker_session`` is requested as a fixture dependency but is not used in the body.

    Returns:
        The id of the locally built docker image (not an ECR URI).
    """
    image_id = _build_auto_capture_client_container("3.10", AUTO_CAPTURE_CLIENT_DOCKER_TEMPLATE)
    return image_id


def _build_auto_capture_client_container(py_version, docker_templete):
    """Build a test docker container that will act as a client for auto_capture tests.

    Args:
        py_version: python version substituted for ``{py_version}`` in the template.
        docker_templete: Dockerfile template containing ``{py_version}`` and
            ``{source_archive}`` placeholders.

    Returns:
        The id of the image built from the rendered Dockerfile.
    """
    with _tmpdir() as tmpdir:
        print("building docker image locally in ", tmpdir)
        print("building source archive...")
        source_archive = _generate_sdk_tar_with_public_version(tmpdir)
        _move_auto_capture_test_file(tmpdir)

        dockerfile_content = docker_templete.format(
            py_version=py_version, source_archive=source_archive
        )
        with open(os.path.join(tmpdir, "Dockerfile"), "w") as file:
            # The rendered template is a single string, so write() is the right call;
            # writelines() would iterate over it character by character.
            file.write(dockerfile_content)

        docker_client = docker.from_env()

        print("building docker image...")
        # images.build returns (image, build log generator); the logs are not needed here.
        image, _ = docker_client.images.build(path=tmpdir, tag=REPO_NAME, rm=True)
        return image.id
def _generate_sdk_tar_with_public_version(destination_folder):
    """Build the SDK tarball stamped with the latest published version.

    This function is used for the auto capture integ tests. Those tests need a sagemaker
    version that is already published to PyPI, so the local dev version in the ``VERSION``
    file is temporarily rewritten to the latest released SDK version.

    It does the following:
      1. Change the dev version of the SDK to the latest published version.
      2. Generate the SDK tar using that version.
      3. Move the tar file to the folder where the docker file is present.
      4. Update the version back to the dev version (also when the build fails).

    Args:
        destination_folder: folder that receives the generated archive.

    Returns:
        The value returned by ``_generate_and_move_sagemaker_sdk_tar`` for the archive.
    """
    dist_folder_path = "dist"
    version_file_path = os.path.join(os.getcwd(), "VERSION")

    with open(version_file_path, "r+") as version_file:
        dev_sagemaker_version = version_file.readline().strip()
        # Escape the dot and anchor at the end so only a trailing "1.dev0" suffix is rewritten.
        public_sagemaker_version = re.sub(r"1\.dev0$", "0", dev_sagemaker_version)
        version_file.seek(0)
        version_file.write(public_sagemaker_version)
        version_file.truncate()

    try:
        # Drop stale build artifacts so the tar is produced from the rewritten version.
        if os.path.exists(dist_folder_path):
            shutil.rmtree(dist_folder_path)
        return _generate_and_move_sagemaker_sdk_tar(destination_folder)
    finally:
        # Always put the dev version back, otherwise a failed build would leave the
        # working tree with a modified VERSION file.
        with open(version_file_path, "w") as version_file:
            version_file.write(dev_sagemaker_version)
        if os.path.exists(dist_folder_path):
            shutil.rmtree(dist_folder_path)


def _move_auto_capture_test_file(destination_folder):
    """Copy the auto capture test script into the folder that holds the docker file.

    Despite the name, the source file is copied (``shutil.copy2``), not moved. The source
    path is resolved relative to the current working directory.

    Args:
        destination_folder: folder that receives ``test_auto_capture.py``.
    """
    test_file_name = "test_auto_capture.py"
    source_path = os.path.join(
        os.getcwd(), "tests", "integ", "sagemaker", "remote_function", test_file_name
    )
    shutil.copy2(source_path, os.path.join(destination_folder, test_file_name))
# ---- tests/integ/sagemaker/remote_function/test_auto_capture.py ----
# Standalone client script: it is copied into the auto_capture client image and run as
# the container command.


@remote(
    role="SageMakerRole",
    instance_type="ml.m5.xlarge",
    dependencies="auto_capture",
)
def multiply(dataframe: pd.DataFrame, factor: float):
    """Return ``dataframe`` multiplied by ``factor``; invoked through the ``remote`` decorator."""
    return dataframe * factor


# Small fixed input frame for the remote invocation below.
df = pd.DataFrame(
    {
        "A": [14, 4, 5, 4, 1],
        "B": [5, 2, 54, 3, 2],
        "C": [20, 20, 7, 3, 8],
        "D": [14, 3, 6, 2, 6],
    }
)

if __name__ == "__main__":
    multiply(df, 10.0)


# ---- tests/integ/sagemaker/remote_function/test_decorator.py ----


def test_decorator_auto_capture(sagemaker_session, auto_capture_test_container):
    """
    This test runs a docker container. The container invocation will execute a python script
    with remote function to test auto_capture scenario. The test requires conda to be
    installed on the client side which is not available in the code build image. Hence we need
    to run the test in another docker container with conda installed.

    No assertion is needed because if the remote function execution fails, the docker run
    command exits with an error and ``check_output`` raises, thus failing this test.
    """
    import os

    creds = sagemaker_session.boto_session.get_credentials()
    candidate_env = {
        "AWS_ACCESS_KEY_ID": creds.access_key,
        "AWS_SECRET_ACCESS_KEY": creds.secret_key,
        "AWS_SESSION_TOKEN": creds.token,
        "AWS_DEFAULT_REGION": sagemaker_session.boto_session.region_name,
    }
    # Skip unset values (e.g. no session token for long-lived keys) instead of exporting
    # the literal string "None" into the container.
    container_env = {
        name: str(value) for name, value in candidate_env.items() if value is not None
    }

    # "-e NAME" without a value makes docker forward NAME from its own environment, so the
    # secrets never appear on the command line (process list, CalledProcessError.cmd, logs).
    cmd = ["docker", "run"]
    for name in container_env:
        cmd += ["-e", name]
    cmd += ["--rm", auto_capture_test_container]
    print("running:", " ".join(shlex.quote(part) for part in cmd))

    subprocess.check_output(
        cmd, stderr=subprocess.STDOUT, env={**os.environ, **container_env}
    ).decode("utf-8")