From 1e03620a34d9ab30c1234ba799c89c05f2afef60 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 11:09:24 +0000 Subject: [PATCH 01/15] Update Archiver to v0.3.1 / Integ tests --- .../torchserve.py | 5 ----- test/container/1.6.0/Dockerfile.dlc.cpu | 2 ++ test/container/1.6.0/Dockerfile.dlc.gpu | 2 ++ test/container/1.6.0/Dockerfile.pytorch | 2 ++ test/integration/__init__.py | 10 +++++----- test/resources/mnist/model_cpu/1d/code/mnist_1d.py | 2 +- .../model_cpu/1d/{model.pth => torch_model.pth} | Bin test/resources/mnist/model_cpu/code/mnist.py | 2 +- .../mnist/model_cpu/{model.pth => torch_model.pth} | Bin test/resources/mnist/model_eia/mnist.py | 4 ++-- .../mnist/model_eia/{model.pth => torch_model.pth} | Bin test/resources/mnist/model_gpu/code/mnist.py | 2 +- .../mnist/model_gpu/{model.pth => torch_model.pth} | Bin 13 files changed, 16 insertions(+), 15 deletions(-) rename test/resources/mnist/model_cpu/1d/{model.pth => torch_model.pth} (100%) rename test/resources/mnist/model_cpu/{model.pth => torch_model.pth} (100%) rename test/resources/mnist/model_eia/{model.pth => torch_model.pth} (100%) rename test/resources/mnist/model_gpu/{model.pth => torch_model.pth} (100%) diff --git a/src/sagemaker_pytorch_serving_container/torchserve.py b/src/sagemaker_pytorch_serving_container/torchserve.py index 95362352..4feb0bba 100644 --- a/src/sagemaker_pytorch_serving_container/torchserve.py +++ b/src/sagemaker_pytorch_serving_container/torchserve.py @@ -42,7 +42,6 @@ ) DEFAULT_TS_MODEL_DIRECTORY = os.path.join(os.getcwd(), ".sagemaker", "ts", "models") DEFAULT_TS_MODEL_NAME = "model" -DEFAULT_TS_MODEL_SERIALIZED_FILE = "model.pth" DEFAULT_TS_CODE_DIR = "code" DEFAULT_HANDLER_SERVICE = "sagemaker_pytorch_serving_container.handler_service" @@ -117,12 +116,8 @@ def _adapt_to_ts_format(handler_service): DEFAULT_TS_MODEL_NAME, "--handler", handler_service, - "--serialized-file", - os.path.join(environment.model_dir, DEFAULT_TS_MODEL_SERIALIZED_FILE), "--export-path", DEFAULT_TS_MODEL_DIRECTORY, - "--extra-files", - os.path.join(environment.model_dir, DEFAULT_TS_CODE_DIR, environment.Environment().module_name + ".py"), "--version", "1", ] diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.6.0/Dockerfile.dlc.cpu index 44667c02..151834a3 100644 --- a/test/container/1.6.0/Dockerfile.dlc.cpu +++ b/test/container/1.6.0/Dockerfile.dlc.cpu @@ -1,6 +1,8 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-cpu-py3 +RUN git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - + COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ rm /sagemaker_pytorch_inference.tar.gz diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.6.0/Dockerfile.dlc.gpu index e48fc985..7db93d42 100644 --- a/test/container/1.6.0/Dockerfile.dlc.gpu +++ b/test/container/1.6.0/Dockerfile.dlc.gpu @@ -1,6 +1,8 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-gpu-py3 +RUN git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - + COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ rm /sagemaker_pytorch_inference.tar.gz diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.6.0/Dockerfile.pytorch index debf50ff..eaa26fd7 100644 --- a/test/container/1.6.0/Dockerfile.pytorch +++ b/test/container/1.6.0/Dockerfile.pytorch @@ -38,6 +38,8 @@ RUN useradd -m model-server \ COPY artifacts/ts-entrypoint.py /usr/local/bin/dockerd-entrypoint.py COPY artifacts/config.properties /home/model-server +RUN git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - + RUN chmod +x /usr/local/bin/dockerd-entrypoint.py EXPOSE 8080 8081 diff --git a/test/integration/__init__.py b/test/integration/__init__.py index 0f10294d..0442aa25 100644 --- a/test/integration/__init__.py +++ b/test/integration/__init__.py @@ -28,33 +28,33 @@ model_cpu_dir = os.path.join(mnist_path, cpu_sub_dir) mnist_cpu_script = os.path.join(model_cpu_dir, code_sub_dir, 'mnist.py') model_cpu_tar = file_utils.make_tarfile(mnist_cpu_script, - os.path.join(model_cpu_dir, "model.pth"), + os.path.join(model_cpu_dir, "torch_model.pth"), model_cpu_dir, script_path="code") model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d') mnist_1d_script = os.path.join(model_cpu_1d_dir, code_sub_dir, 'mnist_1d.py') model_cpu_1d_tar = file_utils.make_tarfile(mnist_1d_script, - os.path.join(model_cpu_1d_dir, "model.pth"), + os.path.join(model_cpu_1d_dir, "torch_model.pth"), model_cpu_1d_dir, script_path="code") model_gpu_dir = os.path.join(mnist_path, gpu_sub_dir) mnist_gpu_script = os.path.join(model_gpu_dir, code_sub_dir, 'mnist.py') model_gpu_tar = file_utils.make_tarfile(mnist_gpu_script, - os.path.join(model_gpu_dir, "model.pth"), + os.path.join(model_gpu_dir, "torch_model.pth"), model_gpu_dir, script_path="code") model_eia_dir = os.path.join(mnist_path, eia_sub_dir) mnist_eia_script = os.path.join(model_eia_dir, 'mnist.py') model_eia_tar = file_utils.make_tarfile(mnist_eia_script, - os.path.join(model_eia_dir, "model.pth"), + os.path.join(model_eia_dir, "torch_model.pth"), model_eia_dir) call_model_fn_once_script = os.path.join(model_cpu_dir, code_sub_dir, 'call_model_fn_once.py') call_model_fn_once_tar = file_utils.make_tarfile(call_model_fn_once_script, - os.path.join(model_cpu_dir, "model.pth"), + os.path.join(model_cpu_dir, "torch_model.pth"), model_cpu_dir, "model_call_model_fn_once.tar.gz", script_path="code") diff --git a/test/resources/mnist/model_cpu/1d/code/mnist_1d.py b/test/resources/mnist/model_cpu/1d/code/mnist_1d.py index f027e700..457141e9 100644 --- a/test/resources/mnist/model_cpu/1d/code/mnist_1d.py +++ b/test/resources/mnist/model_cpu/1d/code/mnist_1d.py @@ -42,6 +42,6 @@ def forward(self, x): def model_fn(model_dir): model = torch.nn.DataParallel(Net()) - with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: + with open(os.path.join(model_dir, 'torch_model.pth'), 'rb') as f: model.load_state_dict(torch.load(f)) return model diff --git a/test/resources/mnist/model_cpu/1d/model.pth b/test/resources/mnist/model_cpu/1d/torch_model.pth similarity index 100% rename from test/resources/mnist/model_cpu/1d/model.pth rename to test/resources/mnist/model_cpu/1d/torch_model.pth diff --git a/test/resources/mnist/model_cpu/code/mnist.py b/test/resources/mnist/model_cpu/code/mnist.py index e0a1dfbe..3c773c90 100644 --- a/test/resources/mnist/model_cpu/code/mnist.py +++ b/test/resources/mnist/model_cpu/code/mnist.py @@ -52,6 +52,6 @@ def forward(self, x): def model_fn(model_dir): logger.info('model_fn') model = torch.nn.DataParallel(Net()) - with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: + with open(os.path.join(model_dir, 'torch_model.pth'), 'rb') as f: model.load_state_dict(torch.load(f)) return model diff --git a/test/resources/mnist/model_cpu/model.pth b/test/resources/mnist/model_cpu/torch_model.pth similarity index 100% rename from test/resources/mnist/model_cpu/model.pth rename to test/resources/mnist/model_cpu/torch_model.pth diff --git a/test/resources/mnist/model_eia/mnist.py b/test/resources/mnist/model_eia/mnist.py index ebc0bff0..53f0be8d 100644 --- a/test/resources/mnist/model_eia/mnist.py +++ b/test/resources/mnist/model_eia/mnist.py @@ -38,11 +38,11 @@ def model_fn(model_dir): logger.info('model_fn: Loading model with TorchScript from {}'.format(model_dir)) # Scripted model is serialized with torch.jit.save(). # No need to instantiate model definition then load state_dict - model = torch.jit.load('model.pth') + model = torch.jit.load('torch_model.pth') return model def save_model(model, model_dir): logger.info("Saving the model to {}.".format(model_dir)) - path = os.path.join(model_dir, 'model.pth') + path = os.path.join(model_dir, 'torch_model.pth') torch.jit.save(model, path) diff --git a/test/resources/mnist/model_eia/model.pth b/test/resources/mnist/model_eia/torch_model.pth similarity index 100% rename from test/resources/mnist/model_eia/model.pth rename to test/resources/mnist/model_eia/torch_model.pth diff --git a/test/resources/mnist/model_gpu/code/mnist.py b/test/resources/mnist/model_gpu/code/mnist.py index e0a1dfbe..3c773c90 100644 --- a/test/resources/mnist/model_gpu/code/mnist.py +++ b/test/resources/mnist/model_gpu/code/mnist.py @@ -52,6 +52,6 @@ def forward(self, x): def model_fn(model_dir): logger.info('model_fn') model = torch.nn.DataParallel(Net()) - with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: + with open(os.path.join(model_dir, 'torch_model.pth'), 'rb') as f: model.load_state_dict(torch.load(f)) return model diff --git a/test/resources/mnist/model_gpu/model.pth b/test/resources/mnist/model_gpu/torch_model.pth similarity index 100% rename from test/resources/mnist/model_gpu/model.pth rename to test/resources/mnist/model_gpu/torch_model.pth From d322c1183bbeaf079368cc880ab1b1eefe111540 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 11:35:57 +0000 Subject: [PATCH 02/15] Fix Unit tests --- test/unit/test_model_server.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/unit/test_model_server.py b/test/unit/test_model_server.py index 108cd3c3..8ea1ab95 100644 --- a/test/unit/test_model_server.py +++ b/test/unit/test_model_server.py @@ -145,14 +145,8 @@ def test_adapt_to_ts_format(path_exists, make_dir, subprocess_check_call, set_py torchserve.DEFAULT_TS_MODEL_NAME, "--handler", handler_service, - "--serialized-file", - os.path.join(environment.model_dir, torchserve.DEFAULT_TS_MODEL_SERIALIZED_FILE), "--export-path", torchserve.DEFAULT_TS_MODEL_DIRECTORY, - "--extra-files", - os.path.join(environment.model_dir, - torchserve.DEFAULT_TS_CODE_DIR, - environment.Environment().module_name + ".py"), "--version", "1", ] From ba979790fe4354a23c5e98be887fc2d865c44775 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 11:53:34 +0000 Subject: [PATCH 03/15] Integ test error : .dockercfg: /home/ubuntu is not defined --- test/container/1.6.0/Dockerfile.dlc.cpu | 2 +- test/container/1.6.0/Dockerfile.dlc.gpu | 2 +- test/container/1.6.0/Dockerfile.pytorch | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.6.0/Dockerfile.dlc.cpu index 151834a3..8f08995c 100644 --- a/test/container/1.6.0/Dockerfile.dlc.cpu +++ b/test/container/1.6.0/Dockerfile.dlc.cpu @@ -1,7 +1,7 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-cpu-py3 -RUN git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - +RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.6.0/Dockerfile.dlc.gpu index 7db93d42..1230f6c3 100644 --- a/test/container/1.6.0/Dockerfile.dlc.gpu +++ b/test/container/1.6.0/Dockerfile.dlc.gpu @@ -1,7 +1,7 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-gpu-py3 -RUN git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - +RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.6.0/Dockerfile.pytorch index eaa26fd7..50ac0e9d 100644 --- a/test/container/1.6.0/Dockerfile.pytorch +++ b/test/container/1.6.0/Dockerfile.pytorch @@ -38,7 +38,7 @@ RUN useradd -m model-server \ COPY artifacts/ts-entrypoint.py /usr/local/bin/dockerd-entrypoint.py COPY artifacts/config.properties /home/model-server -RUN git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - +RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - RUN chmod +x /usr/local/bin/dockerd-entrypoint.py From ba66c11b4cf6340acfd92d7fef6b0a8520301edc Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 12:22:36 +0000 Subject: [PATCH 04/15] Install git ? --- test/container/1.6.0/Dockerfile.dlc.cpu | 2 ++ test/container/1.6.0/Dockerfile.dlc.gpu | 2 ++ test/container/1.6.0/Dockerfile.pytorch | 1 + 3 files changed, 5 insertions(+) diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.6.0/Dockerfile.dlc.cpu index 8f08995c..c80e19de 100644 --- a/test/container/1.6.0/Dockerfile.dlc.cpu +++ b/test/container/1.6.0/Dockerfile.dlc.cpu @@ -1,6 +1,8 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-cpu-py3 +RUN apt-get install -y git + RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.6.0/Dockerfile.dlc.gpu index 1230f6c3..ed4de22b 100644 --- a/test/container/1.6.0/Dockerfile.dlc.gpu +++ b/test/container/1.6.0/Dockerfile.dlc.gpu @@ -1,6 +1,8 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-gpu-py3 +RUN apt-get install -y git + RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.6.0/Dockerfile.pytorch index 50ac0e9d..e1d44b04 100644 --- a/test/container/1.6.0/Dockerfile.pytorch +++ b/test/container/1.6.0/Dockerfile.pytorch @@ -38,6 +38,7 @@ RUN useradd -m model-server \ COPY artifacts/ts-entrypoint.py /usr/local/bin/dockerd-entrypoint.py COPY artifacts/config.properties /home/model-server +RUN apt-get install -y git RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - RUN chmod +x /usr/local/bin/dockerd-entrypoint.py From ade3b17b853056d77b183bbcf774a6480110e4ed Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 11 Mar 2021 13:21:05 +0000 Subject: [PATCH 05/15] Git install --- test/container/1.6.0/Dockerfile.dlc.cpu | 2 +- test/container/1.6.0/Dockerfile.dlc.gpu | 2 +- test/container/1.6.0/Dockerfile.pytorch | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.6.0/Dockerfile.dlc.cpu index c80e19de..c59ac794 100644 --- a/test/container/1.6.0/Dockerfile.dlc.cpu +++ b/test/container/1.6.0/Dockerfile.dlc.cpu @@ -1,7 +1,7 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-cpu-py3 -RUN apt-get install -y git +RUN apt-get update && apt-get install -y git RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.6.0/Dockerfile.dlc.gpu index ed4de22b..0758b85d 100644 --- a/test/container/1.6.0/Dockerfile.dlc.gpu +++ b/test/container/1.6.0/Dockerfile.dlc.gpu @@ -1,7 +1,7 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-gpu-py3 -RUN apt-get install -y git +RUN apt-get update && apt-get install -y git RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.6.0/Dockerfile.pytorch index e1d44b04..fcd5ef5f 100644 --- a/test/container/1.6.0/Dockerfile.pytorch +++ b/test/container/1.6.0/Dockerfile.pytorch @@ -38,7 +38,7 @@ RUN useradd -m model-server \ COPY artifacts/ts-entrypoint.py /usr/local/bin/dockerd-entrypoint.py COPY artifacts/config.properties /home/model-server -RUN apt-get install -y git +RUN apt-get update && apt-get install -y git RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - RUN chmod +x /usr/local/bin/dockerd-entrypoint.py From 395eba3ce2c04763ded3c65d54122ac0d4a8a3e6 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 13:44:41 +0000 Subject: [PATCH 06/15] Fix path --- src/sagemaker_pytorch_serving_container/torchserve.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sagemaker_pytorch_serving_container/torchserve.py b/src/sagemaker_pytorch_serving_container/torchserve.py index 4feb0bba..d90e0000 100644 --- a/src/sagemaker_pytorch_serving_container/torchserve.py +++ b/src/sagemaker_pytorch_serving_container/torchserve.py @@ -120,6 +120,8 @@ def _adapt_to_ts_format(handler_service): DEFAULT_TS_MODEL_DIRECTORY, "--version", "1", + "--extra-files", + os.path.join(environment.model_dir) ] logger.info(model_archiver_cmd) From 6489d2f26b83365609d67c94e6ece58fbd498823 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 13:57:13 +0000 Subject: [PATCH 07/15] Fix unit tests --- test/unit/test_model_server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/unit/test_model_server.py b/test/unit/test_model_server.py index 8ea1ab95..aeaec28e 100644 --- a/test/unit/test_model_server.py +++ b/test/unit/test_model_server.py @@ -149,6 +149,8 @@ def test_adapt_to_ts_format(path_exists, make_dir, subprocess_check_call, set_py torchserve.DEFAULT_TS_MODEL_DIRECTORY, "--version", "1", + "--extra-files", + environment.model_dir ] subprocess_check_call.assert_called_once_with(model_archiver_cmd) From edf6792cf42e5de3fc82f83e5bc7df9294f337d5 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 19:59:50 +0000 Subject: [PATCH 08/15] Ignore GPU tests --- buildspec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildspec.yml b/buildspec.yml index 7858bee8..84cd85c3 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -67,7 +67,7 @@ phases: # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" - - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" + #- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" From 76e527695e026db4336fa57eb017966efef8e769 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Thu, 11 Mar 2021 20:29:36 +0000 Subject: [PATCH 09/15] Disable more tests --- buildspec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildspec.yml b/buildspec.yml index 84cd85c3..f85c6219 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -70,7 +70,7 @@ phases: #- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" - - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" + #- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" # run CPU sagemaker integration tests - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" From 86f5a77fd3524e378c272845aee880c84844a427 Mon Sep 17 00:00:00 2001 From: dhanainme <60679183+dhanainme@users.noreply.github.com> Date: Mon, 15 Mar 2021 15:34:14 -0700 Subject: [PATCH 10/15] Update Dockerfile.dlc.cpu --- test/container/1.6.0/Dockerfile.dlc.cpu | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.6.0/Dockerfile.dlc.cpu index c59ac794..44667c02 100644 --- a/test/container/1.6.0/Dockerfile.dlc.cpu +++ b/test/container/1.6.0/Dockerfile.dlc.cpu @@ -1,10 +1,6 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-cpu-py3 -RUN apt-get update && apt-get install -y git - -RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - - COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ rm /sagemaker_pytorch_inference.tar.gz From d4bb25f26a089ac74151088520bcf7380370921e Mon Sep 17 00:00:00 2001 From: dhanainme <60679183+dhanainme@users.noreply.github.com> Date: Mon, 15 Mar 2021 15:34:36 -0700 Subject: [PATCH 11/15] Update Dockerfile.pytorch --- test/container/1.6.0/Dockerfile.pytorch | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.6.0/Dockerfile.pytorch index fcd5ef5f..debf50ff 100644 --- a/test/container/1.6.0/Dockerfile.pytorch +++ b/test/container/1.6.0/Dockerfile.pytorch @@ -38,9 +38,6 @@ RUN useradd -m model-server \ COPY artifacts/ts-entrypoint.py /usr/local/bin/dockerd-entrypoint.py COPY artifacts/config.properties /home/model-server -RUN apt-get update && apt-get install -y git -RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - - RUN chmod +x /usr/local/bin/dockerd-entrypoint.py EXPOSE 8080 8081 From 74b250ee8d26e11fd97c0c51ba28295bf1181f7d Mon Sep 17 00:00:00 2001 From: dhanainme <60679183+dhanainme@users.noreply.github.com> Date: Mon, 15 Mar 2021 15:35:32 -0700 Subject: [PATCH 12/15] Update Dockerfile.dlc.gpu --- test/container/1.6.0/Dockerfile.dlc.gpu | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.6.0/Dockerfile.dlc.gpu index 0758b85d..e48fc985 100644 --- a/test/container/1.6.0/Dockerfile.dlc.gpu +++ b/test/container/1.6.0/Dockerfile.dlc.gpu @@ -1,10 +1,6 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-gpu-py3 -RUN apt-get update && apt-get install -y git - -RUN cd /tmp/ && git clone https://github.com/pytorch/serve/ && cd serve/model-archiver/ && git checkout patch_release_0_3_1 && pip install . && cd - - COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ rm /sagemaker_pytorch_inference.tar.gz From 5b43521c4854dd4ac6e200246b7bc518c4c5afee Mon Sep 17 00:00:00 2001 From: dhanainme <60679183+dhanainme@users.noreply.github.com> Date: Tue, 16 Mar 2021 11:35:25 -0700 Subject: [PATCH 13/15] Update buildspec.yml --- buildspec.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/buildspec.yml b/buildspec.yml index f85c6219..7858bee8 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -67,10 +67,10 @@ phases: # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" - #- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" - #- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*" # run CPU sagemaker integration tests - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" From 1521faca87488deba578a9ffe4aa8550df600432 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Wed, 17 Mar 2021 18:22:02 +0000 Subject: [PATCH 14/15] Update Dockerfile for Model Archiver 0.3.1 --- test/container/1.6.0/Dockerfile.dlc.cpu | 2 ++ test/container/1.6.0/Dockerfile.dlc.gpu | 2 ++ test/container/1.6.0/Dockerfile.pytorch | 5 +++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/test/container/1.6.0/Dockerfile.dlc.cpu b/test/container/1.6.0/Dockerfile.dlc.cpu index 44667c02..152a5753 100644 --- a/test/container/1.6.0/Dockerfile.dlc.cpu +++ b/test/container/1.6.0/Dockerfile.dlc.cpu @@ -1,6 +1,8 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-cpu-py3 +RUN pip install --upgrade torch-model-archiver==0.3.1 + COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ rm /sagemaker_pytorch_inference.tar.gz diff --git a/test/container/1.6.0/Dockerfile.dlc.gpu b/test/container/1.6.0/Dockerfile.dlc.gpu index e48fc985..c10fc836 100644 --- a/test/container/1.6.0/Dockerfile.dlc.gpu +++ b/test/container/1.6.0/Dockerfile.dlc.gpu @@ -1,6 +1,8 @@ ARG region FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.6.0-gpu-py3 +RUN pip install --upgrade torch-model-archiver==0.3.1 + COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ rm /sagemaker_pytorch_inference.tar.gz diff --git a/test/container/1.6.0/Dockerfile.pytorch b/test/container/1.6.0/Dockerfile.pytorch index debf50ff..ca9042f2 100644 --- a/test/container/1.6.0/Dockerfile.pytorch +++ b/test/container/1.6.0/Dockerfile.pytorch @@ -3,7 +3,8 @@ FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true LABEL com.amazonaws.sagemaker.capabilities.multi-models=true -ARG TS_VERSION=0.1.1 +ARG TS_VERSION=0.3.1 +ARG TS_ARCHIVER_VERSION=0.3.1 ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main ENV TEMP=/home/model-server/tmp @@ -25,7 +26,7 @@ RUN conda install -c conda-forge opencv==4.0.1 \ && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 RUN pip install torchserve==$TS_VERSION \ - && pip install torch-model-archiver==$TS_VERSION + && pip install torch-model-archiver==$TS_ARCHIVER_VERSION COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz RUN pip install --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \ From 9400de0cee8f5b92f345080910f186523c0b0711 Mon Sep 17 00:00:00 2001 From: Dhanasekar Date: Wed, 17 Mar 2021 18:55:34 +0000 Subject: [PATCH 15/15] Update boto>=1.10.44 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ce6d778f..f37eb73c 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def read(fname): # different packages for different variants install_requires=['numpy', 'retrying', 'sagemaker-inference>=1.3.1'], extras_require={ - 'test': ['boto3==1.10.32', 'coverage==4.5.3', 'docker-compose==1.23.2', 'flake8==3.7.7', 'Flask==1.1.1', + 'test': ['boto3>=1.10.44', 'coverage==4.5.3', 'docker-compose==1.23.2', 'flake8==3.7.7', 'Flask==1.1.1', 'mock==2.0.0', 'pytest==4.4.0', 'pytest-cov==2.7.1', 'pytest-xdist==1.28.0', 'PyYAML==3.10', 'sagemaker==1.56.3', 'sagemaker-containers>=2.5.4', 'six==1.12.0', 'requests==2.20.0', 'requests_mock==1.6.0', 'torch==1.6.0', 'torchvision==0.7.0', 'tox==3.7.0']