Skip to content

Commit 1fbd3f7

Browse files
authored
Fix integration tests and update Python versions (#154)
* Fix integration tests and update Python versions
* Fix test dependency versions, change GPU instance type, update GPU MNIST script, add environment variable to GPU tests
* Modify Dockerfiles and set default value of env_vars to None
* Remove instance type from SageMaker integration tests
* Add instance_type to SageMaker tests
* Fix buildspec.yml
1 parent c302b64 commit 1fbd3f7

File tree

14 files changed

+188
-107
lines changed

14 files changed

+188
-107
lines changed

buildspec-release.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ phases:
1212
# run unit tests
1313
- AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
1414
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
15-
tox -e py36,py37 -- test/unit
15+
tox -e py38,py39,py310 -- test/unit
1616

1717
# run local integ tests
1818
#- $(aws ecr get-login --no-include-email --region us-west-2)

buildspec.yml

+65-52
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@ version: 0.2
22

33
env:
44
variables:
5-
FRAMEWORK_VERSION: '1.10.2'
6-
EIA_FRAMEWORK_VERSION: '1.3.1'
5+
FRAMEWORK_VERSIONS: '2.0.0 2.0.1'
6+
# EIA_FRAMEWORK_VERSION: '1.3.1'
77
CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
8-
GPU_INSTANCE_TYPE: 'ml.p3.8xlarge'
9-
EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
8+
GPU_INSTANCE_TYPE: 'ml.g4dn.12xlarge'
9+
# EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
1010
ECR_REPO: 'sagemaker-test'
1111
GITHUB_REPO: 'sagemaker-pytorch-serving-container'
1212
DLC_ACCOUNT: '763104351884'
1313
SETUP_FILE: 'setup_cmds.sh'
14-
SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip\npython3.6 -m pip install -U -e .\npython3.6 -m pip install -U -e .[test]'
14+
SETUP_CMDS: '#!/bin/bash\npython3.8 -m pip install --upgrade pip\npython3.8 -m pip install -U -e .\npython3.8 -m pip install -U -e .[test]'
1515

1616

1717
phases:
@@ -33,68 +33,81 @@ phases:
3333
- tox -e flake8,twine
3434

3535
# run unit tests
36-
- tox -e py36,py37 test/unit
36+
- tox -e py38,py39,py310 test/unit
3737

38-
# define tags
39-
- GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
40-
- DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
41-
- DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
42-
- DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
38+
# define EIA tag
39+
# - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
4340

4441
# run local CPU integration tests (build and push the image to ECR repo)
45-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
46-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
47-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
48-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
49-
50-
# launch remote GPU instance
42+
- |
43+
for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
44+
do
45+
DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID";
46+
test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG";
47+
execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
48+
docker system prune --all --force;
49+
done
50+
51+
# launch remote GPU instance with Deep Learning AMI GPU PyTorch 1.9 (Ubuntu 20.04)
5152
- prefix='ml.'
5253
- instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
53-
- create-key-pair
54-
- launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
55-
54+
5655
# build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
5756
- python3 setup.py sdist
58-
- build_dir="test/container/$FRAMEWORK_VERSION"
5957
- $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
60-
- docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
61-
# push DLC GPU image to ECR
62-
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
63-
- docker push $PREPROD_IMAGE:$DLC_GPU_TAG
64-
65-
# run GPU local integration tests
66-
- printf "$SETUP_CMDS" > $SETUP_FILE
67-
# no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
68-
- generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
69-
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
70-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
71-
- dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
72-
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
73-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
58+
- |
59+
for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
60+
do
61+
create-key-pair;
62+
launch-ec2-instance --instance-type $instance_type --ami-name ami-03e3ef8c92fdb39ad;
63+
DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
64+
build_dir="test/container/$FRAMEWORK_VERSION";
65+
docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .;
66+
$(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION);
67+
docker push $PREPROD_IMAGE:$DLC_GPU_TAG;
68+
printf "$SETUP_CMDS" > $SETUP_FILE;
69+
dlc_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG";
70+
test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --python-version \"3.8\"";
71+
execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
72+
docker system prune --all --force;
73+
cleanup-gpu-instances;
74+
cleanup-key-pairs;
75+
done
7476
7577
# run CPU sagemaker integration tests
76-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
77-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
78-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
79-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
78+
- |
79+
for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
80+
do
81+
DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID";
82+
test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG";
83+
execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
84+
docker system prune --all --force;
85+
done
8086
8187
# run GPU sagemaker integration tests
82-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
83-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
84-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
85-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
88+
- |
89+
for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
90+
do
91+
DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
92+
test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG";
93+
execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
94+
docker system prune --all --force;
95+
done
8696
8797
# run EIA sagemaker integration tests
88-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --build-image --push-image --dockerfile-type dlc.eia --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $DLC_EIA_TAG"
89-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
98+
# - test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker --build-image --push-image --dockerfile-type dlc.eia --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $DLC_EIA_TAG"
99+
# - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg"
90100

91101
finally:
92-
# shut down remote GPU instance
93-
- cleanup-gpu-instances
94-
- cleanup-key-pairs
95102

96103
# remove ECR image
97-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
98-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
99-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
100-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_EIA_TAG
104+
- |
105+
for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
106+
do
107+
DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID";
108+
DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
109+
aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG;
110+
aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG;
111+
done
112+
113+
# - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_EIA_TAG

setup.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,19 @@ def read(fname):
4545
"Natural Language :: English",
4646
"License :: OSI Approved :: Apache Software License",
4747
"Programming Language :: Python",
48-
'Programming Language :: Python :: 2.7',
49-
'Programming Language :: Python :: 3.6',
50-
'Programming Language :: Python :: 3.7',
48+
'Programming Language :: Python :: 3.8',
49+
'Programming Language :: Python :: 3.9',
50+
'Programming Language :: Python :: 3.10'
5151
],
5252

5353
# We don't declare our dependency on torch here because we build with
5454
# different packages for different variants
55-
install_requires=['numpy', 'retrying', 'sagemaker-inference>=1.3.1'],
55+
install_requires=['numpy==1.24.4', 'retrying==1.3.4', 'sagemaker-inference==1.10.0'],
5656
extras_require={
57-
'test': ['boto3>=1.10.44', 'coverage==4.5.3', 'docker-compose==1.23.2', 'flake8==3.7.7', 'Flask==1.1.1',
58-
'mock==2.0.0', 'pytest==4.4.0', 'pytest-cov==2.7.1', 'pytest-xdist==1.28.0', 'PyYAML==3.10',
59-
'sagemaker==1.56.3', 'sagemaker-containers>=2.5.4', 'six==1.12.0', 'requests==2.20.0',
60-
'requests_mock==1.6.0', 'torch==1.6.0', 'torchvision==0.7.0', 'tox==3.7.0']
57+
'test': ['boto3==1.28.60', 'coverage==7.3.2', 'docker-compose==1.29.2', 'flake8==6.1.0', 'Flask==3.0.0',
58+
'mock==5.1.0', 'pytest==7.4.2', 'pytest-cov==4.1.0', 'pytest-xdist==3.3.1', 'PyYAML==5.4.1',
59+
'sagemaker==2.125.0', 'six==1.16.0', 'requests==2.31.0',
60+
'requests_mock==1.11.0', 'torch==2.1.0', 'torchvision==0.16.0', 'tox==4.11.3']
6161
},
6262

6363
entry_points={

src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def default_output_fn(self, prediction, accept):
135135
136136
Returns: output data serialized
137137
"""
138-
if type(prediction) == torch.Tensor:
138+
if type(prediction) is torch.Tensor:
139139
prediction = prediction.detach().cpu().numpy().tolist()
140140

141141
for content_type in utils.parse_accept(accept):

test/conftest.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,16 @@ def pytest_addoption(parser):
4545
parser.addoption('--build-image', '-B', action='store_true')
4646
parser.addoption('--push-image', '-P', action='store_true')
4747
parser.addoption('--dockerfile-type', '-T',
48-
choices=['dlc.cpu', 'dlc.gpu', 'dlc.eia', 'pytorch'],
49-
default='pytorch')
48+
# choices=['dlc.cpu', 'dlc.gpu', 'dlc.eia', 'pytorch'],
49+
choices=['dlc.cpu', 'dlc.gpu'],
50+
default='dlc.cpu')
5051
parser.addoption('--dockerfile', '-D', default=None)
5152
parser.addoption('--aws-id', default=None)
5253
parser.addoption('--instance-type')
5354
parser.addoption('--accelerator-type')
5455
parser.addoption('--docker-base-name', default='sagemaker-pytorch-inference')
5556
parser.addoption('--region', default='us-west-2')
56-
parser.addoption('--framework-version', default="1.6.0")
57+
parser.addoption('--framework-version', default="2.0.0")
5758
parser.addoption('--py-version', choices=['2', '3'], default='3')
5859
# Processor is still "cpu" for EIA tests
5960
parser.addoption('--processor', choices=['gpu', 'cpu'], default='cpu')
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ARG region
2+
FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.0-cpu-py310-ubuntu20.04-sagemaker
3+
4+
COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
5+
6+
RUN pip uninstall -y sagemaker_pytorch_inference && \
7+
pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
8+
rm /sagemaker_pytorch_inference.tar.gz
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ARG region
2+
FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.0-gpu-py310-cu118-ubuntu20.04-sagemaker
3+
4+
COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
5+
6+
RUN pip uninstall -y sagemaker_pytorch_inference && \
7+
pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
8+
rm /sagemaker_pytorch_inference.tar.gz
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ARG region
2+
FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.1-cpu-py310-ubuntu20.04-sagemaker
3+
4+
COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
5+
6+
RUN pip uninstall -y sagemaker_pytorch_inference && \
7+
pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
8+
rm /sagemaker_pytorch_inference.tar.gz
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ARG region
2+
FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.1-gpu-py310-cu118-ubuntu20.04-sagemaker
3+
4+
COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
5+
6+
RUN pip uninstall -y sagemaker_pytorch_inference && \
7+
pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
8+
rm /sagemaker_pytorch_inference.tar.gz

0 commit comments

Comments (0)