|
1 | 1 | version: 0.2
|
2 | 2 |
|
| 3 | +env: |
| 4 | + variables: |
| 5 | + FRAMEWORK_VERSION: '1.4.0' |
| 6 | + CPU_INSTANCE_TYPE: 'ml.c4.xlarge' |
| 7 | + GPU_INSTANCE_TYPE: 'ml.p2.8xlarge' |
| 8 | + ECR_REPO: 'sagemaker-test' |
| 9 | + GITHUB_REPO: 'sagemaker-pytorch-container' |
| 10 | + DLC_ACCOUNT: '763104351884' |
| 11 | + SETUP_FILE: 'setup_cmds.sh' |
| 12 | + SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]' |
| 13 | + |
3 | 14 | phases:
|
4 | 15 | pre_build:
|
5 | 16 | commands:
|
| 17 | + - start-dockerd |
| 18 | + - ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text) |
| 19 | + - PREPROD_IMAGE="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO" |
6 | 20 | - PR_NUM=$(echo "$CODEBUILD_SOURCE_VERSION" | sed -n 's|^pr/\([0-9][0-9]*\).*|\1|p')  # only a 'pr/<number>' source version yields a number; a branch name or commit id leaves PR_NUM empty and the command still exits 0
|
| 21 | + - BUILD_ID="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')" |
7 | 22 | - echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.'
|
8 | 23 |
|
9 | 24 | build:
|
10 | 25 | commands:
|
| 26 | + - export TOX_PARALLEL_NO_SPINNER=1  # exported: tox runs as a child process and only inherits exported variables |
| 27 | + - export PY_COLORS=0  # exported for the same reason |
| 28 | + |
| 29 | + # install the package in editable mode together with its test extras |
| 30 | + - pip3 install -U -e .[test] |
| 31 | + |
| 32 | + # run linters (the flake8 and twine tox environments) |
| 33 | + - tox -e flake8,twine |
| 34 | + |
| 35 | + # run unit tests under the py27, py36 and py37 tox environments |
| 36 | + - tox -e py27,py36,py37 test/unit |
| 37 | + |
| 38 | + # define image tags: one per image flavour (generic, DLC CPU, DLC GPU), each built from the framework version and the build id |
| 39 | + - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID" |
| 40 | + - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID" |
| 41 | + - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID" |
| 42 | + |
| 43 | + # run local CPU integration tests for the generic and the DLC CPU image (each run builds the image and pushes it to the ECR repo); execute-command-if-has-matching-changes presumably skips the command unless one of the listed paths changed - confirm against the helper |
| 44 | + - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG" |
| 45 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 46 | + - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG" |
| 47 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 48 | + |
| 49 | + # launch remote GPU instance; the EC2 instance type is GPU_INSTANCE_TYPE with its leading 'ml.' stripped |
| 50 | + - prefix='ml.' |
| 51 | + - instance_type=${GPU_INSTANCE_TYPE#"$prefix"} |
| 52 | + - create-key-pair |
| 53 | + - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest |
| 54 | + |
| 55 | + # build DLC GPU image here because the base DLC image is too big and takes too long to build as part of the test; the docker login below targets the DLC account registry (presumably so the base image can be pulled - confirm against Dockerfile.dlc.gpu) |
| 56 | + - python3 setup.py sdist |
| 57 | + - build_dir="test/container/$FRAMEWORK_VERSION" |
| 58 | + - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) |
| 59 | + - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION . |
| 60 | + # push DLC GPU image to ECR after logging in again, this time to the build account's own registry. NOTE(review): 'aws ecr get-login' does not exist in AWS CLI v2 - confirm the CLI version shipped in the build image |
| 61 | + - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) |
| 62 | + - docker push $PREPROD_IMAGE:$DLC_GPU_TAG |
| 63 | + |
| 64 | + # run GPU local integration tests: write the setup script first (SETUP_CMDS is passed as a %b argument instead of being used as the printf format string, so a literal '%' in it cannot corrupt the output), then run the tests through remote-test |
| 65 | + - printf '%b' "$SETUP_CMDS" > "$SETUP_FILE" |
| 66 | + # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests |
| 67 | + - generic_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" |
| 68 | + - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" |
| 69 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 70 | + - dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" |
| 71 | + - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" |
| 72 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 73 | + |
| 74 | + # run CPU sagemaker integration tests (pytest -n 10) on CPU_INSTANCE_TYPE, once for the generic image tag and once for the DLC CPU tag |
| 75 | + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" |
| 76 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 77 | + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG" |
| 78 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 79 | + |
| 80 | + # run GPU sagemaker integration tests (pytest -n 10) on GPU_INSTANCE_TYPE, once for the generic image tag and once for the DLC GPU tag |
| 81 | + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG" |
| 82 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 83 | + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG" |
| 84 | + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec.yml" "lib/*" |
| 85 | + |
| 86 | + finally: |
| 87 | + # shut down remote GPU instance and remove the key pairs (helper behaviour is not visible here - presumably they clean up what create-key-pair / launch-ec2-instance created) |
| 88 | + - cleanup-gpu-instances |
| 89 | + - cleanup-key-pairs |
11 | 90 |
|
12 |
| - - error_cmd="echo 'In order to make changes to the docker files, please, use https://github.com/aws/deep-learning-containers repository.' && exit 1" |
13 |
| - - execute-command-if-has-matching-changes "$error_cmd" "docker/" |
| 91 | + # remove the ECR images of this build: one batch-delete-image call per tag (generic, DLC CPU, DLC GPU) |
| 92 | + - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG |
| 93 | + - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG |
| 94 | + - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG |
0 commit comments