Skip to content

infra: Make docker folder read only, remove unused tests. #209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion buildspec-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ phases:
# run unit tests
- AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
tox -e py27,py36,py37 -- test-toolkit/unit
tox -e py27,py36,py37 -- test/unit

# run local integ tests
#- $(aws ecr get-login --no-include-email --region us-west-2)
Expand Down
36 changes: 18 additions & 18 deletions buildspec-toolkit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,18 @@ phases:
- tox -e flake8,twine

# run unit tests
- tox -e py27,py36,py37 test-toolkit/unit
- tox -e py27,py36,py37 test/unit

# define tags
- GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
- DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
- DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"

# run local CPU integration tests (build and push the image to ECR repo)
- test_cmd="pytest test-toolkit/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest test-toolkit/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"

# launch remote GPU instance
- prefix='ml.'
Expand All @@ -54,7 +54,7 @@ phases:

# build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
- python3 setup.py sdist
- build_dir="test-toolkit/docker/$FRAMEWORK_VERSION"
- build_dir="test/container/$FRAMEWORK_VERSION"
- $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
- docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
# push DLC GPU image to ECR
Expand All @@ -64,24 +64,24 @@ phases:
# run GPU local integration tests
- printf "$SETUP_CMDS" > $SETUP_FILE
# no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
- generic_cmd="pytest test-toolkit/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
- generic_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- dlc_cmd="pytest test-toolkit/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"

# run CPU sagemaker integration tests
- test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"

# run GPU sagemaker integration tests
- test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"
- test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*"

finally:
# shut down remote GPU instance
Expand Down
79 changes: 2 additions & 77 deletions buildspec.yml
Original file line number Diff line number Diff line change
@@ -1,88 +1,13 @@
version: 0.2

env:
variables:
FRAMEWORK_VERSION: '1.5.0'
CPU_PY3_VERSION: '3'
CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
GPU_PY3_VERSION: '3'
GPU_INSTANCE_TYPE: 'ml.p2.8xlarge'
LOCAL_BASE_REPO: 'pytorch-base'
ECR_REPO: 'sagemaker-test'
GITHUB_REPO: 'sagemaker-pytorch-container'
SETUP_FILE: 'setup_cmds.sh'
SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]'


phases:
pre_build:
commands:
- start-dockerd
- ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text)
- PREPROD_IMAGE="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO"
- PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+')
- BUILD_ID="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
- echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.'

build:
commands:
# install
- pip3 install -U -e .[test]

# run linter
- tox -e flake8

- cpu_dockerfile="Dockerfile.cpu"
- gpu_dockerfile="Dockerfile.gpu"

# build py3 image
- build_dir="docker/$FRAMEWORK_VERSION/py$GPU_PY3_VERSION"
- cp -r docker/build_artifacts/* $build_dir/
- CPU_PY3_TAG="$FRAMEWORK_VERSION-cpu-py3-$BUILD_ID"
- GPU_PY3_TAG="$FRAMEWORK_VERSION-gpu-py3-$BUILD_ID"
- build_cmd="docker build -f "$build_dir/$cpu_dockerfile" -t $PREPROD_IMAGE:$CPU_PY3_TAG $build_dir"
- execute-command-if-has-matching-changes "$build_cmd" "test/" "tests/" "docker/*" "buildspec.yml"
- build_cmd="docker build -f "$build_dir/$gpu_dockerfile" -t $PREPROD_IMAGE:$GPU_PY3_TAG $build_dir"
- execute-command-if-has-matching-changes "$build_cmd" "test/" "tests/" "docker/*" "buildspec.yml"

# push images to ecr
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
- push_cmd="docker push $PREPROD_IMAGE:$CPU_PY3_TAG"
- execute-command-if-has-matching-changes "$push_cmd" "test/" "tests/" "docker/*" "buildspec.yml"
- push_cmd="docker push $PREPROD_IMAGE:$GPU_PY3_TAG"
- execute-command-if-has-matching-changes "$push_cmd" "test/" "tests/" "docker/*" "buildspec.yml"

# launch remote gpu instance
- prefix='ml.'
- instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
- create-key-pair
- launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu

# run cpu integration tests
- test_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --tag $CPU_PY3_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml"

# run gpu integration tests
- printf "$SETUP_CMDS" > $SETUP_FILE
- py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --tag $GPU_PY3_TAG"
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$py3_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
- execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml"

# run cpu sagemaker tests
- test_cmd="pytest -n 15 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY3_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml"

# run gpu sagemaker tests
- test_cmd="pytest -n 15 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY3_TAG"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml"

finally:
# shut down remote gpu instance
- cleanup-gpu-instances
- cleanup-key-pairs

# remove ecr image
- rm_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY3_TAG"
- execute-command-if-has-matching-changes "$rm_cmd" "test/" "tests/" "docker/*" "buildspec.yml"
- rm_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY3_TAG"
- execute-command-if-has-matching-changes "$rm_cmd" "test/" "tests/" "docker/*" "buildspec.yml"
- error_cmd="echo 'In order to make changes to the docker files, please, use https://github.com/aws/deep-learning-containers repository.' && exit 1"
- execute-command-if-has-matching-changes "$error_cmd" "docker/"
Loading