diff --git a/buildspec-release.yml b/buildspec-release.yml index 17e0f5ce..7285bb3b 100644 --- a/buildspec-release.yml +++ b/buildspec-release.yml @@ -12,7 +12,7 @@ phases: # run unit tests - AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN= AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION= - tox -e py27,py36,py37 -- test-toolkit/unit + tox -e py27,py36,py37 -- test/unit # run local integ tests #- $(aws ecr get-login --no-include-email --region us-west-2) diff --git a/buildspec-toolkit.yml b/buildspec-toolkit.yml index 5e11ec1b..a7046e0b 100644 --- a/buildspec-toolkit.yml +++ b/buildspec-toolkit.yml @@ -33,7 +33,7 @@ phases: - tox -e flake8,twine # run unit tests - - tox -e py27,py36,py37 test-toolkit/unit + - tox -e py27,py36,py37 test/unit # define tags - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID" @@ -41,10 +41,10 @@ phases: - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID" # run local CPU integration tests (build and push the image to ECR repo) - - test_cmd="pytest test-toolkit/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" - - test_cmd="pytest test-toolkit/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT 
--framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - test_cmd="pytest test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" # launch remote GPU instance - prefix='ml.' @@ -54,7 +54,7 @@ phases: # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test - python3 setup.py sdist - - build_dir="test-toolkit/docker/$FRAMEWORK_VERSION" + - build_dir="test/container/$FRAMEWORK_VERSION" - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION . 
# push DLC GPU image to ECR @@ -64,24 +64,24 @@ phases: # run GPU local integration tests - printf "$SETUP_CMDS" > $SETUP_FILE # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests - - generic_cmd="pytest test-toolkit/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" + - generic_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" - - dlc_cmd="pytest test-toolkit/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - dlc_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG" - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" # run CPU sagemaker integration tests - - test_cmd="pytest -n 10 
test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" - - test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" # run GPU sagemaker integration tests - - test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" 
"docker/build_artifacts/*" - - test_cmd="pytest -n 10 test-toolkit/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test-toolkit/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" + - test_cmd="pytest -n 10 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG" + - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "docker/build_artifacts/*" finally: # shut down remote GPU instance diff --git a/buildspec.yml b/buildspec.yml index c1f46fa9..c43cb34f 100644 --- a/buildspec.yml +++ b/buildspec.yml @@ -1,88 +1,13 @@ version: 0.2 -env: - variables: - FRAMEWORK_VERSION: '1.5.0' - CPU_PY3_VERSION: '3' - CPU_INSTANCE_TYPE: 'ml.c4.xlarge' - GPU_PY3_VERSION: '3' - GPU_INSTANCE_TYPE: 'ml.p2.8xlarge' - LOCAL_BASE_REPO: 'pytorch-base' - ECR_REPO: 'sagemaker-test' - GITHUB_REPO: 'sagemaker-pytorch-container' - SETUP_FILE: 'setup_cmds.sh' - SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]' - - phases: pre_build: commands: - - start-dockerd - - ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 
'Account' --output text) - - PREPROD_IMAGE="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$ECR_REPO" - PR_NUM=$(echo $CODEBUILD_SOURCE_VERSION | grep -o '[0-9]\+') - - BUILD_ID="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')" - echo 'Pull request number:' $PR_NUM '. No value means this build is not from pull request.' build: commands: - # install - - pip3 install -U -e .[test] - - # run linter - - tox -e flake8 - - - cpu_dockerfile="Dockerfile.cpu" - - gpu_dockerfile="Dockerfile.gpu" - - # build py3 image - - build_dir="docker/$FRAMEWORK_VERSION/py$GPU_PY3_VERSION" - - cp -r docker/build_artifacts/* $build_dir/ - - CPU_PY3_TAG="$FRAMEWORK_VERSION-cpu-py3-$BUILD_ID" - - GPU_PY3_TAG="$FRAMEWORK_VERSION-gpu-py3-$BUILD_ID" - - build_cmd="docker build -f "$build_dir/$cpu_dockerfile" -t $PREPROD_IMAGE:$CPU_PY3_TAG $build_dir" - - execute-command-if-has-matching-changes "$build_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - build_cmd="docker build -f "$build_dir/$gpu_dockerfile" -t $PREPROD_IMAGE:$GPU_PY3_TAG $build_dir" - - execute-command-if-has-matching-changes "$build_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - # push images to ecr - - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION) - - push_cmd="docker push $PREPROD_IMAGE:$CPU_PY3_TAG" - - execute-command-if-has-matching-changes "$push_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - push_cmd="docker push $PREPROD_IMAGE:$GPU_PY3_TAG" - - execute-command-if-has-matching-changes "$push_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - # launch remote gpu instance - - prefix='ml.' 
- - instance_type=${GPU_INSTANCE_TYPE#"$prefix"} - - create-key-pair - - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu - - # run cpu integration tests - - test_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --tag $CPU_PY3_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - # run gpu integration tests - - printf "$SETUP_CMDS" > $SETUP_FILE - - py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --tag $GPU_PY3_TAG" - - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$py3_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\"" - - execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - # run cpu sagemaker tests - - test_cmd="pytest -n 15 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY3_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $CPU_PY3_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - # run gpu sagemaker tests - - test_cmd="pytest -n 15 test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY3_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GPU_PY3_TAG" - - execute-command-if-has-matching-changes "$test_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - finally: - # shut down remote gpu instance - - cleanup-gpu-instances - - cleanup-key-pairs - # remove ecr image - - rm_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region 
$AWS_DEFAULT_REGION --image-ids imageTag=$CPU_PY3_TAG" - - execute-command-if-has-matching-changes "$rm_cmd" "test/" "tests/" "docker/*" "buildspec.yml" - - rm_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_PY3_TAG" - - execute-command-if-has-matching-changes "$rm_cmd" "test/" "tests/" "docker/*" "buildspec.yml" + - error_cmd="echo 'In order to make changes to the docker files, please, use https://github.com/aws/deep-learning-containers repository.' && exit 1" + - execute-command-if-has-matching-changes "$error_cmd" "docker/" diff --git a/test-toolkit/conftest.py b/test-toolkit/conftest.py deleted file mode 100644 index 13e28de3..00000000 --- a/test-toolkit/conftest.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import - -import boto3 -import os -import logging -import platform -import pytest -import shutil -import sys -import tempfile - -from sagemaker import LocalSession, Session - -from utils import image_utils - -logger = logging.getLogger(__name__) -logging.getLogger('boto').setLevel(logging.INFO) -logging.getLogger('boto3').setLevel(logging.INFO) -logging.getLogger('botocore').setLevel(logging.INFO) -logging.getLogger('factory.py').setLevel(logging.INFO) -logging.getLogger('auth.py').setLevel(logging.INFO) -logging.getLogger('connectionpool.py').setLevel(logging.INFO) - - -dir_path = os.path.dirname(os.path.realpath(__file__)) - -NO_P2_REGIONS = ['ap-east-1', 'ap-northeast-3', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'eu-north-1', - 'eu-west-2', 'eu-west-3', 'us-west-1', 'sa-east-1', 'me-south-1'] -NO_P3_REGIONS = ['ap-east-1', 'ap-northeast-3', 'ap-southeast-1', 'ap-southeast-2', 'ap-south-1', 'ca-central-1', - 'eu-central-1', 'eu-north-1', 'eu-west-2', 'eu-west-3', 'sa-east-1', 'us-west-1', 'me-south-1'] - - -def pytest_addoption(parser): - parser.addoption('--build-image', '-B', action='store_true') - parser.addoption('--push-image', '-P', action='store_true') - parser.addoption('--dockerfile-type', '-T', choices=['dlc.cpu', 'dlc.gpu', 'pytorch'], - default='pytorch') - parser.addoption('--dockerfile', '-D', default=None) - parser.addoption('--aws-id', default=None) - parser.addoption('--instance-type') - parser.addoption('--docker-base-name', default='sagemaker-pytorch-training') - parser.addoption('--region', default='us-west-2') - parser.addoption('--framework-version', default="1.4.0") - parser.addoption('--py-version', choices=['2', '3'], default=str(sys.version_info.major)) - parser.addoption('--processor', choices=['gpu', 'cpu'], default='cpu') - # If not specified, will default to {framework-version}-{processor}-py{py-version} - parser.addoption('--tag', default=None) - - -@pytest.fixture(scope='session', 
name='dockerfile_type') -def fixture_dockerfile_type(request): - return request.config.getoption('--dockerfile-type') - - -@pytest.fixture(scope='session', name='dockerfile') -def fixture_dockerfile(request, dockerfile_type): - dockerfile = request.config.getoption('--dockerfile') - return dockerfile if dockerfile else 'Dockerfile.{}'.format(dockerfile_type) - - -@pytest.fixture(scope='session', name='docker_base_name') -def fixture_docker_base_name(request): - return request.config.getoption('--docker-base-name') - - -@pytest.fixture(scope='session', name='region') -def fixture_region(request): - return request.config.getoption('--region') - - -@pytest.fixture(scope='session', name='framework_version') -def fixture_framework_version(request): - return request.config.getoption('--framework-version') - - -@pytest.fixture(scope='session', name='py_version') -def fixture_py_version(request): - return 'py{}'.format(int(request.config.getoption('--py-version'))) - - -@pytest.fixture(scope='session', name='processor') -def fixture_processor(request): - return request.config.getoption('--processor') - - -@pytest.fixture(scope='session', name='tag') -def fixture_tag(request, framework_version, processor, py_version): - provided_tag = request.config.getoption('--tag') - default_tag = '{}-{}-{}'.format(framework_version, processor, py_version) - return provided_tag if provided_tag else default_tag - - -@pytest.fixture -def opt_ml(): - tmp = tempfile.mkdtemp() - os.mkdir(os.path.join(tmp, 'output')) - - # Docker cannot mount Mac OS /var folder properly see - # https://forums.docker.com/t/var-folders-isnt-mounted-properly/9600 - opt_ml_dir = '/private{}'.format(tmp) if platform.system() == 'Darwin' else tmp - yield opt_ml_dir - - shutil.rmtree(tmp, True) - - -@pytest.fixture(scope='session', name='use_gpu') -def fixture_use_gpu(processor): - return processor == 'gpu' - - -@pytest.fixture(scope='session', name='build_image', autouse=True) -def fixture_build_image(request, 
framework_version, dockerfile, image_uri, region): - build_image = request.config.getoption('--build-image') - if build_image: - return image_utils.build_image(framework_version=framework_version, - dockerfile=dockerfile, - image_uri=image_uri, - region=region, - cwd=os.path.join(dir_path, '..')) - - return image_uri - - -@pytest.fixture(scope='session', name='push_image', autouse=True) -def fixture_push_image(request, image_uri, region, aws_id): - push_image = request.config.getoption('--push-image') - if push_image: - return image_utils.push_image(image_uri, region, aws_id) - return None - - -@pytest.fixture(scope='session', name='sagemaker_session') -def fixture_sagemaker_session(region): - return Session(boto_session=boto3.Session(region_name=region)) - - -@pytest.fixture(scope='session', name='sagemaker_local_session') -def fixture_sagemaker_local_session(region): - return LocalSession(boto_session=boto3.Session(region_name=region)) - - -@pytest.fixture(name='aws_id', scope='session') -def fixture_aws_id(request): - return request.config.getoption('--aws-id') - - -@pytest.fixture(name='instance_type', scope='session') -def fixture_instance_type(request, processor): - provided_instance_type = request.config.getoption('--instance-type') - default_instance_type = 'local' if processor == 'cpu' else 'local_gpu' - return provided_instance_type or default_instance_type - - -@pytest.fixture(name='docker_registry', scope='session') -def fixture_docker_registry(aws_id, region): - return '{}.dkr.ecr.{}.amazonaws.com'.format(aws_id, region) if aws_id else None - - -@pytest.fixture(name='image_uri', scope='session') -def fixture_image_uri(docker_registry, docker_base_name, tag): - if docker_registry: - return '{}/{}:{}'.format(docker_registry, docker_base_name, tag) - return '{}:{}'.format(docker_base_name, tag) - - -@pytest.fixture(scope='session', name='dist_cpu_backend', params=['gloo']) -def fixture_dist_cpu_backend(request): - return request.param - - 
-@pytest.fixture(scope='session', name='dist_gpu_backend', params=['gloo', 'nccl']) -def fixture_dist_gpu_backend(request): - return request.param - - -@pytest.fixture(autouse=True) -def skip_by_device_type(request, use_gpu, instance_type): - is_gpu = use_gpu or instance_type[3] in ['g', 'p'] - if (request.node.get_closest_marker('skip_gpu') and is_gpu) or \ - (request.node.get_closest_marker('skip_cpu') and not is_gpu): - pytest.skip('Skipping because running on \'{}\' instance'.format(instance_type)) - - -@pytest.fixture(autouse=True) -def skip_by_py_version(request, py_version): - """ - This will cause tests to be skipped w/ py3 containers if "py-version" flag is not set - and pytest is running from py2. We can rely on this when py2 is deprecated, but for now - we must use "skip_py2_containers" - """ - if request.node.get_closest_marker('skip_py2') and py_version != 'py3': - pytest.skip('Skipping the test because Python 2 is not supported.') - - -@pytest.fixture(autouse=True) -def skip_test_in_region(request, region): - if request.node.get_closest_marker('skip_test_in_region'): - if region == 'me-south-1': - pytest.skip('Skipping SageMaker test in region {}'.format(region)) - - -@pytest.fixture(autouse=True) -def skip_gpu_instance_restricted_regions(region, instance_type): - if ((region in NO_P2_REGIONS and instance_type.startswith('ml.p2')) - or (region in NO_P3_REGIONS and instance_type.startswith('ml.p3'))): - pytest.skip('Skipping GPU test in region {}'.format(region)) - - -@pytest.fixture(autouse=True) -def skip_py2_containers(request, tag): - if request.node.get_closest_marker('skip_py2_containers'): - if 'py2' in tag: - pytest.skip('Skipping python2 container with tag {}'.format(tag)) diff --git a/test-toolkit/integration/__init__.py b/test-toolkit/integration/__init__.py deleted file mode 100644 index e879ae0e..00000000 --- a/test-toolkit/integration/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import - -import os - -resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources')) -mnist_path = os.path.join(resources_path, 'mnist') -mnist_script = os.path.join(mnist_path, 'mnist.py') -fastai_path = os.path.join(resources_path, 'fastai') -fastai_cifar_script = os.path.join(fastai_path, 'train_cifar.py') -fastai_mnist_script = os.path.join(fastai_path, 'mnist.py') -data_dir = os.path.join(mnist_path, 'data') -training_dir = os.path.join(data_dir, 'training') -dist_operations_path = os.path.join(resources_path, 'distributed_operations.py') -smdebug_mnist_script = os.path.join(mnist_path, 'smdebug_mnist.py') - -mnist_1d_script = os.path.join(mnist_path, 'mnist_1d.py') -model_cpu_dir = os.path.join(mnist_path, 'model_cpu') -model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d') -model_gpu_dir = os.path.join(mnist_path, 'model_gpu') -model_gpu_1d_dir = os.path.join(model_gpu_dir, '1d') -call_model_fn_once_script = os.path.join(resources_path, 'call_model_fn_once.py') - -requirements_dir = os.path.join(resources_path, 'requirements') -requirements_script = 'entry.py' - -ROLE = 'dummy/unused-role' -DEFAULT_TIMEOUT = 20 -PYTHON3 = 'py3' diff --git a/test-toolkit/integration/local/__init__.py b/test-toolkit/integration/local/__init__.py deleted file mode 100644 index 79cb9cdf..00000000 --- a/test-toolkit/integration/local/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2018-2020 Amazon.com, 
Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import diff --git a/test-toolkit/integration/local/test_distributed_training.py b/test-toolkit/integration/local/test_distributed_training.py deleted file mode 100644 index 553110a8..00000000 --- a/test-toolkit/integration/local/test_distributed_training.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import - -import os - -import pytest -from sagemaker.pytorch import PyTorch - -from integration import data_dir, dist_operations_path, mnist_script, ROLE -from utils.local_mode_utils import assert_files_exist - -MODEL_SUCCESS_FILES = { - 'model': ['success'], - 'output': ['success'], -} - - -@pytest.fixture(scope='session', name='dist_gpu_backend', params=['gloo']) -def fixture_dist_gpu_backend(request): - return request.param - - -@pytest.mark.skip_gpu -def test_dist_operations_path_cpu(image_uri, dist_cpu_backend, sagemaker_local_session, tmpdir): - estimator = PyTorch(entry_point=dist_operations_path, - role=ROLE, - image_name=image_uri, - train_instance_count=2, - train_instance_type='local', - sagemaker_session=sagemaker_local_session, - hyperparameters={'backend': dist_cpu_backend}, - output_path='file://{}'.format(tmpdir)) - - _train_and_assert_success(estimator, str(tmpdir)) - - -@pytest.mark.skip_cpu -def test_dist_operations_path_gpu_nccl(image_uri, sagemaker_local_session, tmpdir): - estimator = PyTorch(entry_point=dist_operations_path, - role=ROLE, - image_name=image_uri, - train_instance_count=1, - train_instance_type='local_gpu', - sagemaker_session=sagemaker_local_session, - hyperparameters={'backend': 'nccl'}, - output_path='file://{}'.format(tmpdir)) - - _train_and_assert_success(estimator, str(tmpdir)) - - -@pytest.mark.skip_gpu -def test_cpu_nccl(image_uri, sagemaker_local_session, tmpdir): - estimator = PyTorch(entry_point=mnist_script, - role=ROLE, - image_name=image_uri, - train_instance_count=2, - train_instance_type='local', - sagemaker_session=sagemaker_local_session, - hyperparameters={'backend': 'nccl'}, - output_path='file://{}'.format(tmpdir)) - - with pytest.raises(RuntimeError): - estimator.fit({'training': 'file://{}'.format(os.path.join(data_dir, 'training'))}) - - failure_file = {'output': ['failure']} - assert_files_exist(str(tmpdir), failure_file) - - -@pytest.mark.skip_gpu -def 
test_mnist_cpu(image_uri, dist_cpu_backend, sagemaker_local_session, tmpdir): - estimator = PyTorch(entry_point=mnist_script, - role=ROLE, - image_name=image_uri, - train_instance_count=2, - train_instance_type='local', - sagemaker_session=sagemaker_local_session, - hyperparameters={'backend': dist_cpu_backend}, - output_path='file://{}'.format(tmpdir)) - - success_files = { - 'model': ['model.pth'], - 'output': ['success'], - } - _train_and_assert_success(estimator, str(tmpdir), success_files) - - -def _train_and_assert_success(estimator, output_path, output_files=MODEL_SUCCESS_FILES): - estimator.fit({'training': 'file://{}'.format(os.path.join(data_dir, 'training'))}) - assert_files_exist(output_path, output_files) diff --git a/test-toolkit/integration/local/test_single_machine_training.py b/test-toolkit/integration/local/test_single_machine_training.py deleted file mode 100644 index 68f99859..00000000 --- a/test-toolkit/integration/local/test_single_machine_training.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import - -import os - -from sagemaker.pytorch import PyTorch - -from utils.local_mode_utils import assert_files_exist -from integration import data_dir, mnist_script, ROLE - - -def test_mnist(image_uri, processor, instance_type, sagemaker_local_session, tmpdir): - estimator = PyTorch(entry_point=mnist_script, - role=ROLE, - image_name=image_uri, - train_instance_count=1, - train_instance_type=instance_type, - sagemaker_session=sagemaker_local_session, - hyperparameters={'processor': processor}, - output_path='file://{}'.format(tmpdir)) - - _train_and_assert_success(estimator, data_dir, str(tmpdir)) - - -def _train_and_assert_success(estimator, input_dir, output_path): - estimator.fit({'training': 'file://{}'.format(os.path.join(input_dir, 'training'))}) - - success_files = { - 'model': ['model.pth'], - 'output': ['success'], - } - assert_files_exist(output_path, success_files) diff --git a/test-toolkit/integration/sagemaker/__init__.py b/test-toolkit/integration/sagemaker/__init__.py deleted file mode 100644 index 79cb9cdf..00000000 --- a/test-toolkit/integration/sagemaker/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import diff --git a/test-toolkit/integration/sagemaker/test_distributed_operations.py b/test-toolkit/integration/sagemaker/test_distributed_operations.py deleted file mode 100644 index 6e072598..00000000 --- a/test-toolkit/integration/sagemaker/test_distributed_operations.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import - -import os - -import boto3 -import pytest -from sagemaker import utils -from sagemaker.pytorch import PyTorch -from six.moves.urllib.parse import urlparse - -from integration import data_dir, dist_operations_path, mnist_script, DEFAULT_TIMEOUT -from integration.sagemaker.timeout import timeout - -MULTI_GPU_INSTANCE = 'ml.p3.8xlarge' - - -@pytest.mark.skip_gpu -@pytest.mark.deploy_test -@pytest.mark.skip_test_in_region -def test_dist_operations_cpu(sagemaker_session, image_uri, instance_type, dist_cpu_backend): - instance_type = instance_type or 'ml.c4.xlarge' - _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_cpu_backend) - - -@pytest.mark.skip_cpu -@pytest.mark.deploy_test -def test_dist_operations_gpu(sagemaker_session, instance_type, image_uri, dist_gpu_backend): - instance_type = instance_type or 'ml.p2.xlarge' - _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_gpu_backend) - - -@pytest.mark.skip_cpu -def test_dist_operations_multi_gpu(sagemaker_session, image_uri, 
dist_gpu_backend): - _test_dist_operations(sagemaker_session, image_uri, MULTI_GPU_INSTANCE, dist_gpu_backend, 1) - - -@pytest.mark.skip_cpu -@pytest.mark.skip_py2_containers -def test_mnist_gpu(sagemaker_session, image_uri, dist_gpu_backend): - with timeout(minutes=DEFAULT_TIMEOUT): - pytorch = PyTorch(entry_point=mnist_script, - role='SageMakerRole', - train_instance_count=2, - image_name=image_uri, - train_instance_type=MULTI_GPU_INSTANCE, - sagemaker_session=sagemaker_session, - hyperparameters={'backend': dist_gpu_backend}) - - training_input = sagemaker_session.upload_data(path=os.path.join(data_dir, 'training'), - key_prefix='pytorch/mnist') - - job_name = utils.unique_name_from_base('test-pytorch-dist-ops') - pytorch.fit({'training': training_input}, job_name=job_name) - - -def _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_backend, train_instance_count=3): - with timeout(minutes=DEFAULT_TIMEOUT): - pytorch = PyTorch(entry_point=dist_operations_path, - role='SageMakerRole', - train_instance_count=train_instance_count, - train_instance_type=instance_type, - sagemaker_session=sagemaker_session, - image_name=image_uri, - hyperparameters={'backend': dist_backend}) - - pytorch.sagemaker_session.default_bucket() - fake_input = pytorch.sagemaker_session.upload_data(path=dist_operations_path, - key_prefix='pytorch/distributed_operations') - - job_name = utils.unique_name_from_base('test-pytorch-dist-ops') - pytorch.fit({'required_argument': fake_input}, job_name=job_name) - - -def _assert_s3_file_exists(region, s3_url): - parsed_url = urlparse(s3_url) - s3 = boto3.resource('s3', region_name=region) - s3.Object(parsed_url.netloc, parsed_url.path.lstrip('/')).load() diff --git a/test-toolkit/integration/sagemaker/test_mnist.py b/test-toolkit/integration/sagemaker/test_mnist.py deleted file mode 100644 index 0a7fec96..00000000 --- a/test-toolkit/integration/sagemaker/test_mnist.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2018-2020 
Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import - -import pytest -from sagemaker import utils -from sagemaker.pytorch import PyTorch - -from integration import training_dir, mnist_script, DEFAULT_TIMEOUT -from integration.sagemaker.timeout import timeout - - -@pytest.mark.skip_gpu -def test_mnist_distributed_cpu(sagemaker_session, image_uri, instance_type, dist_cpu_backend): - instance_type = instance_type or 'ml.c4.xlarge' - _test_mnist_distributed(sagemaker_session, image_uri, instance_type, dist_cpu_backend) - - -@pytest.mark.skip_cpu -def test_mnist_distributed_gpu(sagemaker_session, image_uri, instance_type, dist_gpu_backend): - instance_type = instance_type or 'ml.p2.xlarge' - _test_mnist_distributed(sagemaker_session, image_uri, instance_type, dist_gpu_backend) - - -def _test_mnist_distributed(sagemaker_session, image_uri, instance_type, dist_backend): - with timeout(minutes=DEFAULT_TIMEOUT): - pytorch = PyTorch(entry_point=mnist_script, - role='SageMakerRole', - train_instance_count=2, - train_instance_type=instance_type, - sagemaker_session=sagemaker_session, - image_name=image_uri, - hyperparameters={'backend': dist_backend, 'epochs': 2}) - training_input = pytorch.sagemaker_session.upload_data(path=training_dir, - key_prefix='pytorch/mnist') - - job_name = utils.unique_name_from_base('test-pytorch-mnist') - - pytorch.fit({'training': training_input}, job_name=job_name) diff --git 
a/test-toolkit/integration/sagemaker/timeout.py b/test-toolkit/integration/sagemaker/timeout.py deleted file mode 100644 index 1b941589..00000000 --- a/test-toolkit/integration/sagemaker/timeout.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -# TODO: this is used in all containers and sdk. We should move it to container support or sdk test utils. -from __future__ import absolute_import -import signal -from contextlib import contextmanager -import logging - -from botocore.exceptions import ClientError - -LOGGER = logging.getLogger('timeout') - - -class TimeoutError(Exception): - pass - - -@contextmanager -def timeout(seconds=0, minutes=0, hours=0): - """Add a signal-based timeout to any block of code. - If multiple time units are specified, they will be added together to determine time limit. - Usage: - with timeout(seconds=5): - my_slow_function(...) - Args: - - seconds: The time limit, in seconds. - - minutes: The time limit, in minutes. - - hours: The time limit, in hours. 
- """ - - limit = seconds + 60 * minutes + 3600 * hours - - def handler(signum, frame): - raise TimeoutError('timed out after {} seconds'.format(limit)) - - try: - signal.signal(signal.SIGALRM, handler) - signal.alarm(limit) - - yield - finally: - signal.alarm(0) - - -@contextmanager -def timeout_and_delete_endpoint(estimator, seconds=0, minutes=0, hours=0): - with timeout(seconds=seconds, minutes=minutes, hours=hours) as t: - try: - yield [t] - finally: - try: - estimator.delete_endpoint() - LOGGER.info('deleted endpoint') - except ClientError as ce: - if ce.response['Error']['Code'] == 'ValidationException': - # avoids the inner exception to be overwritten - pass - - -@contextmanager -def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, seconds=0, minutes=0, hours=0): - with timeout(seconds=seconds, minutes=minutes, hours=hours) as t: - try: - yield [t] - finally: - try: - sagemaker_session.delete_endpoint(endpoint_name) - LOGGER.info('deleted endpoint {}'.format(endpoint_name)) - except ClientError as ce: - if ce.response['Error']['Code'] == 'ValidationException': - # avoids the inner exception to be overwritten - pass diff --git a/test-toolkit/resources/distributed_operations.py b/test-toolkit/resources/distributed_operations.py deleted file mode 100644 index e26cac2e..00000000 --- a/test-toolkit/resources/distributed_operations.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import -import argparse -import json -import logging -import os -import sys -import torch -import torch.distributed as dist -from torch.multiprocessing import Process -import torch.utils.data -import torch.utils.data.distributed - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -logger.addHandler(logging.StreamHandler(sys.stdout)) - - -def _get_tensor(rank, rows, columns): - device = torch.device( - "cuda:{}".format(dist.get_rank() % torch.cuda.device_count()) if torch.cuda.is_available() - else "cpu" - ) - tensor = torch.ones(rows, columns) * (rank + 1) - return tensor.to(device) - - -def _get_zeros_tensor(rows, columns): - device = torch.device( - "cuda:{}".format(dist.get_rank() % torch.cuda.device_count()) if torch.cuda.is_available() - else "cpu" - ) - tensor = torch.zeros(rows, columns) - return tensor.to(device) - - -def _get_zeros_tensors_list(rows, columns): - return [_get_zeros_tensor(rows, columns) for _ in range(dist.get_world_size())] - - -def _get_tensors_sum(rows, columns): - device = torch.device( - "cuda:{}".format(dist.get_rank() % torch.cuda.device_count()) if torch.cuda.is_available() - else "cpu" - ) - result = (1 + dist.get_world_size()) * dist.get_world_size() / 2 - tensor = torch.ones(rows, columns) * result - return tensor.to(device) - - -def _send_recv(rank, rows, columns): - source = 0 - tensor = _get_tensor(rank, rows, columns) - logger.debug('Rank: {},\nTensor BEFORE send_recv: {}'.format(rank, tensor)) - if rank == 0: - for i in range(1, dist.get_world_size()): - dist.send(tensor=tensor, dst=i) - else: - dist.recv(tensor=tensor, src=source) - logger.debug('Rank: {},\nTensor AFTER send_recv: {}\n'.format(rank, tensor)) - - assert torch.equal(tensor, _get_tensor(source, rows, columns)),\ - 'Rank {}: Tensor was not equal to rank {} tensor after send-recv.'.format(rank, source) - - -def _broadcast(rank, rows, columns): - source = 0 - tensor = _get_tensor(rank, rows, columns) - 
logger.debug('Rank: {},\nTensor BEFORE broadcast: {}'.format(rank, tensor)) - dist.broadcast(tensor, src=source) - logger.debug('Rank: {},\nTensor AFTER broadcast: {}\n'.format(rank, tensor)) - - assert torch.equal(tensor, _get_tensor(source, rows, columns)), \ - 'Rank {}: Tensor was not equal to rank {} tensor after broadcast.'.format(rank, source) - - -def _all_reduce(rank, rows, columns): - tensor = _get_tensor(rank, rows, columns) - logger.debug('Rank: {},\nTensor BEFORE all_reduce: {}'.format(rank, tensor)) - dist.all_reduce(tensor, op=dist.reduce_op.SUM) - logger.debug('Rank: {},\nTensor AFTER all_reduce: {}\n'.format(rank, tensor)) - - assert torch.equal(tensor, _get_tensors_sum(rows, columns)), \ - 'Rank {}: Tensor was not equal to SUM of {} tensors after all_reduce.'.format(rank, dist.get_world_size()) - - -def _reduce(rank, rows, columns): - dest = 0 - tensor = _get_tensor(rank, rows, columns) - logger.debug('Rank: {},\nTensor BEFORE reduce: {}'.format(rank, tensor)) - # this is inplace operation - dist.reduce(tensor, op=dist.reduce_op.SUM, dst=dest) - logger.debug('Rank: {},\nTensor AFTER reduce: {}\n'.format(rank, tensor)) - - if rank == dest: - assert torch.equal(tensor, _get_tensors_sum(rows, columns)), \ - 'Rank {}: Tensor was not equal to SUM of {} tensors after reduce.'.format(rank, dist.get_world_size()) - - -def _all_gather(rank, rows, columns): - tensor = _get_tensor(rank, rows, columns) - tensors_list = _get_zeros_tensors_list(rows, columns) - logger.debug('Rank: {},\nTensor BEFORE all_gather: {}'.format(rank, tensor)) - dist.all_gather(tensors_list, tensor) - logger.debug('Rank: {},\nTensor AFTER all_gather: {}. 
tensors_list: {}\n'.format( - rank, tensor, tensors_list)) - - # tensor shouldn't have changed - assert torch.equal(tensor, _get_tensor(rank, rows, columns)), \ - 'Rank {}: Tensor got changed after all_gather.'.format(rank) - for i in range(dist.get_world_size()): - assert torch.equal(tensors_list[i], _get_tensor(i, rows, columns)), \ - 'Rank {}: tensors lists are not the same after all_gather.' - - -def _gather(rank, rows, columns): - dest = 0 - tensor = _get_tensor(rank, rows, columns) - if rank == dest: - tensors_list = _get_zeros_tensors_list(rows, columns) - logger.debug('Rank: {},\nTensor BEFORE gather: {}. tensors_list: {}'.format( - rank, tensor, tensors_list)) - dist.gather(tensor=tensor, gather_list=tensors_list) - logger.debug('Rank: {},\nTensor AFTER gather: {}. tensors_list: {}\n'.format( - rank, tensor, tensors_list)) - for i in range(dist.get_world_size()): - assert torch.equal(tensors_list[i], _get_tensor(i, rows, columns)), \ - 'Rank {}: tensors lists are not the same after gather.' - else: - logger.debug('Rank: {},\nTensor BEFORE gather: {}\n'.format(rank, tensor)) - dist.gather(tensor=tensor, dst=dest) - logger.debug('Rank: {},\nTensor AFTER gather: {}\n'.format(rank, tensor)) - - # tensor shouldn't have changed - assert torch.equal(tensor, _get_tensor(rank, rows, columns)), \ - 'Rank {}: Tensor got changed after gather.'.format(rank) - - -def _scatter(rank, rows, columns): - source = 0 - tensor = _get_tensor(rank, rows, columns) - if rank == source: - tensors_list = _get_zeros_tensors_list(rows, columns) - logger.debug('Rank: {},\nTensor BEFORE scatter: {}. 
tensors_list: {}'.format( - rank, tensor, tensors_list)) - dist.scatter(tensor=tensor, scatter_list=tensors_list) - else: - logger.debug('Rank: {},\nTensor BEFORE scatter: {}\n'.format(rank, tensor)) - dist.scatter(tensor=tensor, src=source) - logger.debug('Rank: {},\nTensor AFTER scatter: {}\n'.format(rank, tensor)) - - assert torch.equal(tensor, _get_zeros_tensor(rows, columns)), \ - 'Rank {}: Tensor should be all zeroes after scatter.'.format(rank) - - -def _barrier(rank): - logger.debug('Rank: {}, Waiting for other processes before the barrier.'.format(rank)) - dist.barrier() - logger.debug('Rank: {}, Passing the barrier'.format(rank)) - - -def main(): - print('Starting') - parser = argparse.ArgumentParser() - # Configurable hyperparameters - parser.add_argument('--rows', type=int, default=1, - help='Number of rows in the tensor.') - parser.add_argument('--columns', type=int, default=1, - help='Number of columns in the tensor.') - parser.add_argument('--backend', type=str, default=None, - help='backend for distributed operations.') - - # Container environment - parser.add_argument('--hosts', type=list, default=json.loads(os.environ["SM_HOSTS"])) - parser.add_argument('--current-host', type=str, default=os.environ["SM_CURRENT_HOST"]) - parser.add_argument('--model-dir', type=str, default=os.environ["SM_MODEL_DIR"]) - parser.add_argument('--num-gpus', type=int, default=os.environ["SM_NUM_GPUS"]) - parser.add_argument('--num-cpus', type=int, default=os.environ["SM_NUM_CPUS"]) - - args = parser.parse_args() - - number_of_processes = args.num_gpus if args.num_gpus > 0 else args.num_cpus - world_size = number_of_processes * len(args.hosts) - logger.info('Running \'{}\' backend on {} nodes and {} processes. 
World size is {}.'.format( - args.backend, len(args.hosts), number_of_processes, world_size - )) - host_rank = args.hosts.index(args.current_host) - master_addr = args.hosts[0] - master_port = '55555' - processes = [] - for rank in range(number_of_processes): - process_rank = host_rank * number_of_processes + rank - p = Process( - target=init_processes, - args=(args.backend, - master_addr, - master_port, - process_rank, - world_size, - args.rows, - args.columns, - args.current_host, - args.num_gpus) - ) - p.start() - processes.append(p) - - for p in processes: - p.join() - - save('success', args.model_dir) - - -def init_processes(backend, master_addr, master_port, rank, world_size, - rows, columns, host, num_gpus): - # Initialize the distributed environment. - os.environ['WORLD_SIZE'] = str(world_size) - os.environ['RANK'] = str(rank) - os.environ['MASTER_ADDR'] = master_addr - os.environ['MASTER_PORT'] = master_port - - logger.info('Init process rank {} on host \'{}\''.format(rank, host)) - dist.init_process_group(backend=backend, rank=rank, world_size=world_size) - run(backend, rank, rows, columns, num_gpus) - - -def run(backend, rank, rows, columns, num_gpus): - # https://pytorch.org/docs/master/distributed.html - if backend == 'gloo': - print('Run operations supported by \'gloo\' backend.') - _broadcast(rank, rows, columns) - _all_reduce(rank, rows, columns) - _barrier(rank) - - # this operation supported only on cpu - if num_gpus == 0: - _send_recv(rank, rows, columns) - elif backend == 'nccl': - print('Run operations supported by \'nccl\' backend.') - # Note: nccl does not support gather or scatter as well: - # https://github.com/pytorch/pytorch/blob/v0.4.0/torch/lib/THD/base/data_channels/DataChannelNccl.cpp - _broadcast(rank, rows, columns) - _all_reduce(rank, rows, columns) - _reduce(rank, rows, columns) - _all_gather(rank, rows, columns) - - -def save(result, model_dir): - filename = os.path.join(model_dir, result) - if not 
os.path.exists(filename): - logger.info("Saving success result") - with open(filename, 'w') as f: - f.write(result) - - -if __name__ == '__main__': - main() diff --git a/test-toolkit/resources/mnist/data/training/MNIST/processed/test.pt b/test-toolkit/resources/mnist/data/training/MNIST/processed/test.pt deleted file mode 100644 index 1d2112e0..00000000 Binary files a/test-toolkit/resources/mnist/data/training/MNIST/processed/test.pt and /dev/null differ diff --git a/test-toolkit/resources/mnist/data/training/MNIST/processed/training.pt b/test-toolkit/resources/mnist/data/training/MNIST/processed/training.pt deleted file mode 100644 index 7583094a..00000000 Binary files a/test-toolkit/resources/mnist/data/training/MNIST/processed/training.pt and /dev/null differ diff --git a/test-toolkit/resources/mnist/data/training/model b/test-toolkit/resources/mnist/data/training/model deleted file mode 100644 index 393ea140..00000000 Binary files a/test-toolkit/resources/mnist/data/training/model and /dev/null differ diff --git a/test-toolkit/resources/mnist/mnist.py b/test-toolkit/resources/mnist/mnist.py deleted file mode 100644 index 1359f712..00000000 --- a/test-toolkit/resources/mnist/mnist.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import - -import argparse -import logging -import os -import sys - -from sagemaker_training import environment -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch.utils.data -import torch.utils.data.distributed -from torchvision import datasets, transforms - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -logger.addHandler(logging.StreamHandler(sys.stdout)) - - -# Based on https://github.com/pytorch/examples/blob/master/mnist/main.py -class Net(nn.Module): - def __init__(self): - logger.info("Create neural network module") - - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 10, kernel_size=5) - self.conv2 = nn.Conv2d(10, 20, kernel_size=5) - self.conv2_drop = nn.Dropout2d() - self.fc1 = nn.Linear(320, 50) - self.fc2 = nn.Linear(50, 10) - - def forward(self, x): - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - return F.log_softmax(x, dim=1) - - -def _get_train_data_loader(batch_size, training_dir, is_distributed, **kwargs): - logger.info("Get train data loader") - dataset = datasets.MNIST(training_dir, train=True, transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None - return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train_sampler is None, - sampler=train_sampler, **kwargs) - - -def _get_test_data_loader(test_batch_size, training_dir, **kwargs): - logger.info("Get test data loader") - return torch.utils.data.DataLoader( - datasets.MNIST(training_dir, train=False, transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), 
(0.3081,)) - ])), - batch_size=test_batch_size, shuffle=True, **kwargs) - - -def _average_gradients(model): - # Gradient averaging. - size = float(dist.get_world_size()) - for param in model.parameters(): - dist.all_reduce(param.grad.data, op=dist.reduce_op.SUM) - param.grad.data /= size - - -def train(args): - is_distributed = len(args.hosts) > 1 and args.backend is not None - logger.debug("Distributed training - {}".format(is_distributed)) - use_cuda = (args.processor == 'gpu') or (args.num_gpus > 0) - logger.debug("Number of gpus available - {}".format(args.num_gpus)) - kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} - device = torch.device("cuda" if use_cuda else "cpu") - - if is_distributed: - # Initialize the distributed environment. - world_size = len(args.hosts) - os.environ['WORLD_SIZE'] = str(world_size) - host_rank = args.hosts.index(args.current_host) - os.environ['RANK'] = str(host_rank) - dist.init_process_group(backend=args.backend, rank=host_rank, world_size=world_size) - logger.info('Initialized the distributed environment: \'{}\' backend on {} nodes. '.format( - args.backend, dist.get_world_size()) + 'Current host rank is {}. Number of gpus: {}'.format( - dist.get_rank(), args.num_gpus)) - - # set the seed for generating random numbers - torch.manual_seed(args.seed) - if use_cuda: - torch.cuda.manual_seed(args.seed) - - train_loader = _get_train_data_loader(args.batch_size, args.data_dir, is_distributed, **kwargs) - test_loader = _get_test_data_loader(args.test_batch_size, args.data_dir, **kwargs) - - # TODO: assert the logs when we move to the SDK local mode - logger.debug("Processes {}/{} ({:.0f}%) of train data".format( - len(train_loader.sampler), len(train_loader.dataset), - 100. * len(train_loader.sampler) / len(train_loader.dataset) - )) - - logger.debug("Processes {}/{} ({:.0f}%) of test data".format( - len(test_loader.sampler), len(test_loader.dataset), - 100. 
* len(test_loader.sampler) / len(test_loader.dataset) - )) - - model = Net().to(device) - if is_distributed and use_cuda: - # multi-machine multi-gpu case - logger.debug("Multi-machine multi-gpu: using DistributedDataParallel.") - model = torch.nn.parallel.DistributedDataParallel(model) - elif use_cuda: - # single-machine multi-gpu case - logger.debug("Single-machine multi-gpu: using DataParallel().cuda().") - model = torch.nn.DataParallel(model).to(device) - else: - # single-machine or multi-machine cpu case - logger.debug("Single-machine/multi-machine cpu: using DataParallel.") - model = torch.nn.DataParallel(model) - - optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) - - for epoch in range(1, args.epochs + 1): - model.train() - for batch_idx, (data, target) in enumerate(train_loader, 1): - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - if is_distributed and not use_cuda: - # average gradients manually for multi-machine cpu case only - _average_gradients(model) - optimizer.step() - if batch_idx % args.log_interval == 0: - logger.debug('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, batch_idx * len(data), len(train_loader.sampler), - 100. 
* batch_idx / len(train_loader), loss.item())) - test(model, test_loader, device) - save_model(model, args.model_dir) - - -def test(model, test_loader, device): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - test_loss += F.nll_loss(output, target, size_average=None).item() # sum up batch loss - pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability - correct += pred.eq(target.view_as(pred)).sum().item() - - test_loss /= len(test_loader.dataset) - logger.debug('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. * correct / len(test_loader.dataset))) - - -def save_model(model, model_dir): - logger.info("Saving the model.") - path = os.path.join(model_dir, 'model.pth') - # recommended way from http://pytorch.org/docs/master/notes/serialization.html - torch.save(model.state_dict(), path) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - - # Data and model checkpoints directories - parser.add_argument('--batch-size', type=int, default=64, metavar='N', - help='input batch size for training (default: 64)') - parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', - help='input batch size for testing (default: 1000)') - parser.add_argument('--epochs', type=int, default=1, metavar='N', - help='number of epochs to train (default: 10)') - parser.add_argument('--lr', type=float, default=0.01, metavar='LR', - help='learning rate (default: 0.01)') - parser.add_argument('--momentum', type=float, default=0.5, metavar='M', - help='SGD momentum (default: 0.5)') - parser.add_argument('--seed', type=int, default=1, metavar='S', - help='random seed (default: 1)') - parser.add_argument('--log-interval', type=int, default=100, metavar='N', - help='how many batches to wait before logging training status') - 
parser.add_argument('--backend', type=str, default=None, - help='backend for distributed training') - parser.add_argument('--processor', type=str, default='cpu', - help='backend for distributed training') - - # Container environment - env = environment.Environment() - parser.add_argument('--hosts', type=list, default=env.hosts) - parser.add_argument('--current-host', type=str, default=env.current_host) - parser.add_argument('--model-dir', type=str, default=env.model_dir) - parser.add_argument('--data-dir', type=str, default=env.channel_input_dirs['training']) - parser.add_argument('--num-gpus', type=int, default=env.num_gpus) - - train(parser.parse_args()) diff --git a/test-toolkit/utils/__init__.py b/test-toolkit/utils/__init__.py deleted file mode 100644 index 79cb9cdf..00000000 --- a/test-toolkit/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import diff --git a/test-toolkit/utils/local_mode_utils.py b/test-toolkit/utils/local_mode_utils.py deleted file mode 100644 index acd1197e..00000000 --- a/test-toolkit/utils/local_mode_utils.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. 
A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import - -from contextlib import contextmanager -import fcntl -import os -import tarfile -import time - -from integration import resources_path - -LOCK_PATH = os.path.join(resources_path, 'local_mode_lock') - - -@contextmanager -def lock(): - # Since Local Mode uses the same port for serving, we need a lock in order - # to allow concurrent test execution. - local_mode_lock_fd = open(LOCK_PATH, 'w') - local_mode_lock = local_mode_lock_fd.fileno() - - fcntl.lockf(local_mode_lock, fcntl.LOCK_EX) - - try: - yield - finally: - time.sleep(5) - fcntl.lockf(local_mode_lock, fcntl.LOCK_UN) - - -def assert_files_exist(output_path, directory_file_map): - for directory, files in directory_file_map.items(): - with tarfile.open(os.path.join(output_path, '{}.tar.gz'.format(directory))) as tar: - for f in files: - tar.getmember(f) diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index 79cb9cdf..00000000 --- a/test/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-from __future__ import absolute_import diff --git a/test/conftest.py b/test/conftest.py index 5609d0fb..13e28de3 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -22,7 +22,8 @@ import tempfile from sagemaker import LocalSession, Session -from sagemaker.pytorch import PyTorch + +from utils import image_utils logger = logging.getLogger(__name__) logging.getLogger('boto').setLevel(logging.INFO) @@ -42,17 +43,33 @@ def pytest_addoption(parser): - parser.addoption('--aws-id') + parser.addoption('--build-image', '-B', action='store_true') + parser.addoption('--push-image', '-P', action='store_true') + parser.addoption('--dockerfile-type', '-T', choices=['dlc.cpu', 'dlc.gpu', 'pytorch'], + default='pytorch') + parser.addoption('--dockerfile', '-D', default=None) + parser.addoption('--aws-id', default=None) parser.addoption('--instance-type') - parser.addoption('--docker-base-name', default='pytorch') + parser.addoption('--docker-base-name', default='sagemaker-pytorch-training') parser.addoption('--region', default='us-west-2') - parser.addoption('--framework-version', default=PyTorch.LATEST_VERSION) + parser.addoption('--framework-version', default="1.4.0") parser.addoption('--py-version', choices=['2', '3'], default=str(sys.version_info.major)) parser.addoption('--processor', choices=['gpu', 'cpu'], default='cpu') # If not specified, will default to {framework-version}-{processor}-py{py-version} parser.addoption('--tag', default=None) +@pytest.fixture(scope='session', name='dockerfile_type') +def fixture_dockerfile_type(request): + return request.config.getoption('--dockerfile-type') + + +@pytest.fixture(scope='session', name='dockerfile') +def fixture_dockerfile(request, dockerfile_type): + dockerfile = request.config.getoption('--dockerfile') + return dockerfile if dockerfile else 'Dockerfile.{}'.format(dockerfile_type) + + @pytest.fixture(scope='session', name='docker_base_name') def fixture_docker_base_name(request): return 
request.config.getoption('--docker-base-name') @@ -85,11 +102,6 @@ def fixture_tag(request, framework_version, processor, py_version): return provided_tag if provided_tag else default_tag -@pytest.fixture(scope='session', name='docker_image') -def fixture_docker_image(docker_base_name, tag): - return '{}:{}'.format(docker_base_name, tag) - - @pytest.fixture def opt_ml(): tmp = tempfile.mkdtemp() @@ -108,6 +120,27 @@ def fixture_use_gpu(processor): return processor == 'gpu' +@pytest.fixture(scope='session', name='build_image', autouse=True) +def fixture_build_image(request, framework_version, dockerfile, image_uri, region): + build_image = request.config.getoption('--build-image') + if build_image: + return image_utils.build_image(framework_version=framework_version, + dockerfile=dockerfile, + image_uri=image_uri, + region=region, + cwd=os.path.join(dir_path, '..')) + + return image_uri + + +@pytest.fixture(scope='session', name='push_image', autouse=True) +def fixture_push_image(request, image_uri, region, aws_id): + push_image = request.config.getoption('--push-image') + if push_image: + return image_utils.push_image(image_uri, region, aws_id) + return None + + @pytest.fixture(scope='session', name='sagemaker_session') def fixture_sagemaker_session(region): return Session(boto_session=boto3.Session(region_name=region)) @@ -132,12 +165,14 @@ def fixture_instance_type(request, processor): @pytest.fixture(name='docker_registry', scope='session') def fixture_docker_registry(aws_id, region): - return '{}.dkr.ecr.{}.amazonaws.com'.format(aws_id, region) + return '{}.dkr.ecr.{}.amazonaws.com'.format(aws_id, region) if aws_id else None -@pytest.fixture(name='ecr_image', scope='session') -def fixture_ecr_image(docker_registry, docker_base_name, tag): - return '{}/{}:{}'.format(docker_registry, docker_base_name, tag) +@pytest.fixture(name='image_uri', scope='session') +def fixture_image_uri(docker_registry, docker_base_name, tag): + if docker_registry: + return 
'{}/{}:{}'.format(docker_registry, docker_base_name, tag) + return '{}:{}'.format(docker_base_name, tag) @pytest.fixture(scope='session', name='dist_cpu_backend', params=['gloo']) diff --git a/test-toolkit/docker/1.4.0/Dockerfile.dlc.cpu b/test/container/1.4.0/Dockerfile.dlc.cpu similarity index 100% rename from test-toolkit/docker/1.4.0/Dockerfile.dlc.cpu rename to test/container/1.4.0/Dockerfile.dlc.cpu diff --git a/test-toolkit/docker/1.4.0/Dockerfile.dlc.gpu b/test/container/1.4.0/Dockerfile.dlc.gpu similarity index 100% rename from test-toolkit/docker/1.4.0/Dockerfile.dlc.gpu rename to test/container/1.4.0/Dockerfile.dlc.gpu diff --git a/test-toolkit/docker/1.4.0/Dockerfile.pytorch b/test/container/1.4.0/Dockerfile.pytorch similarity index 100% rename from test-toolkit/docker/1.4.0/Dockerfile.pytorch rename to test/container/1.4.0/Dockerfile.pytorch diff --git a/test/integration/__init__.py b/test/integration/__init__.py index b428aaeb..e879ae0e 100644 --- a/test/integration/__init__.py +++ b/test/integration/__init__.py @@ -32,6 +32,9 @@ model_gpu_1d_dir = os.path.join(model_gpu_dir, '1d') call_model_fn_once_script = os.path.join(resources_path, 'call_model_fn_once.py') +requirements_dir = os.path.join(resources_path, 'requirements') +requirements_script = 'entry.py' + ROLE = 'dummy/unused-role' DEFAULT_TIMEOUT = 20 PYTHON3 = 'py3' diff --git a/test/integration/local/test_distributed_training.py b/test/integration/local/test_distributed_training.py index b2b2504f..553110a8 100644 --- a/test/integration/local/test_distributed_training.py +++ b/test/integration/local/test_distributed_training.py @@ -17,8 +17,8 @@ import pytest from sagemaker.pytorch import PyTorch -from test.integration import data_dir, dist_operations_path, mnist_script, ROLE -from test.utils.local_mode_utils import assert_files_exist +from integration import data_dir, dist_operations_path, mnist_script, ROLE +from utils.local_mode_utils import assert_files_exist MODEL_SUCCESS_FILES = { 
'model': ['success'], @@ -32,10 +32,10 @@ def fixture_dist_gpu_backend(request): @pytest.mark.skip_gpu -def test_dist_operations_path_cpu(docker_image, dist_cpu_backend, sagemaker_local_session, tmpdir): +def test_dist_operations_path_cpu(image_uri, dist_cpu_backend, sagemaker_local_session, tmpdir): estimator = PyTorch(entry_point=dist_operations_path, role=ROLE, - image_name=docker_image, + image_name=image_uri, train_instance_count=2, train_instance_type='local', sagemaker_session=sagemaker_local_session, @@ -46,10 +46,10 @@ def test_dist_operations_path_cpu(docker_image, dist_cpu_backend, sagemaker_loca @pytest.mark.skip_cpu -def test_dist_operations_path_gpu_nccl(docker_image, sagemaker_local_session, tmpdir): +def test_dist_operations_path_gpu_nccl(image_uri, sagemaker_local_session, tmpdir): estimator = PyTorch(entry_point=dist_operations_path, role=ROLE, - image_name=docker_image, + image_name=image_uri, train_instance_count=1, train_instance_type='local_gpu', sagemaker_session=sagemaker_local_session, @@ -60,10 +60,10 @@ def test_dist_operations_path_gpu_nccl(docker_image, sagemaker_local_session, tm @pytest.mark.skip_gpu -def test_cpu_nccl(docker_image, sagemaker_local_session, tmpdir): +def test_cpu_nccl(image_uri, sagemaker_local_session, tmpdir): estimator = PyTorch(entry_point=mnist_script, role=ROLE, - image_name=docker_image, + image_name=image_uri, train_instance_count=2, train_instance_type='local', sagemaker_session=sagemaker_local_session, @@ -78,10 +78,10 @@ def test_cpu_nccl(docker_image, sagemaker_local_session, tmpdir): @pytest.mark.skip_gpu -def test_mnist_cpu(docker_image, dist_cpu_backend, sagemaker_local_session, tmpdir): +def test_mnist_cpu(image_uri, dist_cpu_backend, sagemaker_local_session, tmpdir): estimator = PyTorch(entry_point=mnist_script, role=ROLE, - image_name=docker_image, + image_name=image_uri, train_instance_count=2, train_instance_type='local', sagemaker_session=sagemaker_local_session, diff --git 
a/test-toolkit/integration/local/test_requirements.py b/test/integration/local/test_requirements.py similarity index 100% rename from test-toolkit/integration/local/test_requirements.py rename to test/integration/local/test_requirements.py diff --git a/test/integration/local/test_single_machine_training.py b/test/integration/local/test_single_machine_training.py index a4852e23..68f99859 100644 --- a/test/integration/local/test_single_machine_training.py +++ b/test/integration/local/test_single_machine_training.py @@ -14,17 +14,16 @@ import os -import pytest from sagemaker.pytorch import PyTorch -from test.utils.local_mode_utils import assert_files_exist -from test.integration import data_dir, fastai_path, fastai_mnist_script, mnist_script, PYTHON3, ROLE +from utils.local_mode_utils import assert_files_exist +from integration import data_dir, mnist_script, ROLE -def test_mnist(docker_image, processor, instance_type, sagemaker_local_session, tmpdir): +def test_mnist(image_uri, processor, instance_type, sagemaker_local_session, tmpdir): estimator = PyTorch(entry_point=mnist_script, role=ROLE, - image_name=docker_image, + image_name=image_uri, train_instance_count=1, train_instance_type=instance_type, sagemaker_session=sagemaker_local_session, @@ -34,22 +33,6 @@ def test_mnist(docker_image, processor, instance_type, sagemaker_local_session, _train_and_assert_success(estimator, data_dir, str(tmpdir)) -def test_fastai_mnist(docker_image, instance_type, py_version, sagemaker_local_session, tmpdir): - if py_version != PYTHON3: - pytest.skip('Skipping the test because fastai supports >= Python 3.6.') - - estimator = PyTorch(entry_point=fastai_mnist_script, - role=ROLE, - image_name=docker_image, - train_instance_count=1, - train_instance_type=instance_type, - sagemaker_session=sagemaker_local_session, - output_path='file://{}'.format(tmpdir)) - - input_dir = os.path.join(fastai_path, 'mnist_tiny') - _train_and_assert_success(estimator, input_dir, str(tmpdir)) - - def 
_train_and_assert_success(estimator, input_dir, output_path): estimator.fit({'training': 'file://{}'.format(os.path.join(input_dir, 'training'))}) diff --git a/test/integration/sagemaker/test_dgl.py b/test/integration/sagemaker/test_dgl.py deleted file mode 100644 index f0eb06e4..00000000 --- a/test/integration/sagemaker/test_dgl.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. -from __future__ import absolute_import - -import os - -import pytest -from sagemaker import utils -from sagemaker.pytorch import PyTorch - -from test.integration import resources_path, DEFAULT_TIMEOUT -from test.integration.sagemaker.timeout import timeout - -DGL_DATA_PATH = os.path.join(resources_path, 'dgl-gcn') -DGL_SCRIPT_PATH = os.path.join(DGL_DATA_PATH, 'gcn.py') - - -@pytest.mark.skip_gpu -@pytest.mark.skip_py2_containers -def test_dgl_gcn_training_cpu(sagemaker_session, ecr_image, instance_type): - instance_type = instance_type or 'ml.c4.xlarge' - _test_dgl_training(sagemaker_session, ecr_image, instance_type) - - -@pytest.mark.skip_cpu -@pytest.mark.skip_py2_containers -def test_dgl_gcn_training_gpu(sagemaker_session, ecr_image, instance_type): - instance_type = instance_type or 'ml.p2.xlarge' - _test_dgl_training(sagemaker_session, ecr_image, instance_type) - - -def _test_dgl_training(sagemaker_session, ecr_image, instance_type): - dgl = PyTorch(entry_point=DGL_SCRIPT_PATH, - role='SageMakerRole', - 
train_instance_count=1, - train_instance_type=instance_type, - sagemaker_session=sagemaker_session, - image_name=ecr_image) - with timeout(minutes=DEFAULT_TIMEOUT): - job_name = utils.unique_name_from_base('test-pytorch-dgl-image') - dgl.fit(job_name=job_name) diff --git a/test/integration/sagemaker/test_distributed_operations.py b/test/integration/sagemaker/test_distributed_operations.py index 3f05350d..6e072598 100644 --- a/test/integration/sagemaker/test_distributed_operations.py +++ b/test/integration/sagemaker/test_distributed_operations.py @@ -20,9 +20,8 @@ from sagemaker.pytorch import PyTorch from six.moves.urllib.parse import urlparse -from test.integration import (data_dir, dist_operations_path, fastai_path, mnist_script, - DEFAULT_TIMEOUT) -from test.integration.sagemaker.timeout import timeout +from integration import data_dir, dist_operations_path, mnist_script, DEFAULT_TIMEOUT +from integration.sagemaker.timeout import timeout MULTI_GPU_INSTANCE = 'ml.p3.8xlarge' @@ -30,75 +29,50 @@ @pytest.mark.skip_gpu @pytest.mark.deploy_test @pytest.mark.skip_test_in_region -def test_dist_operations_cpu(sagemaker_session, ecr_image, instance_type, dist_cpu_backend): +def test_dist_operations_cpu(sagemaker_session, image_uri, instance_type, dist_cpu_backend): instance_type = instance_type or 'ml.c4.xlarge' - _test_dist_operations(sagemaker_session, ecr_image, instance_type, dist_cpu_backend) + _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_cpu_backend) @pytest.mark.skip_cpu @pytest.mark.deploy_test -def test_dist_operations_gpu(sagemaker_session, instance_type, ecr_image, dist_gpu_backend): +def test_dist_operations_gpu(sagemaker_session, instance_type, image_uri, dist_gpu_backend): instance_type = instance_type or 'ml.p2.xlarge' - _test_dist_operations(sagemaker_session, ecr_image, instance_type, dist_gpu_backend) + _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_gpu_backend) @pytest.mark.skip_cpu -def 
test_dist_operations_multi_gpu(sagemaker_session, ecr_image, dist_gpu_backend): - _test_dist_operations(sagemaker_session, ecr_image, MULTI_GPU_INSTANCE, dist_gpu_backend, 1) +def test_dist_operations_multi_gpu(sagemaker_session, image_uri, dist_gpu_backend): + _test_dist_operations(sagemaker_session, image_uri, MULTI_GPU_INSTANCE, dist_gpu_backend, 1) @pytest.mark.skip_cpu @pytest.mark.skip_py2_containers -def test_dist_operations_fastai_gpu(sagemaker_session, ecr_image): - with timeout(minutes=DEFAULT_TIMEOUT): - pytorch = PyTorch(entry_point='train_cifar.py', - source_dir=os.path.join(fastai_path, 'cifar'), - role='SageMakerRole', - train_instance_count=1, - train_instance_type=MULTI_GPU_INSTANCE, - sagemaker_session=sagemaker_session, - image_name=ecr_image) - - pytorch.sagemaker_session.default_bucket() - training_input = pytorch.sagemaker_session.upload_data( - path=os.path.join(fastai_path, 'cifar_tiny', 'training'), - key_prefix='pytorch/distributed_operations' - ) - - job_name = utils.unique_name_from_base('test-pytorch-dist-ops') - pytorch.fit({'training': training_input}, job_name=job_name) - - model_s3_url = pytorch.create_model().model_data - _assert_s3_file_exists(sagemaker_session.boto_region_name, model_s3_url) - - -@pytest.mark.skip_cpu -@pytest.mark.skip_py2_containers -def test_mnist_gpu(sagemaker_session, ecr_image, dist_gpu_backend): +def test_mnist_gpu(sagemaker_session, image_uri, dist_gpu_backend): with timeout(minutes=DEFAULT_TIMEOUT): pytorch = PyTorch(entry_point=mnist_script, role='SageMakerRole', train_instance_count=2, - image_name=ecr_image, + image_name=image_uri, train_instance_type=MULTI_GPU_INSTANCE, sagemaker_session=sagemaker_session, hyperparameters={'backend': dist_gpu_backend}) training_input = sagemaker_session.upload_data(path=os.path.join(data_dir, 'training'), key_prefix='pytorch/mnist') - job_name = utils.unique_name_from_base('test-pytorch-dist-ops') + job_name = utils.unique_name_from_base('test-pytorch-dist-ops') 
pytorch.fit({'training': training_input}, job_name=job_name) -def _test_dist_operations(sagemaker_session, ecr_image, instance_type, dist_backend, train_instance_count=3): +def _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_backend, train_instance_count=3): with timeout(minutes=DEFAULT_TIMEOUT): pytorch = PyTorch(entry_point=dist_operations_path, role='SageMakerRole', train_instance_count=train_instance_count, train_instance_type=instance_type, sagemaker_session=sagemaker_session, - image_name=ecr_image, + image_name=image_uri, hyperparameters={'backend': dist_backend}) pytorch.sagemaker_session.default_bucket() diff --git a/test/integration/sagemaker/test_experiments.py b/test/integration/sagemaker/test_experiments.py deleted file mode 100644 index 33fd63ec..00000000 --- a/test/integration/sagemaker/test_experiments.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
-from __future__ import absolute_import - -import time - -import pytest -from sagemaker.pytorch import PyTorch -from sagemaker import utils -from test.integration import training_dir, smdebug_mnist_script, DEFAULT_TIMEOUT -from test.integration.sagemaker.timeout import timeout - - -@pytest.mark.skip_py2_containers -def test_training(sagemaker_session, ecr_image, instance_type): - - from smexperiments.experiment import Experiment - from smexperiments.trial import Trial - from smexperiments.trial_component import TrialComponent - - sm_client = sagemaker_session.sagemaker_client - - experiment_name = "pytorch-container-integ-test-{}".format(int(time.time())) - - experiment = Experiment.create( - experiment_name=experiment_name, - description="Integration test full customer e2e from sagemaker-pytorch-container", - sagemaker_boto_client=sm_client, - ) - - trial_name = "pytorch-container-integ-test-{}".format(int(time.time())) - trial = Trial.create( - experiment_name=experiment_name, trial_name=trial_name, sagemaker_boto_client=sm_client - ) - - hyperparameters = { - "random_seed": True, - "num_steps": 50, - "smdebug_path": "/opt/ml/output/tensors", - "epochs": 1, - "data_dir": training_dir, - } - - training_job_name = utils.unique_name_from_base("test-pytorch-experiments-image") - - # create a training job and wait for it to complete - with timeout(minutes=DEFAULT_TIMEOUT): - pytorch = PyTorch( - entry_point=smdebug_mnist_script, - role="SageMakerRole", - train_instance_count=1, - train_instance_type=instance_type, - sagemaker_session=sagemaker_session, - image_name=ecr_image, - hyperparameters=hyperparameters, - ) - training_input = pytorch.sagemaker_session.upload_data( - path=training_dir, key_prefix="pytorch/mnist" - ) - pytorch.fit({"training": training_input}, job_name=training_job_name) - - training_job = sm_client.describe_training_job(TrainingJobName=training_job_name) - training_job_arn = training_job["TrainingJobArn"] - - # verify trial component auto 
created from the training job - trial_components = list( - TrialComponent.list(source_arn=training_job_arn, sagemaker_boto_client=sm_client) - ) - - trial_component_summary = trial_components[0] - trial_component = TrialComponent.load( - trial_component_name=trial_component_summary.trial_component_name, - sagemaker_boto_client=sm_client, - ) - - # associate the trial component with the trial - trial.add_trial_component(trial_component) - - # cleanup - trial.remove_trial_component(trial_component_summary.trial_component_name) - trial_component.delete() - trial.delete() - experiment.delete() diff --git a/test/integration/sagemaker/test_mnist.py b/test/integration/sagemaker/test_mnist.py index 33b5d814..0a7fec96 100644 --- a/test/integration/sagemaker/test_mnist.py +++ b/test/integration/sagemaker/test_mnist.py @@ -16,34 +16,34 @@ from sagemaker import utils from sagemaker.pytorch import PyTorch -from test.integration import training_dir, mnist_script, DEFAULT_TIMEOUT -from test.integration.sagemaker.timeout import timeout +from integration import training_dir, mnist_script, DEFAULT_TIMEOUT +from integration.sagemaker.timeout import timeout @pytest.mark.skip_gpu -def test_mnist_distributed_cpu(sagemaker_session, ecr_image, instance_type, dist_cpu_backend): +def test_mnist_distributed_cpu(sagemaker_session, image_uri, instance_type, dist_cpu_backend): instance_type = instance_type or 'ml.c4.xlarge' - _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, dist_cpu_backend) + _test_mnist_distributed(sagemaker_session, image_uri, instance_type, dist_cpu_backend) @pytest.mark.skip_cpu -def test_mnist_distributed_gpu(sagemaker_session, ecr_image, instance_type, dist_gpu_backend): +def test_mnist_distributed_gpu(sagemaker_session, image_uri, instance_type, dist_gpu_backend): instance_type = instance_type or 'ml.p2.xlarge' - _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, dist_gpu_backend) + _test_mnist_distributed(sagemaker_session, 
image_uri, instance_type, dist_gpu_backend) -def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, dist_backend): +def _test_mnist_distributed(sagemaker_session, image_uri, instance_type, dist_backend): with timeout(minutes=DEFAULT_TIMEOUT): pytorch = PyTorch(entry_point=mnist_script, role='SageMakerRole', train_instance_count=2, train_instance_type=instance_type, sagemaker_session=sagemaker_session, - image_name=ecr_image, + image_name=image_uri, hyperparameters={'backend': dist_backend, 'epochs': 2}) - training_input = pytorch.sagemaker_session.upload_data(path=training_dir, key_prefix='pytorch/mnist') + job_name = utils.unique_name_from_base('test-pytorch-mnist') pytorch.fit({'training': training_input}, job_name=job_name) diff --git a/test/integration/sagemaker/test_training_smdebug.py b/test/integration/sagemaker/test_training_smdebug.py deleted file mode 100644 index 604e7cec..00000000 --- a/test/integration/sagemaker/test_training_smdebug.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
-from __future__ import absolute_import - -import pytest -from sagemaker import utils -from sagemaker.pytorch import PyTorch - -from test.integration import training_dir, smdebug_mnist_script, DEFAULT_TIMEOUT -from test.integration.sagemaker.timeout import timeout - - -@pytest.mark.skip_py2_containers -def test_training_smdebug(sagemaker_session, ecr_image, instance_type): - hyperparameters = {'random_seed': True, 'num_steps': 50, 'smdebug_path': '/opt/ml/output/tensors', 'epochs': 1, - 'data_dir': training_dir} - - with timeout(minutes=DEFAULT_TIMEOUT): - pytorch = PyTorch(entry_point=smdebug_mnist_script, - role='SageMakerRole', - train_instance_count=1, - train_instance_type=instance_type, - sagemaker_session=sagemaker_session, - image_name=ecr_image, - hyperparameters=hyperparameters) - - training_input = pytorch.sagemaker_session.upload_data(path=training_dir, - key_prefix='pytorch/mnist') - job_name = utils.unique_name_from_base('test-pytorch-smdebug') - - pytorch.fit({'training': training_input}, job_name=job_name) diff --git a/test/resources/dgl-gcn/gcn.py b/test/resources/dgl-gcn/gcn.py deleted file mode 100644 index a0330d31..00000000 --- a/test/resources/dgl-gcn/gcn.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -"""GCN using DGL nn package - -References: -- Semi-Supervised Classification with Graph Convolutional Networks -- Paper: https://arxiv.org/abs/1609.02907 -- Code: https://github.com/tkipf/gcn -""" -import os -import argparse, time -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F - -from dgl import DGLGraph -from dgl.nn.pytorch import GraphConv -from dgl.data import register_data_args, load_data - -class GCN(nn.Module): - def __init__(self, - g, - in_feats, - n_hidden, - n_classes, - n_layers, - activation, - dropout): - super(GCN, self).__init__() - self.g = g - self.layers = nn.ModuleList() - # input layer - self.layers.append(GraphConv(in_feats, n_hidden, 
activation=activation)) - # hidden layers - for i in range(n_layers - 1): - self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation)) - # output layer - self.layers.append(GraphConv(n_hidden, n_classes)) - self.dropout = nn.Dropout(p=dropout) - - def forward(self, features): - h = features - for i, layer in enumerate(self.layers): - if i != 0: - h = self.dropout(h) - h = layer(self.g, h) - return h - -def evaluate(model, features, labels, mask): - model.eval() - with torch.no_grad(): - logits = model(features) - logits = logits[mask] - labels = labels[mask] - _, indices = torch.max(logits, dim=1) - correct = torch.sum(indices == labels) - return correct.item() * 1.0 / len(labels) - -def main(args): - # load and preprocess dataset - data = load_data(args) - features = torch.FloatTensor(data.features) - labels = torch.LongTensor(data.labels) - if hasattr(torch, 'BoolTensor'): - train_mask = torch.BoolTensor(data.train_mask) - val_mask = torch.BoolTensor(data.val_mask) - test_mask = torch.BoolTensor(data.test_mask) - else: - train_mask = torch.ByteTensor(data.train_mask) - val_mask = torch.ByteTensor(data.val_mask) - test_mask = torch.ByteTensor(data.test_mask) - in_feats = features.shape[1] - n_classes = data.num_labels - n_edges = data.graph.number_of_edges() - print("""----Data statistics------' - #Edges %d - #Classes %d - #Train samples %d - #Val samples %d - #Test samples %d""" % - (n_edges, n_classes, - train_mask.sum().item(), - val_mask.sum().item(), - test_mask.sum().item())) - - if args.gpu < 0: - cuda = False - else: - cuda = True - torch.cuda.set_device(args.gpu) - features = features.cuda() - labels = labels.cuda() - train_mask = train_mask.cuda() - val_mask = val_mask.cuda() - test_mask = test_mask.cuda() - - # graph preprocess and calculate normalization factor - g = data.graph - # add self loop - if args.self_loop: - g.remove_edges_from(g.selfloop_edges()) - g.add_edges_from(zip(g.nodes(), g.nodes())) - g = DGLGraph(g) - n_edges = 
g.number_of_edges() - # normalization - degs = g.in_degrees().float() - norm = torch.pow(degs, -0.5) - norm[torch.isinf(norm)] = 0 - if cuda: - norm = norm.cuda() - g.ndata['norm'] = norm.unsqueeze(1) - - # create GCN model - model = GCN(g, - in_feats, - args.n_hidden, - n_classes, - args.n_layers, - F.relu, - args.dropout) - - if cuda: - model.cuda() - loss_fcn = torch.nn.CrossEntropyLoss() - - # use optimizer - optimizer = torch.optim.Adam(model.parameters(), - lr=args.lr, - weight_decay=args.weight_decay) - - # initialize graph - dur = [] - for epoch in range(args.n_epochs): - model.train() - if epoch >= 3: - t0 = time.time() - # forward - logits = model(features) - loss = loss_fcn(logits[train_mask], labels[train_mask]) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - if epoch >= 3: - dur.append(time.time() - t0) - - acc = evaluate(model, features, labels, val_mask) - print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " - "ETputs(KTEPS) {:.2f}". 
format(epoch, np.mean(dur), loss.item(), - acc, n_edges / np.mean(dur) / 1000)) - - acc = evaluate(model, features, labels, test_mask) - print("Test accuracy {:.2%}".format(acc)) - - acc_threshold = 0.75 - assert acc > acc_threshold, "Accuracy {} is not greater than threshold {}".format(acc, acc_threshold) - -def parse_args(): - parser = argparse.ArgumentParser(description='GCN') - register_data_args(parser) - parser.add_argument("--dropout", type=float, default=0.5, - help="dropout probability") - parser.add_argument("--gpu", type=int, default=-1, - help="gpu") - parser.add_argument("--lr", type=float, default=1e-2, - help="learning rate") - parser.add_argument("--n-epochs", type=int, default=200, - help="number of training epochs") - parser.add_argument("--n-hidden", type=int, default=16, - help="number of hidden gcn units") - parser.add_argument("--n-layers", type=int, default=1, - help="number of hidden gcn layers") - parser.add_argument("--weight-decay", type=float, default=5e-4, - help="Weight for L2 loss") - parser.add_argument("--self-loop", action='store_true', - help="graph self-loop (default=False)") - parser.set_defaults(self_loop=False) - - return parser.parse_args() - -if __name__ == '__main__': - num_gpus = int(os.environ['SM_NUM_GPUS']) - if num_gpus > 0: - gpu = 0 - else: - gpu = -1 - - args = parse_args() - args.gpu = gpu - args.dataset = 'cora' - print(args) - - main(args) - diff --git a/test/resources/fastai/cifar/cifar.py b/test/resources/fastai/cifar/cifar.py deleted file mode 100644 index e41fbfde..00000000 --- a/test/resources/fastai/cifar/cifar.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. 
This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Based on https://github.com/fastai/fastai/blob/master/examples/train_cifar.py -# imports and the code was as much preserved to match the official example -from fastai.script import * -from fastai.vision import * -from fastai.vision.models.wrn import wrn_22 -from fastai.distributed import * -import torch - -torch.backends.cudnn.benchmark = True - -@call_parse -def main(gpu:Param("GPU to run on", str)=None): - """Distrubuted training of CIFAR-10. - Fastest speed is if you run as follows: - python -m fastai.launch cifar.py""" - gpu = setup_distrib(gpu) - n_gpus = int(os.environ.get("WORLD_SIZE", 1)) - tgz_path = os.environ.get('SM_CHANNEL_TRAINING') - path = os.path.join(tgz_path, 'cifar10_tiny') - tarfile.open(f'{path}.tgz', 'r:gz').extractall(tgz_path) - ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], []) - workers = min(16, num_cpus()//n_gpus) - data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=512//n_gpus, - num_workers=workers) - data.normalize(cifar_stats) - learn = Learner(data, wrn_22(), metrics=accuracy, path='/opt/ml', model_dir='model') - if gpu is None: - learn.model = nn.DataParallel(learn.model) - else: - learn.to_distributed(gpu) - learn.to_fp16() - learn.fit_one_cycle(2, 3e-3, wd=0.4) - learn.save('model') diff --git a/test/resources/fastai/cifar/train_cifar.py b/test/resources/fastai/cifar/train_cifar.py deleted file mode 100644 index febac46b..00000000 --- a/test/resources/fastai/cifar/train_cifar.py +++ /dev/null @@ -1,6 +0,0 @@ - -# TODO: replace with a shell script when it's supported by the container -import os - -# Using system instead of subprocess as an easier way to simulate shell script call -os.system('python -m fastai.launch cifar.py') diff --git 
a/test/resources/fastai/cifar_tiny/training/cifar10_tiny.tgz b/test/resources/fastai/cifar_tiny/training/cifar10_tiny.tgz deleted file mode 100644 index 7ce6d275..00000000 Binary files a/test/resources/fastai/cifar_tiny/training/cifar10_tiny.tgz and /dev/null differ diff --git a/test/resources/fastai/mnist.py b/test/resources/fastai/mnist.py deleted file mode 100644 index cf806ee8..00000000 --- a/test/resources/fastai/mnist.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- -# Based on https://github.com/fastai/fastai/blob/master/examples/train_mnist.py -# imports and the code was as much preserved to match the official example -from fastai.script import * -from fastai.vision import * - -@call_parse -def main(): - tgz_path = os.environ.get('SM_CHANNEL_TRAINING') - path = os.path.join(tgz_path, 'mnist_tiny') - tarfile.open(f'{path}.tgz', 'r:gz').extractall(tgz_path) - tfms = (rand_pad(2, 28), []) - data = ImageDataBunch.from_folder(path, ds_tfms=tfms, bs=64) - data.normalize(imagenet_stats) - learn = create_cnn(data, models.resnet18, metrics=accuracy, path='/opt/ml', model_dir='model') - learn.fit_one_cycle(1, 0.02) - learn.save('model') diff --git a/test/resources/fastai/mnist_tiny/training/mnist_tiny.tgz b/test/resources/fastai/mnist_tiny/training/mnist_tiny.tgz deleted file mode 100644 index c24f3f03..00000000 Binary files a/test/resources/fastai/mnist_tiny/training/mnist_tiny.tgz and /dev/null differ diff --git a/test/resources/mnist/mnist.py b/test/resources/mnist/mnist.py index 221e94c5..1359f712 100644 --- a/test/resources/mnist/mnist.py +++ b/test/resources/mnist/mnist.py @@ -11,13 +11,13 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import absolute_import + import argparse import logging import os import sys -import cv2 as cv -import sagemaker_containers +from sagemaker_training import environment import torch import torch.distributed as dist import torch.nn as nn @@ -25,7 +25,6 @@ import torch.optim as optim import torch.utils.data import torch.utils.data.distributed - from torchvision import datasets, transforms logger = logging.getLogger(__name__) @@ -122,25 +121,18 @@ def train(args): 100. 
* len(test_loader.sampler) / len(test_loader.dataset) )) - model = Net() + model = Net().to(device) if is_distributed and use_cuda: # multi-machine multi-gpu case logger.debug("Multi-machine multi-gpu: using DistributedDataParallel.") - # establish host rank and set device on this node - torch.cuda.set_device(host_rank) - model.cuda(host_rank) - # for multiprocessing distributed, the DDP constructor should always set - # the single device scope. otherwise, DDP will use all available devices. - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[host_rank], output_device=host_rank) + model = torch.nn.parallel.DistributedDataParallel(model) elif use_cuda: # single-machine multi-gpu case logger.debug("Single-machine multi-gpu: using DataParallel().cuda().") - model = model.to(device) model = torch.nn.DataParallel(model).to(device) else: # single-machine or multi-machine cpu case logger.debug("Single-machine/multi-machine cpu: using DataParallel.") - model = model.to(device) model = torch.nn.DataParallel(model) optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) @@ -148,11 +140,7 @@ def train(args): for epoch in range(1, args.epochs + 1): model.train() for batch_idx, (data, target) in enumerate(train_loader, 1): - if is_distributed and use_cuda: - # multi-machine multi-gpu case - allow asynchrous GPU copies of the data - data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True) - else: - data, target = data.to(device), target.to(device) + data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) @@ -168,9 +156,6 @@ def train(args): test(model, test_loader, device) save_model(model, args.model_dir) - if is_distributed and host_rank == 0 or not is_distributed: - assert_can_track_sagemaker_experiments() - def test(model, test_loader, device): model.eval() @@ -190,14 +175,6 @@ def test(model, test_loader, device): 100. 
* correct / len(test_loader.dataset))) -def model_fn(model_dir): - logger.info('model_fn') - model = torch.nn.DataParallel(Net()) - with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: - model.load_state_dict(torch.load(f)) - return model - - def save_model(model, model_dir): logger.info("Saving the model.") path = os.path.join(model_dir, 'model.pth') @@ -205,22 +182,7 @@ def save_model(model, model_dir): torch.save(model.state_dict(), path) -def assert_can_track_sagemaker_experiments(): - in_sagemaker_training = 'TRAINING_JOB_ARN' in os.environ - in_python_three = sys.version_info[0] == 3 - - if in_sagemaker_training and in_python_three: - import smexperiments.tracker - - with smexperiments.tracker.Tracker.load() as tracker: - tracker.log_parameter('param', 1) - tracker.log_metric('metric', 1.0) - - if __name__ == '__main__': - # test opencv - print(cv.__version__) - parser = argparse.ArgumentParser() # Data and model checkpoints directories @@ -244,7 +206,7 @@ def assert_can_track_sagemaker_experiments(): help='backend for distributed training') # Container environment - env = sagemaker_containers.training_env() + env = environment.Environment() parser.add_argument('--hosts', type=list, default=env.hosts) parser.add_argument('--current-host', type=str, default=env.current_host) parser.add_argument('--model-dir', type=str, default=env.model_dir) @@ -252,4 +214,3 @@ def assert_can_track_sagemaker_experiments(): parser.add_argument('--num-gpus', type=int, default=env.num_gpus) train(parser.parse_args()) - diff --git a/test/resources/mnist/smdebug_mnist.py b/test/resources/mnist/smdebug_mnist.py deleted file mode 100644 index 4485baa0..00000000 --- a/test/resources/mnist/smdebug_mnist.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. 
A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import -import argparse -import logging -import os -import sys - -import cv2 as cv -import sagemaker_containers -import torch -import torch.distributed as dist -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch.utils.data -import torch.utils.data.distributed -from torchvision import datasets, transforms -from smdebug.pytorch import * -import numpy as np -import random - -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -logger.addHandler(logging.StreamHandler(sys.stdout)) - - -# Based on https://github.com/pytorch/examples/blob/master/mnist/main.py -class Net(nn.Module): - def __init__(self): - logger.info("Create neural network module") - - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 10, kernel_size=5) - self.conv2 = nn.Conv2d(10, 20, kernel_size=5) - self.conv2_drop = nn.Dropout2d() - self.fc1 = nn.Linear(320, 50) - self.fc2 = nn.Linear(50, 10) - - def forward(self, x): - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - return F.log_softmax(x, dim=1) - - -def parse_args(): - env = sagemaker_containers.training_env() - parser = argparse.ArgumentParser(description="PyTorch MNIST Example") - - parser.add_argument('--data_dir', type=str) - parser.add_argument("--batch-size", type=int, default=4, help="Batch size") - parser.add_argument("--epochs", type=int, default=1, help="Number of Epochs") - parser.add_argument( - 
"--smdebug_path", - type=str, - default=None, - help="S3 URI of the bucket where tensor data will be stored.", - ) - parser.add_argument("--learning_rate", type=float, default=0.1) - parser.add_argument("--momentum", type=float, default=0.9) - parser.add_argument("--random_seed", type=bool, default=False) - parser.add_argument( - "--num_steps", - type=int, - default=50, - help="Reduce the number of training " - "and evaluation steps to the give number if desired." - "If this is not passed, trains for one epoch " - "of training and validation data", - ) - parser.add_argument('--log_interval', type=int, default=100, metavar='N', - help='how many batches to wait before logging training status') - - opt = parser.parse_args() - return opt - - -def _get_train_data_loader(batch_size, training_dir): - logger.info("Get train data loader") - dataset = datasets.MNIST(training_dir, train=True, download=True, transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) - return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, - num_workers=4) - - -def _get_test_data_loader(test_batch_size, training_dir): - logger.info("Get test data loader") - return torch.utils.data.DataLoader( - datasets.MNIST(training_dir, train=False, download=True, transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])), - batch_size=test_batch_size, shuffle=False, num_workers=4) - - -def create_smdebug_hook(output_s3_uri): - # With the following SaveConfig, we will save tensors for steps 1, 2 and 3 - # (indexing starts with 0). - save_config = SaveConfig(save_steps=[1, 2, 3]) - - # Create a hook that logs weights, biases and gradients while training the model. 
- hook = Hook( - out_dir=output_s3_uri, - save_config=save_config, - include_collections=["weights", "gradients", "losses"], - ) - return hook - - -def train(model, device, optimizer, hook, epochs, log_interval, training_dir): - criterion = nn.CrossEntropyLoss() - hook.register_loss(criterion) - - trainloader = _get_train_data_loader(4, training_dir) - validloader = _get_test_data_loader(4, training_dir) - - for epoch in range(epochs): - model.train() - hook.set_mode(modes.TRAIN) - for i, data in enumerate(trainloader): - inputs, labels = data - optimizer.zero_grad() - output = model(inputs) - loss = criterion(output, labels) - loss.backward() - optimizer.step() - - if i % log_interval == 0: - logger.debug('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( - epoch, i * len(data), len(trainloader.sampler), - 100. * i / len(trainloader), loss.item())) - - test(model, hook, validloader, device, criterion) - - -def test(model, hook, test_loader, device, loss_fn): - model.eval() - hook.set_mode(modes.EVAL) - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - test_loss += loss_fn(output, target).item() # sum up batch loss - pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability - correct += pred.eq(target.view_as(pred)).sum().item() - - test_loss /= len(test_loader.dataset) - logger.debug('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. 
* correct / len(test_loader.dataset))) - - -def main(): - opt = parse_args() - - if opt.random_seed: - torch.manual_seed(128) - random.seed(12) - np.random.seed(2) - - device = torch.device("cpu") - out_dir = opt.smdebug_path - training_dir = opt.data_dir - hook = create_smdebug_hook(out_dir) - model = Net().to(device) - hook.register_hook(model) - optimizer = optim.SGD(model.parameters(), lr=opt.learning_rate, momentum=opt.momentum) - train(model, device, optimizer, hook, opt.epochs, opt.log_interval, training_dir) - print("Training is complete") - - from smdebug.trials import create_trial - print("Created the trial with out_dir {0}".format(out_dir)) - trial = create_trial(out_dir) - assert trial - print("Train steps: " + str(trial.steps(mode=modes.TRAIN))) - print("Eval steps: " + str(trial.steps(mode=modes.EVAL))) - - print( - f"trial.tensor_names() = {trial.tensor_names()}" - ) - # if loss collection tensors not in in trial.tensor_names() - # means they were not saved - - print(f"collection_manager = {hook.collection_manager}") - - weights_tensors = hook.collection_manager.get("weights").tensor_names - print(f"'weights' collection tensors = {weights_tensors}") - assert len(weights_tensors) > 0 - - gradients_tensors = hook.collection_manager.get("gradients").tensor_names - print(f"'gradients' collection tensors = {gradients_tensors}") - assert len(gradients_tensors) > 0 - - losses_tensors = hook.collection_manager.get("losses").tensor_names - print(f"'losses' collection tensors = {losses_tensors}") - assert len(losses_tensors) > 0 - - assert all( - [name in trial.tensor_names() for name in losses_tensors] - ) - - print("Validation Complete") - -if __name__ == "__main__": - main() - diff --git a/test-toolkit/resources/requirements/entry.py b/test/resources/requirements/entry.py similarity index 100% rename from test-toolkit/resources/requirements/entry.py rename to test/resources/requirements/entry.py diff --git 
a/test-toolkit/resources/requirements/requirements.txt b/test/resources/requirements/requirements.txt similarity index 100% rename from test-toolkit/resources/requirements/requirements.txt rename to test/resources/requirements/requirements.txt diff --git a/test-toolkit/unit/test_train.py b/test/unit/test_train.py similarity index 100% rename from test-toolkit/unit/test_train.py rename to test/unit/test_train.py diff --git a/test-toolkit/utils/image_utils.py b/test/utils/image_utils.py similarity index 95% rename from test-toolkit/utils/image_utils.py rename to test/utils/image_utils.py index bcc8bd86..4135f6f3 100644 --- a/test-toolkit/utils/image_utils.py +++ b/test/utils/image_utils.py @@ -27,7 +27,7 @@ def build_image(framework_version, dockerfile, image_uri, region, cwd='.'): if 'dlc' in dockerfile: ecr_login(region, DLC_AWS_ID) - dockerfile_location = os.path.join('test-toolkit', 'docker', framework_version, dockerfile) + dockerfile_location = os.path.join('test', 'container', framework_version, dockerfile) subprocess.check_call( ['docker', 'build', '-t', image_uri, '-f', dockerfile_location, '--build-arg', diff --git a/test/utils/local_mode_utils.py b/test/utils/local_mode_utils.py index 23e8945a..acd1197e 100644 --- a/test/utils/local_mode_utils.py +++ b/test/utils/local_mode_utils.py @@ -18,7 +18,7 @@ import tarfile import time -from test.integration import resources_path +from integration import resources_path LOCK_PATH = os.path.join(resources_path, 'local_mode_lock') diff --git a/tox.ini b/tox.ini index deeabe8b..eb89a87b 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,6 @@ exclude = __pycache__ .tox test/resources/ - test-toolkit/resources/ lib/ max-complexity = 10 ignore =