From c4adde5be86318973beebdfc52e58214dacc879b Mon Sep 17 00:00:00 2001
From: Sai Parthasarathy Miduthuri <54188298+saimidu@users.noreply.github.com>
Date: Mon, 30 Dec 2019 15:58:07 -0800
Subject: [PATCH 01/10] update: Release TF 1.15.0 dockerfiles (#264)

* Add TF 1.15 dockerfiles and changes to entrypoint

Co-authored-by: akhilmehra <akhilrmehra@gmail.com>
Co-authored-by: ElizaZh <elizazhang087@gmail.com>
Co-authored-by: Owen Thomas <31292660+owen-t@users.noreply.github.com>
Co-authored-by: Kartik Kalamadi <akartsky@gmail.com>
Co-authored-by: Arjuna Keshavan <33526713+arjkesh@users.noreply.github.com>

Co-authored-by: akhilmehra <akhilrmehra@gmail.com>
Co-authored-by: ElizaZh <elizazhang087@gmail.com>
Co-authored-by: Owen Thomas <31292660+owen-t@users.noreply.github.com>
Co-authored-by: Kartik Kalamadi <akartsky@gmail.com>
Co-authored-by: Arjuna Keshavan <33526713+arjkesh@users.noreply.github.com>
---
 docker/1.15.0/py2/Dockerfile.cpu              | 129 +++++++++++++
 docker/1.15.0/py2/Dockerfile.gpu              | 171 +++++++++++++++++
 docker/1.15.0/py2/dockerd-entrypoint.py       |  23 +++
 docker/1.15.0/py3/Dockerfile.cpu              | 133 +++++++++++++
 docker/1.15.0/py3/Dockerfile.gpu              | 179 ++++++++++++++++++
 docker/1.15.0/py3/dockerd-entrypoint.py       |  23 +++
 setup.py                                      |   4 +-
 .../deep_learning_container.py                | 112 +++++++++++
 test/resources/mnist/mnist.py                 |  21 +-
 test/unit/test_deep_learning_containers.py    | 158 ++++++++++++++++
 10 files changed, 950 insertions(+), 3 deletions(-)
 create mode 100644 docker/1.15.0/py2/Dockerfile.cpu
 create mode 100644 docker/1.15.0/py2/Dockerfile.gpu
 create mode 100644 docker/1.15.0/py2/dockerd-entrypoint.py
 create mode 100644 docker/1.15.0/py3/Dockerfile.cpu
 create mode 100644 docker/1.15.0/py3/Dockerfile.gpu
 create mode 100644 docker/1.15.0/py3/dockerd-entrypoint.py
 create mode 100644 src/sagemaker_tensorflow_container/deep_learning_container.py
 create mode 100644 test/unit/test_deep_learning_containers.py

diff --git a/docker/1.15.0/py2/Dockerfile.cpu b/docker/1.15.0/py2/Dockerfile.cpu
new file mode 100644
index 00000000..f9387aa0
--- /dev/null
+++ b/docker/1.15.0/py2/Dockerfile.cpu
@@ -0,0 +1,129 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent the docker build from being stopped by prompts for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Set environment variables for MKL
+# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+ENV KMP_BLOCKTIME=1
+ENV KMP_SETTINGS=0
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_container*.tar.gz
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15/AmazonLinux/cpu/final/tensorflow-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    ca-certificates \
+    curl \
+    git \
+    wget \
+    vim \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && mkdir -p /var/run/sshd \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+# Install Python 2 and pip; the apt lists are removed in the same layer to keep it small
+RUN apt-get update \
+ && apt-get install -y python python-pip \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
+
+RUN pip --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python) /usr/local/bin/python
+
+RUN pip install --no-cache-dir -U \
+    numpy==1.16.5 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==6.2.1 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    requests==2.22.0 \
+    keras==2.3.1 \
+    # botocore requires python-dateutil<2.8.1
+    "python-dateutil<2.8.1" \
+    awscli==1.16.296 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    # Let's install TensorFlow separately in the end to avoid the library version to be overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --no-cache-dir -U \
+    $FRAMEWORK_SUPPORT_INSTALLABLE \
+ && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE \
+ && pip install --no-cache-dir -U \
+    # awscli requires PyYAML<5.2
+    "PyYAML<5.2" \
+    horovod==0.18.2
+
+COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
+ && chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl -fSsL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+# dockerd-entrypoint.py executes its arguments, so the default command is an absolute path
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["/bin/bash"]
diff --git a/docker/1.15.0/py2/Dockerfile.gpu b/docker/1.15.0/py2/Dockerfile.gpu
new file mode 100644
index 00000000..fa86d9dc
--- /dev/null
+++ b/docker/1.15.0/py2/Dockerfile.gpu
@@ -0,0 +1,171 @@
+# Nvidia does not publish a TensorRT Runtime library for Ubuntu 18.04 with Cuda 10.1 support, so we stick with cuda 10.0.
+# https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/
+FROM nvidia/cuda:10.0-base-ubuntu18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent the docker build from being stopped by prompts for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_container*.tar.gz
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15/AmazonLinux/gpu/final/tensorflow_gpu-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated \
+    ca-certificates \
+    cuda-command-line-tools-10-0 \
+    cuda-cublas-dev-10-0 \
+    cuda-cudart-dev-10-0 \
+    cuda-cufft-dev-10-0 \
+    cuda-curand-dev-10-0 \
+    cuda-cusolver-dev-10-0 \
+    cuda-cusparse-dev-10-0 \
+    curl \
+    libcudnn7=7.5.1.10-1+cuda10.0 \
+    # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it
+    libnccl2=2.4.7-1+cuda10.0 \
+    libgomp1 \
+    libnccl-dev=2.4.7-1+cuda10.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    git \
+    wget \
+    vim \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    zlib1g-dev \
+    # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0
+    # adds a new list which contains libnvinfer library, so it needs another
+    # 'apt-get update' to retrieve that list before it can actually install the library.
+    # We don't install libnvinfer-dev since we don't need to build against TensorRT,
+    # and libnvinfer4 doesn't contain libnvinfer.a static library.
+ && apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
+ && apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    libnvinfer5=5.0.2-1+cuda10.0 \
+ && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \
+ && rm -rf /var/lib/apt/lists/* \
+ && mkdir -p /var/run/sshd
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Install Python 2 and pip; the apt lists are removed in the same layer to keep it small
+RUN apt-get update \
+ && apt-get install -y python python-pip \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+# Configure OpenMPI to run with good defaults:
+#   --bind-to none --map-by slot --mca btl_tcp_if_exclude lo,docker0
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Set default NCCL parameters
+RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+ENV PATH=/usr/local/nvidia/bin:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN mkdir -p /var/run/sshd \
+ && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN pip --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python) /usr/local/bin/python
+
+COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
+
+RUN pip install --no-cache-dir -U \
+    numpy==1.16.5 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==6.2.1 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    requests==2.22.0 \
+    keras==2.3.1 \
+    # botocore requires python-dateutil<2.8.1
+    "python-dateutil<2.8.1" \
+    awscli==1.16.296 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    # Let's install TensorFlow separately in the end to avoid the library version to be overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --no-cache-dir -U \
+    $FRAMEWORK_SUPPORT_INSTALLABLE \
+ && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
+
+# Install Horovod, temporarily using CUDA stubs
+RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs \
+ && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
+    # awscli requires PyYAML<5.2
+    "PyYAML<5.2" \
+    horovod==0.18.2 \
+ && ldconfig
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
+ && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
+ && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
+ && chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl -fSsL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+# dockerd-entrypoint.py executes its arguments, so the default command is an absolute path
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["/bin/bash"]
diff --git a/docker/1.15.0/py2/dockerd-entrypoint.py b/docker/1.15.0/py2/dockerd-entrypoint.py
new file mode 100644
index 00000000..b9231abc
--- /dev/null
+++ b/docker/1.15.0/py2/dockerd-entrypoint.py
@@ -0,0 +1,30 @@
+# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os.path
+import shlex
+import subprocess
+import sys
+
+if not os.path.exists("/opt/ml/input/config"):
+    # Start the helper in the background with its output discarded. In an argv
+    # list, shell tokens such as '&>/dev/null' and '&' are passed to the program
+    # as plain arguments, so both effects are requested from Popen directly.
+    with open(os.devnull, 'w') as devnull:
+        subprocess.Popen(['python', '/usr/local/bin/deep_learning_container.py'],
+                         stdout=devnull, stderr=devnull)
+
+# Run the container command: the arguments are joined and re-split with
+# shlex, and a non-zero exit status raises CalledProcessError.
+subprocess.check_call(shlex.split(' '.join(sys.argv[1:])))
diff --git a/docker/1.15.0/py3/Dockerfile.cpu b/docker/1.15.0/py3/Dockerfile.cpu
new file mode 100644
index 00000000..b204769a
--- /dev/null
+++ b/docker/1.15.0/py3/Dockerfile.cpu
@@ -0,0 +1,133 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent the docker build from being stopped by prompts for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Set environment variables for MKL
+# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+ENV KMP_BLOCKTIME=1
+ENV KMP_SETTINGS=0
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_container*.tar.gz
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15/AmazonLinux/cpu/final/tensorflow-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    software-properties-common \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    ca-certificates \
+    curl \
+    git \
+    wget \
+    vim \
+    zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && mkdir -p /var/run/sshd \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
+
+RUN pip3 --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+ && ln -s $(which pip3) /usr/bin/pip
+
+# PyYAML is kept below 5.2 to avoid a conflict with the pinned awscli, and
+# python-dateutil below 2.8.1 to satisfy the botocore that awscli depends on
+RUN pip install --no-cache-dir -U \
+    numpy==1.17.4 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==6.2.1 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    keras==2.3.1 \
+    # botocore requires python-dateutil<2.8.1
+    "python-dateutil<2.8.1" \
+    requests==2.22.0 \
+    smdebug==0.4.14 \
+    sagemaker-experiments==0.1.3 \
+    awscli==1.16.296 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    # Let's install TensorFlow separately in the end to avoid
+    # the library version to be overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --force-reinstall --no-cache-dir -U \
+    # awscli requires PyYAML<5.2
+    "PyYAML<5.2" \
+    horovod==0.18.2 \
+ && pip install --no-cache-dir -U \
+    $FRAMEWORK_SUPPORT_INSTALLABLE \
+ && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
+
+COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
+ && chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl -fSsL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+# dockerd-entrypoint.py executes its arguments, so the default command is an absolute path
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["/bin/bash"]
diff --git a/docker/1.15.0/py3/Dockerfile.gpu b/docker/1.15.0/py3/Dockerfile.gpu
new file mode 100644
index 00000000..38c86b14
--- /dev/null
+++ b/docker/1.15.0/py3/Dockerfile.gpu
@@ -0,0 +1,179 @@
+# Nvidia does not publish a TensorRT Runtime library for Ubuntu 18.04 with Cuda 10.1 support, so we stick with cuda 10.0.
+# https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/
+FROM nvidia/cuda:10.0-base-ubuntu18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent the docker build from being stopped by prompts for user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_container*.tar.gz
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15/AmazonLinux/gpu/final/tensorflow_gpu-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends --allow-unauthenticated \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    python3-dev \
+    ca-certificates \
+    cuda-command-line-tools-10-0 \
+    cuda-cublas-dev-10-0 \
+    cuda-cudart-dev-10-0 \
+    cuda-cufft-dev-10-0 \
+    cuda-curand-dev-10-0 \
+    cuda-cusolver-dev-10-0 \
+    cuda-cusparse-dev-10-0 \
+    curl \
+    libcudnn7=7.5.1.10-1+cuda10.0 \
+    # TensorFlow doesn't require libnccl anymore but Open MPI still depends on it
+    libnccl2=2.4.7-1+cuda10.0 \
+    libgomp1 \
+    libnccl-dev=2.4.7-1+cuda10.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    git \
+    wget \
+    vim \
+    build-essential \
+    openssh-client \
+    openssh-server \
+    zlib1g-dev \
+    # The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0
+    # adds a new list which contains libnvinfer library, so it needs another
+    # 'apt-get update' to retrieve that list before it can actually install the
+    # library.
+    # We don't install libnvinfer-dev since we don't need to build against TensorRT,
+    # and libnvinfer4 doesn't contain libnvinfer.a static library.
+ && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
+ && apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated  \
+    libnvinfer5=5.0.2-1+cuda10.0 \
+ && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
+ && rm /usr/lib/x86_64-linux-gnu/libnvparsers* \
+ && rm -rf /var/lib/apt/lists/* \
+ && mkdir -p /var/run/sshd
+
+###########################################################################
+# Horovod & its dependencies
+###########################################################################
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+ && cd /tmp/openmpi \
+ && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+ && tar zxf openmpi-4.0.1.tar.gz \
+ && cd openmpi-4.0.1 \
+ && ./configure --enable-orterun-prefix-by-default \
+ && make -j $(nproc) all \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+ && echo '#!/bin/bash' > /usr/local/bin/mpirun \
+ && echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+ && chmod a+x /usr/local/bin/mpirun
+
+# Configure OpenMPI to run with good defaults:
+#   --bind-to none --map-by slot --mca btl_tcp_if_exclude lo,docker0
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+ && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+# Set default NCCL parameters
+RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+ENV PATH=/usr/local/nvidia/bin:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN mkdir -p /var/run/sshd \
+ && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+ && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+ && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+ && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN pip3 --no-cache-dir install --upgrade \
+    pip \
+    setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+ && ln -s $(which pip3) /usr/bin/pip
+
+COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
+
+# PyYAML is kept below 5.2 to avoid a conflict with the pinned awscli, and
+# python-dateutil below 2.8.1 to satisfy the botocore that awscli depends on
+RUN pip install --no-cache-dir -U \
+    numpy==1.17.4 \
+    scipy==1.2.2 \
+    scikit-learn==0.20.3 \
+    pandas==0.24.2 \
+    Pillow==6.2.1 \
+    h5py==2.9.0 \
+    keras_applications==1.0.8 \
+    keras_preprocessing==1.1.0 \
+    requests==2.22.0 \
+    keras==2.3.1 \
+    # botocore requires python-dateutil<2.8.1
+    "python-dateutil<2.8.1" \
+    smdebug==0.4.14 \
+    sagemaker-experiments==0.1.3 \
+    awscli==1.16.296 \
+    mpi4py==3.0.2 \
+    "cryptography>=2.3" \
+    "sagemaker-tensorflow>=1.15,<1.16" \
+    # Let's install TensorFlow separately in the end to avoid
+    # the library version to be overwritten
+ && pip install --force-reinstall --no-cache-dir -U \
+    ${TF_URL} \
+ && pip install --no-cache-dir -U \
+    $FRAMEWORK_SUPPORT_INSTALLABLE \
+ && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
+
+# Install Horovod, temporarily using CUDA stubs
+RUN ldconfig /usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs \
+ && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
+    # awscli requires PyYAML<5.2
+    "PyYAML<5.2" \
+    horovod==0.18.2 \
+ && ldconfig
+
+# Allow OpenSSH to talk to containers without asking for confirmation
+RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \
+ && echo "    StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
+ && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
+
+COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
+ && chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl -fSsL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+# dockerd-entrypoint.py executes its arguments, so the default command is an absolute path
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["/bin/bash"]
diff --git a/docker/1.15.0/py3/dockerd-entrypoint.py b/docker/1.15.0/py3/dockerd-entrypoint.py
new file mode 100644
index 00000000..b9231abc
--- /dev/null
+++ b/docker/1.15.0/py3/dockerd-entrypoint.py
@@ -0,0 +1,30 @@
+# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import os.path
+import shlex
+import subprocess
+import sys
+
+if not os.path.exists("/opt/ml/input/config"):
+    # Start the helper in the background with its output discarded. In an argv
+    # list, shell tokens such as '&>/dev/null' and '&' are passed to the program
+    # as plain arguments, so both effects are requested from Popen directly.
+    with open(os.devnull, 'w') as devnull:
+        subprocess.Popen(['python', '/usr/local/bin/deep_learning_container.py'],
+                         stdout=devnull, stderr=devnull)
+
+# Run the container command: the arguments are joined and re-split with
+# shlex, and a non-zero exit status raises CalledProcessError.
+subprocess.check_call(shlex.split(' '.join(sys.argv[1:])))
diff --git a/setup.py b/setup.py
index 88e412b0..02c007c4 100644
--- a/setup.py
+++ b/setup.py
@@ -57,7 +57,9 @@ def read_version():
                       'pandas', 'Pillow', 'h5py'],
     extras_require={
         'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist', 'mock',
-                 'sagemaker==1.19.1', 'tensorflow<2.0', 'docker-compose', 'botocore>=1.12.140'],
+                 'sagemaker==1.19.1', 'tensorflow<2.0', 'docker-compose', 'boto3==1.10.32',
+                 'six==1.13.0', 'python-dateutil>=2.1,<2.8.1', 'botocore==1.13.32',
+                 'requests-mock', 'awscli==1.16.296'],
         'benchmark': ['click']
     },
 )
diff --git a/src/sagemaker_tensorflow_container/deep_learning_container.py b/src/sagemaker_tensorflow_container/deep_learning_container.py
new file mode 100644
index 00000000..0776dfb3
--- /dev/null
+++ b/src/sagemaker_tensorflow_container/deep_learning_container.py
@@ -0,0 +1,112 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import json
+import logging
+import re
+
+import requests
+
+
+def _validate_instance_id(instance_id):
+    """
+    Return the leading "i-" plus 17 non-whitespace characters of instance_id,
+    or None when instance_id does not start with such an identifier.
+    """
+    # Only the start of the string is checked; trailing characters are ignored.
+    found = re.match(r'^(i-\S{17})', instance_id)
+    if found is None:
+        return None
+
+    # Group 1 is the matched identifier itself.
+    return found.group(1)
+
+
+def _retrieve_instance_id():
+    """
+    Ask the instance metadata endpoint for the instance ID and validate it.
+    Returns None when the request yields no response or validation fails.
+    """
+    reply = requests_helper("http://169.254.169.254/latest/meta-data/instance-id",
+                            timeout=0.1)
+    # requests_helper reports a failed request as None.
+    if reply is None:
+        return None
+
+    return _validate_instance_id(reply.text)
+
+
+def _retrieve_instance_region():
+    """
+    Retrieve the region from the instance identity document. Returns None when
+    the request fails, the body is unusable, or the region is not allow-listed.
+    """
+    valid_regions = ['ap-northeast-1', 'ap-northeast-2', 'ap-southeast-1', 'ap-southeast-2',
+                     'ap-south-1', 'ca-central-1', 'eu-central-1', 'eu-north-1',
+                     'eu-west-1', 'eu-west-2', 'eu-west-3', 'sa-east-1',
+                     'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2']
+    url = "http://169.254.169.254/latest/dynamic/instance-identity/document"
+    response = requests_helper(url, timeout=0.1)
+    if response is None:
+        return None
+
+    try:
+        # Invalid JSON, or JSON that is not an object, is treated as "no region".
+        region = json.loads(response.text).get('region')
+    except (ValueError, TypeError, AttributeError):
+        return None
+    return region if region in valid_regions else None
+
+
+def query_bucket():
+    """
+    Issue a GET for dlc-containers.txt in the regional S3 bucket, tagging the URL
+    with the instance ID. Returns requests_helper's result, or None if skipped.
+    """
+    instance_id = _retrieve_instance_id()
+    region = _retrieve_instance_region()
+    url_template = ("https://aws-deep-learning-containers-{0}.s3.{0}.amazonaws.com"
+                    "/dlc-containers.txt?x-instance-id={1}")
+
+    # The request is only made when both metadata lookups succeeded.
+    response = None
+    if not (instance_id is None or region is None):
+        response = requests_helper(url_template.format(region, instance_id), timeout=0.2)
+    logging.debug("Query bucket finished: {}".format(response))
+    return response
+
+
+def requests_helper(url, timeout):
+    """GET url with the given timeout; log and return None on a requests error."""
+    try:
+        return requests.get(url, timeout=timeout)
+    except requests.exceptions.RequestException as error:
+        # Callers treat None as "no response".
+        logging.error("Request exception: {}".format(error))
+        return None
+
+
+def main():
+    """
+    Silence the root logger, then run the bucket query and discard its result.
+    """
+    # Logs are not needed for a normal run. Remove this line when debugging.
+    logging.getLogger().disabled = True
+    # Has a visible effect only when the root logger is not disabled.
+    logging.basicConfig(level=logging.ERROR)
+    query_bucket()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/test/resources/mnist/mnist.py b/test/resources/mnist/mnist.py
index 47d2bcd0..e4349ce2 100644
--- a/test/resources/mnist/mnist.py
+++ b/test/resources/mnist/mnist.py
@@ -1,8 +1,11 @@
-import tensorflow as tf
 import argparse
+import json
 import os
+import sys
+
 import numpy as np
-import json
+import tensorflow as tf
+
 
 
 def _parse_args():
@@ -32,6 +35,18 @@ def _load_testing_data(base_dir):
     return x_test, y_test
 
 
+def assert_can_track_sagemaker_experiments():
+    in_sagemaker_training = 'TRAINING_JOB_ARN' in os.environ
+    in_python_three = sys.version_info[0] == 3
+
+    if in_sagemaker_training and in_python_three:
+        import smexperiments.tracker
+
+        with smexperiments.tracker.Tracker.load() as tracker:
+            tracker.log_parameter('param', 1)
+            tracker.log_metric('metric', 1.0)
+
+
 args, unknown = _parse_args()
 
 model = tf.keras.models.Sequential([
@@ -48,5 +63,7 @@ def _load_testing_data(base_dir):
 x_test, y_test = _load_testing_data(args.train)
 model.fit(x_train, y_train, epochs=args.epochs)
 model.evaluate(x_test, y_test)
+
 if args.current_host == args.hosts[0]:
     model.save(os.path.join('/opt/ml/model', 'my_model.h5'))
+    assert_can_track_sagemaker_experiments()
diff --git a/test/unit/test_deep_learning_containers.py b/test/unit/test_deep_learning_containers.py
new file mode 100644
index 00000000..2da6959c
--- /dev/null
+++ b/test/unit/test_deep_learning_containers.py
@@ -0,0 +1,158 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License'). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the 'license' file accompanying this file. This file is
+# distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import unittest
+
+import pytest
+import requests
+
+from sagemaker_tensorflow_container import deep_learning_container as deep_learning_container_to_test
+
+
+@pytest.fixture(name='fixture_valid_instance_id')
+def fixture_valid_instance_id(requests_mock):  # mocked metadata endpoint serving an accepted id
+    return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id',
+                             text='i-123t32e11s32t1231')
+
+
+@pytest.fixture(name='fixture_invalid_instance_id')
+def fixture_invalid_instance_id(requests_mock):  # mocked endpoint serving an id the tests expect rejected
+    return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text='i-123')
+
+
+@pytest.fixture(name='fixture_none_instance_id')
+def fixture_none_instance_id(requests_mock):  # mocked metadata endpoint registered with text=None
+    return requests_mock.get('http://169.254.169.254/latest/meta-data/instance-id', text=None)
+
+
+@pytest.fixture(name='fixture_invalid_region')
+def fixture_invalid_region(requests_mock):  # identity document whose region is not in the valid list
+    return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document',
+                             json={'region': 'test'})
+
+
+@pytest.fixture(name='fixture_valid_region')
+def fixture_valid_region(requests_mock):  # identity document reporting the valid region us-east-1
+    return requests_mock.get('http://169.254.169.254/latest/dynamic/instance-identity/document',
+                             json={'region': 'us-east-1'})
+
+
+def test_retrieve_instance_id(fixture_valid_instance_id):
+    result = deep_learning_container_to_test._retrieve_instance_id()
+    assert 'i-123t32e11s32t1231' == result
+
+
+def test_retrieve_none_instance_id(fixture_none_instance_id):
+    result = deep_learning_container_to_test._retrieve_instance_id()
+    assert result is None
+
+
+def test_retrieve_invalid_instance_id(fixture_invalid_instance_id):
+    result = deep_learning_container_to_test._retrieve_instance_id()
+    assert result is None
+
+
+def test_retrieve_invalid_region(fixture_invalid_region):
+    result = deep_learning_container_to_test._retrieve_instance_region()
+    assert result is None
+
+
+def test_retrieve_valid_region(fixture_valid_region):
+    result = deep_learning_container_to_test._retrieve_instance_region()
+    assert 'us-east-1' == result
+
+
+def test_query_bucket(requests_mock, fixture_valid_region, fixture_valid_instance_id):
+    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
+    fixture_valid_region.return_value = 'us-east-1'
+    requests_mock.get(('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
+                       '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231'),
+                      text='Access Denied')
+    actual_response = deep_learning_container_to_test.query_bucket()
+    assert 'Access Denied' == actual_response.text
+
+
+def test_query_bucket_region_none(fixture_invalid_region, fixture_valid_instance_id):
+    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
+    fixture_invalid_region.return_value = None
+    actual_response = deep_learning_container_to_test.query_bucket()
+    assert actual_response is None
+
+
+def test_query_bucket_instance_id_none(requests_mock, fixture_valid_region, fixture_none_instance_id):
+    fixture_none_instance_id.return_value = None
+    fixture_valid_region.return_value = 'us-east-1'
+    actual_response = deep_learning_container_to_test.query_bucket()
+    assert actual_response is None
+
+
+def test_query_bucket_instance_id_invalid(requests_mock, fixture_valid_region, fixture_invalid_instance_id):
+    fixture_invalid_instance_id.return_value = None
+    fixture_valid_region.return_value = 'us-east-1'
+    actual_response = deep_learning_container_to_test.query_bucket()
+    assert actual_response is None
+
+
+def test_HTTP_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id):
+    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
+    fixture_valid_region.return_value = 'us-east-1'
+    query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
+                    '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231')
+
+    requests_mock.get(
+        query_s3_url,
+        exc=requests.exceptions.HTTPError)
+    requests_mock.side_effect = requests.exceptions.HTTPError
+
+    with pytest.raises(requests.exceptions.HTTPError):
+        actual_response = requests.get(query_s3_url)
+        assert actual_response is None
+
+
+def test_connection_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id):
+    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
+    fixture_valid_region.return_value = 'us-east-1'
+    query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
+                    '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231')
+
+    requests_mock.get(
+        query_s3_url,
+        exc=requests.exceptions.ConnectionError)
+
+    with pytest.raises(requests.exceptions.ConnectionError):
+        actual_response = requests.get(
+            query_s3_url)
+
+        assert actual_response is None
+
+
+def test_timeout_error_on_S3(requests_mock, fixture_valid_region, fixture_valid_instance_id):
+    fixture_valid_instance_id.return_value = 'i-123t32e11s32t1231'
+    fixture_valid_region.return_value = 'us-east-1'
+    query_s3_url = ('https://aws-deep-learning-containers-us-east-1.s3.us-east-1.amazonaws.com'
+                    '/dlc-containers.txt?x-instance-id=i-123t32e11s32t1231')
+
+    requests_mock.get(
+        query_s3_url,
+        exc=requests.Timeout)
+
+    with pytest.raises(requests.exceptions.Timeout):
+        actual_response = requests.get(
+            query_s3_url)
+
+        assert actual_response is None
+
+
+if __name__ == '__main__':
+    unittest.main()  # NOTE(review): this file defines no TestCase classes; confirm intent

From 90a7b8426f639fc65e1d994ecad5f6475a4c9717 Mon Sep 17 00:00:00 2001
From: Lauren Yu <6631887+laurenyu@users.noreply.github.com>
Date: Tue, 7 Jan 2020 12:43:51 -0800
Subject: [PATCH 02/10] change: update copyright year in license header (#266)

---
 docker/1.15.0/py2/dockerd-entrypoint.py                       | 2 +-
 docker/1.15.0/py3/dockerd-entrypoint.py                       | 2 +-
 scripts/build_all.py                                          | 2 +-
 scripts/publish_all.py                                        | 2 +-
 setup.py                                                      | 2 +-
 src/sagemaker_tensorflow_container/deep_learning_container.py | 2 +-
 src/sagemaker_tensorflow_container/s3_utils.py                | 2 +-
 src/sagemaker_tensorflow_container/training.py                | 2 +-
 test/integration/local/test_horovod.py                        | 2 +-
 test/integration/local/test_keras.py                          | 2 +-
 test/integration/local/test_training.py                       | 2 +-
 test/integration/sagemaker/test_horovod.py                    | 2 +-
 test/integration/sagemaker/test_mnist.py                      | 2 +-
 test/integration/sagemaker/test_tuning_model_dir.py           | 2 +-
 test/integration/sagemaker/timeout.py                         | 2 +-
 test/integration/utils.py                                     | 2 +-
 test/resources/mnist/horovod_mnist.py                         | 2 +-
 test/resources/test_py_version/entry.py                       | 2 +-
 test/resources/tuning_model_dir/entry.py                      | 2 +-
 test/unit/test_deep_learning_containers.py                    | 2 +-
 test/unit/test_s3_utils.py                                    | 2 +-
 test/unit/test_training.py                                    | 2 +-
 22 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/docker/1.15.0/py2/dockerd-entrypoint.py b/docker/1.15.0/py2/dockerd-entrypoint.py
index b9231abc..cd222026 100644
--- a/docker/1.15.0/py2/dockerd-entrypoint.py
+++ b/docker/1.15.0/py2/dockerd-entrypoint.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/docker/1.15.0/py3/dockerd-entrypoint.py b/docker/1.15.0/py3/dockerd-entrypoint.py
index b9231abc..cd222026 100644
--- a/docker/1.15.0/py3/dockerd-entrypoint.py
+++ b/docker/1.15.0/py3/dockerd-entrypoint.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/scripts/build_all.py b/scripts/build_all.py
index de7913d3..9f340d5d 100644
--- a/scripts/build_all.py
+++ b/scripts/build_all.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/scripts/publish_all.py b/scripts/publish_all.py
index 092ae113..2c78e8a7 100644
--- a/scripts/publish_all.py
+++ b/scripts/publish_all.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/setup.py b/setup.py
index 02c007c4..11c8be66 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/src/sagemaker_tensorflow_container/deep_learning_container.py b/src/sagemaker_tensorflow_container/deep_learning_container.py
index 0776dfb3..7e3967c7 100644
--- a/src/sagemaker_tensorflow_container/deep_learning_container.py
+++ b/src/sagemaker_tensorflow_container/deep_learning_container.py
@@ -1,4 +1,4 @@
-# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/src/sagemaker_tensorflow_container/s3_utils.py b/src/sagemaker_tensorflow_container/s3_utils.py
index 22e2ef74..0137ef25 100644
--- a/src/sagemaker_tensorflow_container/s3_utils.py
+++ b/src/sagemaker_tensorflow_container/s3_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/src/sagemaker_tensorflow_container/training.py b/src/sagemaker_tensorflow_container/training.py
index 5b176a28..bce6a69c 100644
--- a/src/sagemaker_tensorflow_container/training.py
+++ b/src/sagemaker_tensorflow_container/training.py
@@ -1,4 +1,4 @@
-# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the 'License'). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/local/test_horovod.py b/test/integration/local/test_horovod.py
index 2d4e9ce3..f35ba03a 100644
--- a/test/integration/local/test_horovod.py
+++ b/test/integration/local/test_horovod.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/local/test_keras.py b/test/integration/local/test_keras.py
index 2e473bf9..1eca0c2a 100644
--- a/test/integration/local/test_keras.py
+++ b/test/integration/local/test_keras.py
@@ -1,4 +1,4 @@
-# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/local/test_training.py b/test/integration/local/test_training.py
index 6a2bab25..bd1641b0 100644
--- a/test/integration/local/test_training.py
+++ b/test/integration/local/test_training.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/sagemaker/test_horovod.py b/test/integration/sagemaker/test_horovod.py
index 08e41704..1d2bd8ac 100644
--- a/test/integration/sagemaker/test_horovod.py
+++ b/test/integration/sagemaker/test_horovod.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/sagemaker/test_mnist.py b/test/integration/sagemaker/test_mnist.py
index 15e51a99..25c8db3e 100644
--- a/test/integration/sagemaker/test_mnist.py
+++ b/test/integration/sagemaker/test_mnist.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/sagemaker/test_tuning_model_dir.py b/test/integration/sagemaker/test_tuning_model_dir.py
index 604d4c93..e833c3a4 100644
--- a/test/integration/sagemaker/test_tuning_model_dir.py
+++ b/test/integration/sagemaker/test_tuning_model_dir.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/integration/sagemaker/timeout.py b/test/integration/sagemaker/timeout.py
index 4360987a..d4738d32 100644
--- a/test/integration/sagemaker/timeout.py
+++ b/test/integration/sagemaker/timeout.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License").
 # You may not use this file except in compliance with the License.
diff --git a/test/integration/utils.py b/test/integration/utils.py
index 83271f67..4944eb20 100644
--- a/test/integration/utils.py
+++ b/test/integration/utils.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/resources/mnist/horovod_mnist.py b/test/resources/mnist/horovod_mnist.py
index cb5f81c6..1014f2bb 100644
--- a/test/resources/mnist/horovod_mnist.py
+++ b/test/resources/mnist/horovod_mnist.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/resources/test_py_version/entry.py b/test/resources/test_py_version/entry.py
index e844e07c..8f71a01b 100644
--- a/test/resources/test_py_version/entry.py
+++ b/test/resources/test_py_version/entry.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/resources/tuning_model_dir/entry.py b/test/resources/tuning_model_dir/entry.py
index 2fae72fc..0bce7165 100644
--- a/test/resources/tuning_model_dir/entry.py
+++ b/test/resources/tuning_model_dir/entry.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/unit/test_deep_learning_containers.py b/test/unit/test_deep_learning_containers.py
index 2da6959c..8d6fe08e 100644
--- a/test/unit/test_deep_learning_containers.py
+++ b/test/unit/test_deep_learning_containers.py
@@ -1,4 +1,4 @@
-# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the 'License'). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/unit/test_s3_utils.py b/test/unit/test_s3_utils.py
index fa2cef6b..03de70a3 100644
--- a/test/unit/test_s3_utils.py
+++ b/test/unit/test_s3_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
diff --git a/test/unit/test_training.py b/test/unit/test_training.py
index f49d34ed..b69beed2 100644
--- a/test/unit/test_training.py
+++ b/test/unit/test_training.py
@@ -1,4 +1,4 @@
-# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of

From 97be95230f160e4aef58387330f2077c8e25e58b Mon Sep 17 00:00:00 2001
From: Sai Parthasarathy Miduthuri <54188298+saimidu@users.noreply.github.com>
Date: Thu, 9 Jan 2020 11:32:03 -0800
Subject: [PATCH 03/10] update: Update buildspec for TF 1.15.0 (#265)

* Change path for entrypoint script

* Change path for deep learning container script

* Change build context to folder containing dockerfiles

* Update buildspec

* Update buildspec-release
---
 buildspec-release.yml                         | 22 +++---
 buildspec.yml                                 | 79 +++++++++++--------
 docker/__init__.py                            |  0
 docker/build_artifacts/__init__.py            |  0
 .../deep_learning_container.py                |  0
 .../dockerd-entrypoint.py                     |  0
 ...ers.py => test_deep_learning_container.py} |  3 +-
 7 files changed, 60 insertions(+), 44 deletions(-)
 create mode 100644 docker/__init__.py
 create mode 100644 docker/build_artifacts/__init__.py
 rename {src/sagemaker_tensorflow_container => docker/build_artifacts}/deep_learning_container.py (100%)
 rename docker/{1.15.0/py2 => build_artifacts}/dockerd-entrypoint.py (100%)
 rename test/unit/{test_deep_learning_containers.py => test_deep_learning_container.py} (98%)

diff --git a/buildspec-release.yml b/buildspec-release.yml
index a4ff55a5..2e5a9a86 100644
--- a/buildspec-release.yml
+++ b/buildspec-release.yml
@@ -2,7 +2,7 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.13.1'
+    FRAMEWORK_VERSION: '1.15.0'
     GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
     SETUP_FILE: 'setup_cmds.sh'
     SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]'
@@ -60,21 +60,21 @@ phases:
         echo '[{
           "repository": "sagemaker-tensorflow-scriptmode",
           "tags": [{
-            "source": "1.13.1-cpu-py2",
-            "dest": ["1.13.1-cpu-py2", "1.13-cpu-py2", "1.13.1-cpu-py2-'${CODEBUILD_BUILD_ID#*:}'"]
+            "source": "1.15.0-cpu-py2",
+            "dest": ["1.15.0-cpu-py2", "1.15-cpu-py2", "1.15.0-cpu-py2-'${CODEBUILD_BUILD_ID#*:}'"]
           },{
-            "source": "1.13.1-cpu-py3",
-            "dest": ["1.13.1-cpu-py3", "1.13-cpu-py3", "1.13.1-cpu-py3-'${CODEBUILD_BUILD_ID#*:}'"]
+            "source": "1.15.0-cpu-py3",
+            "dest": ["1.15.0-cpu-py3", "1.15-cpu-py3", "1.15.0-cpu-py3-'${CODEBUILD_BUILD_ID#*:}'"]
           },{
-            "source": "1.13.1-gpu-py2",
-            "dest": ["1.13.1-gpu-py2", "1.13-gpu-py2", "1.13.1-gpu-py2-'${CODEBUILD_BUILD_ID#*:}'"]
+            "source": "1.15.0-gpu-py2",
+            "dest": ["1.15.0-gpu-py2", "1.15-gpu-py2", "1.15.0-gpu-py2-'${CODEBUILD_BUILD_ID#*:}'"]
           },{
-            "source": "1.13.1-gpu-py3",
-            "dest": ["1.13.1-gpu-py3", "1.13-gpu-py3", "1.13.1-gpu-py3-'${CODEBUILD_BUILD_ID#*:}'"]
+            "source": "1.15.0-gpu-py3",
+            "dest": ["1.15.0-gpu-py3", "1.15-gpu-py3", "1.15.0-gpu-py3-'${CODEBUILD_BUILD_ID#*:}'"]
           }],
           "test": [
-            "IGNORE_COVERAGE=- tox -e py36 -- -m deploy_test test/integration/sagemaker -n 4 --region {region} --account-id {aws-id} --instance-type {cpu-instance-type} --docker-base-name sagemaker-tensorflow-scriptmode --framework-version 1.13.1 --processor cpu --py-version 2,3",
-            "IGNORE_COVERAGE=- tox -e py36 -- -m deploy_test test/integration/sagemaker -n 4 --region {region} --account-id {aws-id} --docker-base-name sagemaker-tensorflow-scriptmode --framework-version 1.13.1 --processor gpu --py-version 2,3"
+            "IGNORE_COVERAGE=- tox -e py36 -- -m deploy_test test/integration/sagemaker -n 4 --region {region} --account-id {aws-id} --instance-type {cpu-instance-type} --docker-base-name sagemaker-tensorflow-scriptmode --framework-version 1.15.0 --processor cpu --py-version 2,3",
+            "IGNORE_COVERAGE=- tox -e py36 -- -m deploy_test test/integration/sagemaker -n 4 --region {region} --account-id {aws-id} --docker-base-name sagemaker-tensorflow-scriptmode --framework-version 1.15.0 --processor gpu --py-version 2,3"
           ]
         }]' > deployments.json
 
diff --git a/buildspec.yml b/buildspec.yml
index 214cdcca..d59393c3 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -2,11 +2,7 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.13.1'
-    CPU_FRAMEWORK_BINARY: 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/cpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl'
-    CPU_PY_VERSION: '3'
-    GPU_FRAMEWORK_BINARY: 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/gpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl'
-    GPU_PY_VERSION: '3'
+    FRAMEWORK_VERSION: '1.15.0'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-tensorflow-container'
     SETUP_FILE: 'setup_cmds.sh'
@@ -34,42 +30,56 @@ phases:
       - tox -e py36,py27 test/unit
 
       # Create pip archive
-      - build_dir="docker/$FRAMEWORK_VERSION"
+      - root_dir=$(pwd)
       - build_id="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
       - python3 setup.py sdist
       - tar_name=$(ls dist)
-      - cp dist/$tar_name $build_dir
 
-      # build cpu image
-      - cpu_dockerfile="Dockerfile.cpu"
+      # Shared build artifacts (entrypoint and deep_learning_container scripts)
+      - build_artifacts=$root_dir/docker/build_artifacts
 
-      # Download framework binary
-      - cpu_fw_binary=$(basename $CPU_FRAMEWORK_BINARY)
-      - wget -O $build_dir/$cpu_fw_binary $CPU_FRAMEWORK_BINARY
-
-      - CPU_TAG="$FRAMEWORK_VERSION-cpu-py$CPU_PY_VERSION-$build_id"
+      # build py2 images
 
+      # prepare build context
+      - build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py2"
+      - cp $root_dir/dist/$tar_name $build_dir
+      - cp $build_artifacts/* $build_dir/
       - cd $build_dir
-      - docker build -f $cpu_dockerfile --build-arg framework_support_installable=$tar_name --build-arg py_version=$CPU_PY_VERSION --build-arg framework_installable=$cpu_fw_binary -t $PREPROD_IMAGE:$CPU_TAG .
-      - cd ../../
+
+      # build cpu image
+      - cpu_dockerfile="Dockerfile.cpu"
+      - CPU_TAG_PY2="$FRAMEWORK_VERSION-cpu-py2-$build_id"
+      - docker build -f $cpu_dockerfile -t $PREPROD_IMAGE:$CPU_TAG_PY2 .
 
       # build gpu image
       - gpu_dockerfile="Dockerfile.gpu"
+      - GPU_TAG_PY2="$FRAMEWORK_VERSION-gpu-py2-$build_id"
+      - docker build -f $gpu_dockerfile -t $PREPROD_IMAGE:$GPU_TAG_PY2 .
 
-      # Download framework binary
-      - gpu_fw_binary=$(basename $GPU_FRAMEWORK_BINARY)
-      - wget -O $build_dir/$gpu_fw_binary $GPU_FRAMEWORK_BINARY
-
-      - GPU_TAG="$FRAMEWORK_VERSION-gpu-py$GPU_PY_VERSION-$build_id"
+      # build py3 images
 
+      # prepare build context
+      - build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py3"
+      - cp $root_dir/dist/$tar_name $build_dir
+      - cp $build_artifacts/* $build_dir/
       - cd $build_dir
-      - docker build -f $gpu_dockerfile --build-arg framework_support_installable=$tar_name --build-arg py_version=$GPU_PY_VERSION --build-arg framework_installable=$gpu_fw_binary -t $PREPROD_IMAGE:$GPU_TAG .
-      - cd ../../
+
+      # build cpu image
+      - cpu_dockerfile="Dockerfile.cpu"
+      - CPU_TAG_PY3="$FRAMEWORK_VERSION-cpu-py3-$build_id"
+      - docker build -f $cpu_dockerfile -t $PREPROD_IMAGE:$CPU_TAG_PY3 .
+
+      # build gpu image (gpu_dockerfile is set above); return to repo root for the test steps
+      - GPU_TAG_PY3="$FRAMEWORK_VERSION-gpu-py3-$build_id"
+      - docker build -f $gpu_dockerfile -t $PREPROD_IMAGE:$GPU_TAG_PY3 .
+      - cd $root_dir
 
       # push images to ecr
       - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker push $PREPROD_IMAGE:$CPU_TAG
-      - docker push $PREPROD_IMAGE:$GPU_TAG
+      - docker push $PREPROD_IMAGE:$CPU_TAG_PY2
+      - docker push $PREPROD_IMAGE:$GPU_TAG_PY2
+      - docker push $PREPROD_IMAGE:$CPU_TAG_PY3
+      - docker push $PREPROD_IMAGE:$GPU_TAG_PY3
 
       # launch remote gpu instance
       - instance_type='p2.xlarge'
@@ -79,7 +89,8 @@ phases:
       # run cpu integration tests
       - |
         if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-          pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu
+          pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu
+          pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu
         else
           echo "skipping cpu integration tests"
         fi
@@ -88,7 +99,9 @@ phases:
       - |
         if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
           printf "$SETUP_CMDS" > $SETUP_FILE
-          cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY_VERSION --processor gpu"
+          cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu"
+          remote-test --github-repo $GITHUB_REPO --test-cmd "$cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
+          cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu"
           remote-test --github-repo $GITHUB_REPO --test-cmd "$cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
         else
           echo "skipping gpu integration tests"
@@ -97,8 +110,10 @@ phases:
       # run sagemaker tests
       - |
         if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG --py-version $CPU_PY_VERSION --processor cpu
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG --py-version $GPU_PY_VERSION --processor gpu
+          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu
+          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu
+          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu
+          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu
         else
           echo "skipping sagemaker tests"
         fi
@@ -109,5 +124,7 @@ phases:
       - cleanup-key-pairs
 
       # remove ecr image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY2
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY2
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY3
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY3
diff --git a/docker/__init__.py b/docker/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/docker/build_artifacts/__init__.py b/docker/build_artifacts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/sagemaker_tensorflow_container/deep_learning_container.py b/docker/build_artifacts/deep_learning_container.py
similarity index 100%
rename from src/sagemaker_tensorflow_container/deep_learning_container.py
rename to docker/build_artifacts/deep_learning_container.py
diff --git a/docker/1.15.0/py2/dockerd-entrypoint.py b/docker/build_artifacts/dockerd-entrypoint.py
similarity index 100%
rename from docker/1.15.0/py2/dockerd-entrypoint.py
rename to docker/build_artifacts/dockerd-entrypoint.py
diff --git a/test/unit/test_deep_learning_containers.py b/test/unit/test_deep_learning_container.py
similarity index 98%
rename from test/unit/test_deep_learning_containers.py
rename to test/unit/test_deep_learning_container.py
index 8d6fe08e..7d5d7d86 100644
--- a/test/unit/test_deep_learning_containers.py
+++ b/test/unit/test_deep_learning_container.py
@@ -14,11 +14,10 @@
 
 import unittest
 
+from docker.build_artifacts import deep_learning_container as deep_learning_container_to_test
 import pytest
 import requests
 
-from sagemaker_tensorflow_container import deep_learning_container as deep_learning_container_to_test
-
 
 @pytest.fixture(name='fixture_valid_instance_id')
 def fixture_valid_instance_id(requests_mock):

From 2792fcbd3a9de288f3f8660e2ca9d186b6f23024 Mon Sep 17 00:00:00 2001
From: Sai Parthasarathy Miduthuri <54188298+saimidu@users.noreply.github.com>
Date: Fri, 10 Jan 2020 19:45:11 -0800
Subject: [PATCH 04/10] update: Update awscli version and remove related pins
 (#267)

* Update awscli, remove dependent pins

* Update setup.py package versions to latest
---
 docker/1.15.0/py2/Dockerfile.cpu | 6 +-----
 docker/1.15.0/py2/Dockerfile.gpu | 6 +-----
 docker/1.15.0/py3/Dockerfile.cpu | 8 +-------
 docker/1.15.0/py3/Dockerfile.gpu | 8 +-------
 setup.py                         | 8 ++++----
 5 files changed, 8 insertions(+), 28 deletions(-)

diff --git a/docker/1.15.0/py2/Dockerfile.cpu b/docker/1.15.0/py2/Dockerfile.cpu
index f9387aa0..4fa24019 100644
--- a/docker/1.15.0/py2/Dockerfile.cpu
+++ b/docker/1.15.0/py2/Dockerfile.cpu
@@ -100,9 +100,6 @@ RUN pip install --no-cache-dir -U \
     keras_preprocessing==1.1.0 \
     requests==2.22.0 \
     keras==2.3.1 \
-    # botocore requires python-dateutil<2.8.1
-    "python-dateutil<2.8.1" \
-    awscli==1.16.296 \
     mpi4py==3.0.2 \
     "cryptography>=2.3" \
     "sagemaker-tensorflow>=1.15,<1.16" \
@@ -111,10 +108,9 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
+    awscli==1.16.314 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE \
  && pip install --no-cache-dir -U \
-    # awscli requires PyYAML<5.2
-    "PyYAML<5.2" \
     horovod==0.18.2
 
 COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
diff --git a/docker/1.15.0/py2/Dockerfile.gpu b/docker/1.15.0/py2/Dockerfile.gpu
index fa86d9dc..534066e8 100644
--- a/docker/1.15.0/py2/Dockerfile.gpu
+++ b/docker/1.15.0/py2/Dockerfile.gpu
@@ -133,9 +133,6 @@ RUN pip install --no-cache-dir -U \
     keras_preprocessing==1.1.0 \
     requests==2.22.0 \
     keras==2.3.1 \
-    # botocore requires python-dateutil<2.8.1
-    "python-dateutil<2.8.1" \
-    awscli==1.16.296 \
     mpi4py==3.0.2 \
     "cryptography>=2.3" \
     "sagemaker-tensorflow>=1.15,<1.16" \
@@ -144,13 +141,12 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
+    awscli==1.16.314 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 # Install Horovod, temporarily using CUDA stubs
 RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs \
  && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
-    # awscli requires PyYAML<5.2
-    "PyYAML<5.2" \
     horovod==0.18.2 \
  && ldconfig
 
diff --git a/docker/1.15.0/py3/Dockerfile.cpu b/docker/1.15.0/py3/Dockerfile.cpu
index b204769a..7eaef285 100644
--- a/docker/1.15.0/py3/Dockerfile.cpu
+++ b/docker/1.15.0/py3/Dockerfile.cpu
@@ -88,8 +88,6 @@ RUN pip3 --no-cache-dir install --upgrade \
 RUN ln -s $(which python3) /usr/local/bin/python \
  && ln -s $(which pip3) /usr/bin/pip
 
-# install PyYAML==5.1.2 to avoid conflict with latest awscli
-# python-dateutil==2.8.0 to satisfy botocore associated with latest awscli
 RUN pip install --no-cache-dir -U \
     numpy==1.17.4 \
     scipy==1.2.2 \
@@ -100,12 +98,9 @@ RUN pip install --no-cache-dir -U \
     keras_applications==1.0.8 \
     keras_preprocessing==1.1.0 \
     keras==2.3.1 \
-    # botocore requires python-dateutil<2.8.1
-    "python-dateutil<2.8.1" \
     requests==2.22.0 \
     smdebug==0.4.14 \
     sagemaker-experiments==0.1.3 \
-    awscli==1.16.296 \
     mpi4py==3.0.2 \
     "cryptography>=2.3" \
     "sagemaker-tensorflow>=1.15,<1.16" \
@@ -114,11 +109,10 @@ RUN pip install --no-cache-dir -U \
  && pip install --force-reinstall --no-cache-dir -U \
     ${TF_URL} \
  && pip install --force-reinstall --no-cache-dir -U \
-    # awscli requires PyYAML<5.2
-    "PyYAML<5.2" \
     horovod==0.18.2 \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
+    awscli==1.16.314 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
diff --git a/docker/1.15.0/py3/Dockerfile.gpu b/docker/1.15.0/py3/Dockerfile.gpu
index 38c86b14..d912f5ba 100644
--- a/docker/1.15.0/py3/Dockerfile.gpu
+++ b/docker/1.15.0/py3/Dockerfile.gpu
@@ -125,8 +125,6 @@ RUN ln -s $(which python3) /usr/local/bin/python \
 
 COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
 
-# install PyYAML==5.1.2 to avoid conflict with latest awscli
-# python-dateutil==2.8.0 to satisfy botocore associated with latest awscli
 RUN pip install --no-cache-dir -U \
     numpy==1.17.4 \
     scipy==1.2.2 \
@@ -138,11 +136,8 @@ RUN pip install --no-cache-dir -U \
     keras_preprocessing==1.1.0 \
     requests==2.22.0 \
     keras==2.3.1 \
-    # botocore requires python-dateutil<2.8.1
-    "python-dateutil<2.8.1" \
     smdebug==0.4.14 \
     sagemaker-experiments==0.1.3 \
-    awscli==1.16.296 \
     mpi4py==3.0.2 \
     "cryptography>=2.3" \
     "sagemaker-tensorflow>=1.15,<1.16" \
@@ -152,13 +147,12 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
+    awscli==1.16.314 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 # Install Horovod, temporarily using CUDA stubs
 RUN ldconfig /usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs \
  && HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir \
-    # awscli requires PyYAML<5.2
-    "PyYAML<5.2" \
     horovod==0.18.2 \
  && ldconfig
 
diff --git a/setup.py b/setup.py
index 11c8be66..6b7537f9 100644
--- a/setup.py
+++ b/setup.py
@@ -53,13 +53,13 @@ def read_version():
         'Programming Language :: Python :: 3.6',
     ],
 
-    install_requires=['sagemaker-containers>=2.4.6', 'numpy', 'scipy', 'sklearn',
+    install_requires=['sagemaker-containers>=2.6.2', 'numpy', 'scipy', 'sklearn',
                       'pandas', 'Pillow', 'h5py'],
     extras_require={
         'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist', 'mock',
-                 'sagemaker==1.19.1', 'tensorflow<2.0', 'docker-compose', 'boto3==1.10.32',
-                 'six==1.13.0', 'python-dateutil>=2.1,<2.8.1', 'botocore==1.13.32',
-                 'requests-mock', 'awscli==1.16.296'],
+                 'sagemaker==1.50.1', 'tensorflow<2.0', 'docker-compose', 'boto3==1.10.50',
+                 'six==1.13.0', 'python-dateutil>=2.1,<2.8.1', 'botocore==1.13.50',
+                 'requests-mock', 'awscli==1.16.314'],
         'benchmark': ['click']
     },
 )

From f0a557c546ec7fe185594aa2e553dc2e8f8d217e Mon Sep 17 00:00:00 2001
From: Denis Davydenko <dzianis.davydzenka@gmail.com>
Date: Fri, 10 Jan 2020 20:01:00 -0800
Subject: [PATCH 05/10] bump smdebug version to 0.5.0.post0 (#268)

* bump smdebug to 0.5.0

* changed awscli to prevent botocore conflict
---
 docker/1.15.0/py3/Dockerfile.cpu | 2 +-
 docker/1.15.0/py3/Dockerfile.gpu | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/1.15.0/py3/Dockerfile.cpu b/docker/1.15.0/py3/Dockerfile.cpu
index 7eaef285..4b72f937 100644
--- a/docker/1.15.0/py3/Dockerfile.cpu
+++ b/docker/1.15.0/py3/Dockerfile.cpu
@@ -99,7 +99,7 @@ RUN pip install --no-cache-dir -U \
     keras_preprocessing==1.1.0 \
     keras==2.3.1 \
     requests==2.22.0 \
-    smdebug==0.4.14 \
+    smdebug==0.5.0.post0 \
     sagemaker-experiments==0.1.3 \
     mpi4py==3.0.2 \
     "cryptography>=2.3" \
diff --git a/docker/1.15.0/py3/Dockerfile.gpu b/docker/1.15.0/py3/Dockerfile.gpu
index d912f5ba..3a409e73 100644
--- a/docker/1.15.0/py3/Dockerfile.gpu
+++ b/docker/1.15.0/py3/Dockerfile.gpu
@@ -136,7 +136,7 @@ RUN pip install --no-cache-dir -U \
     keras_preprocessing==1.1.0 \
     requests==2.22.0 \
     keras==2.3.1 \
-    smdebug==0.4.14 \
+    smdebug==0.5.0.post0 \
     sagemaker-experiments==0.1.3 \
     mpi4py==3.0.2 \
     "cryptography>=2.3" \

From ca1a008172f846f9d9ea52f255dcac032da3d15c Mon Sep 17 00:00:00 2001
From: Sai Parthasarathy Miduthuri <54188298+saimidu@users.noreply.github.com>
Date: Mon, 13 Jan 2020 15:20:52 -0800
Subject: [PATCH 06/10] documentation: Add link to TF 2.0 branch (#269)

* Add link to TF 2.0 branch

* Add url for dockerfiles
---
 README.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.rst b/README.rst
index 6e83d245..72418561 100644
--- a/README.rst
+++ b/README.rst
@@ -56,6 +56,10 @@ The Docker images are built from the Dockerfiles specified in
 The Docker files are grouped based on TensorFlow version and separated
 based on Python version and processor type.
 
+The Docker files for TensorFlow 2.0 are available in the
+`tf-2 <https://github.com/aws/sagemaker-tensorflow-container/tree/tf-2>`__ branch, in
+`docker/2.0.0/ <https://github.com/aws/sagemaker-tensorflow-container/tree/tf-2/docker/2.0.0>`__.
+
 The Docker images, used to run training & inference jobs, are built from
 both corresponding "base" and "final" Dockerfiles.
 

From 3c384adab5960e18651463a5850f8b22680c7c4e Mon Sep 17 00:00:00 2001
From: Sai Parthasarathy Miduthuri <54188298+saimidu@users.noreply.github.com>
Date: Mon, 13 Jan 2020 16:40:18 -0800
Subject: [PATCH 07/10] Pin awscli to latest (#270)

---
 docker/1.15.0/py3/Dockerfile.cpu | 2 +-
 docker/1.15.0/py3/Dockerfile.gpu | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/1.15.0/py3/Dockerfile.cpu b/docker/1.15.0/py3/Dockerfile.cpu
index 4b72f937..7e60c4f4 100644
--- a/docker/1.15.0/py3/Dockerfile.cpu
+++ b/docker/1.15.0/py3/Dockerfile.cpu
@@ -112,7 +112,7 @@ RUN pip install --no-cache-dir -U \
     horovod==0.18.2 \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
-    awscli==1.16.314 \
+    awscli==1.17.1 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
diff --git a/docker/1.15.0/py3/Dockerfile.gpu b/docker/1.15.0/py3/Dockerfile.gpu
index 3a409e73..c16fb42d 100644
--- a/docker/1.15.0/py3/Dockerfile.gpu
+++ b/docker/1.15.0/py3/Dockerfile.gpu
@@ -147,7 +147,7 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
-    awscli==1.16.314 \
+    awscli==1.17.1 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 # Install Horovod, temporarily using CUDA stubs

From c46b6f60848388487cef71705ca209ac539e3c72 Mon Sep 17 00:00:00 2001
From: Arjuna Keshavan <33526713+arjkesh@users.noreply.github.com>
Date: Tue, 21 Jan 2020 15:56:16 -0800
Subject: [PATCH 08/10] pin awscli to latest version (#272)

awscli==1.17.7
---
 docker/1.15.0/py2/Dockerfile.cpu | 2 +-
 docker/1.15.0/py2/Dockerfile.gpu | 2 +-
 docker/1.15.0/py3/Dockerfile.cpu | 2 +-
 docker/1.15.0/py3/Dockerfile.gpu | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/1.15.0/py2/Dockerfile.cpu b/docker/1.15.0/py2/Dockerfile.cpu
index 4fa24019..5a161f80 100644
--- a/docker/1.15.0/py2/Dockerfile.cpu
+++ b/docker/1.15.0/py2/Dockerfile.cpu
@@ -108,7 +108,7 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
-    awscli==1.16.314 \
+    awscli==1.17.7 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE \
  && pip install --no-cache-dir -U \
     horovod==0.18.2
diff --git a/docker/1.15.0/py2/Dockerfile.gpu b/docker/1.15.0/py2/Dockerfile.gpu
index 534066e8..50b1484f 100644
--- a/docker/1.15.0/py2/Dockerfile.gpu
+++ b/docker/1.15.0/py2/Dockerfile.gpu
@@ -141,7 +141,7 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
-    awscli==1.16.314 \
+    awscli==1.17.7 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 # Install Horovod, temporarily using CUDA stubs
diff --git a/docker/1.15.0/py3/Dockerfile.cpu b/docker/1.15.0/py3/Dockerfile.cpu
index 7e60c4f4..d0fe3027 100644
--- a/docker/1.15.0/py3/Dockerfile.cpu
+++ b/docker/1.15.0/py3/Dockerfile.cpu
@@ -112,7 +112,7 @@ RUN pip install --no-cache-dir -U \
     horovod==0.18.2 \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
-    awscli==1.17.1 \
+    awscli==1.17.7 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
diff --git a/docker/1.15.0/py3/Dockerfile.gpu b/docker/1.15.0/py3/Dockerfile.gpu
index c16fb42d..68c68383 100644
--- a/docker/1.15.0/py3/Dockerfile.gpu
+++ b/docker/1.15.0/py3/Dockerfile.gpu
@@ -147,7 +147,7 @@ RUN pip install --no-cache-dir -U \
     ${TF_URL} \
  && pip install --no-cache-dir -U \
     $FRAMEWORK_SUPPORT_INSTALLABLE \
-    awscli==1.17.1 \
+    awscli==1.17.7 \
  && rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
 
 # Install Horovod, temporarily using CUDA stubs

From 8961aacea642219f0679b35b462671c96c00b461 Mon Sep 17 00:00:00 2001
From: Lauren Yu <6631887+laurenyu@users.noreply.github.com>
Date: Fri, 24 Jan 2020 16:31:22 -0800
Subject: [PATCH 09/10] infra: properly fail build if has-matching-changes
 fails (#273)

---
 buildspec.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/buildspec.yml b/buildspec.yml
index d59393c3..5deb4c15 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -86,9 +86,11 @@ phases:
       - create-key-pair
       - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu
 
+      - HAS_MATCHING_CHANGES_OUTPUT=$(has-matching-changes "test/" "tests/" "src/*.py" "setup.py" "docker/*" "buildspec.yml")
+
       # run cpu integration tests
       - |
-        if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
+        if [ "$HAS_MATCHING_CHANGES" = "Changes Found" ] ; then
           pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu
           pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu
         else
@@ -97,7 +99,7 @@ phases:
 
       # run gpu integration tests
       - |
-        if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
+        if [ "$HAS_MATCHING_CHANGES" = "Changes Found" ] ; then
           printf "$SETUP_CMDS" > $SETUP_FILE
           cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu"
           remote-test --github-repo $GITHUB_REPO --test-cmd "$cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
@@ -109,7 +111,7 @@ phases:
 
       # run sagemaker tests
       - |
-        if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
+        if [ "$HAS_MATCHING_CHANGES" = "Changes Found" ] ; then
           pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu
           pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu
           pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu

From f00a3b12fe7b83378f8f65ad233b95cd1671e82c Mon Sep 17 00:00:00 2001
From: Lauren Yu <6631887+laurenyu@users.noreply.github.com>
Date: Mon, 27 Jan 2020 17:57:55 -0800
Subject: [PATCH 10/10] infra: properly fail build if has-matching-changes
 fails (#274)

---
 buildspec.yml | 48 ++++++++++++++++++++----------------------------
 1 file changed, 20 insertions(+), 28 deletions(-)

diff --git a/buildspec.yml b/buildspec.yml
index 5deb4c15..eece6ae1 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -86,39 +86,31 @@ phases:
       - create-key-pair
       - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu
 
-      - HAS_MATCHING_CHANGES_OUTPUT=$(has-matching-changes "test/" "tests/" "src/*.py" "setup.py" "docker/*" "buildspec.yml")
-
       # run cpu integration tests
-      - |
-        if [ "$HAS_MATCHING_CHANGES" = "Changes Found" ] ; then
-          pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu
-          pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu
-        else
-          echo "skipping cpu integration tests"
-        fi
+      - py2_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu"
+      - py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu"
+      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
 
       # run gpu integration tests
-      - |
-        if [ "$HAS_MATCHING_CHANGES" = "Changes Found" ] ; then
-          printf "$SETUP_CMDS" > $SETUP_FILE
-          cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu"
-          remote-test --github-repo $GITHUB_REPO --test-cmd "$cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
-          cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu"
-          remote-test --github-repo $GITHUB_REPO --test-cmd "$cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
-        else
-          echo "skipping gpu integration tests"
-        fi
+      - printf "$SETUP_CMDS" > $SETUP_FILE
+      - cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu"
+      - py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
+      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+
+      - cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu"
+      - py3_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
+      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
 
       # run sagemaker tests
-      - |
-        if [ "$HAS_MATCHING_CHANGES" = "Changes Found" ] ; then
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu
-        else
-          echo "skipping sagemaker tests"
-        fi
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
 
     finally:
       # shut down remote gpu instance