Skip to content

Commit 15d97fc

Browse files
authored
change: merge dockerfiles (#235)
merge asimov dockerfiles to master branch
1 parent 12fd7ef commit 15d97fc

11 files changed

+729
-62
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## v0.1.0 (2019-05-22)
4+
5+
### Bug fixes and other changes
6+
37
## v2.0.7 (2019-08-15)
48

59
### Bug fixes and other changes

docker/1.13.1/Dockerfile.cpu

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
99
openssh-server \
1010
ca-certificates \
1111
curl \
12-
&& add-apt-repository ppa:deadsnakes/ppa -y \
12+
git \
13+
wget \
14+
vim \
15+
zlib1g-dev \
1316
&& rm -rf /var/lib/apt/lists/*
1417

1518
# Install Open MPI
@@ -54,39 +57,54 @@ ENV KMP_AFFINITY=granularity=fine,compact,1,0 KMP_BLOCKTIME=1 KMP_SETTINGS=0
5457

5558
WORKDIR /
5659

57-
ARG py_version
58-
ARG framework_installable
59-
ARG framework_support_installable=sagemaker_tensorflow_container-2.0.0.tar.gz
60+
ARG PYTHON=python3
61+
ARG PYTHON_PIP=python3-pip
62+
ARG PIP=pip3
63+
ARG PYTHON_VERSION=3.6.6
6064

61-
RUN if [ $py_version -eq 3 ]; then PYTHON_VERSION=python3.6; else PYTHON_VERSION=python2.7; fi && \
62-
apt-get update && apt-get install -y --no-install-recommends $PYTHON_VERSION-dev --allow-unauthenticated && \
63-
ln -s -f /usr/bin/$PYTHON_VERSION /usr/bin/python && \
64-
ln -s -f /usr/bin/$PYTHON_VERSION /usr/local/bin/python && \
65-
rm -rf /var/lib/apt/lists/*
65+
# Build CPython $PYTHON_VERSION from source and expose pip3 as /usr/bin/pip.
# The -dev headers are installed before ./configure so a single make pass is
# enough, and the apt package lists are removed in the same layer so they do
# not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libbz2-dev \
        libc6-dev \
        libgdbm-dev \
        libncursesw5-dev \
        libreadline-gplv2-dev \
        libsqlite3-dev \
        libssl-dev \
        tk-dev && \
    rm -rf /var/lib/apt/lists/* && \
    wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
    tar -xvf Python-$PYTHON_VERSION.tgz && cd Python-$PYTHON_VERSION && \
    ./configure && make && make install && \
    # Remove both the extracted source tree and the downloaded tarball.
    rm -rf ../Python-$PYTHON_VERSION* && \
    ln -s /usr/local/bin/pip3 /usr/bin/pip
6671

6772
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8
6873

69-
RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
70-
python get-pip.py --disable-pip-version-check --no-cache-dir "pip==18.1" && \
71-
rm get-pip.py
72-
73-
COPY $framework_installable tensorflow-1.13.1-py2.py3-none-any.whl
74+
ARG framework_support_installable=sagemaker_tensorflow_container-2.0.0.tar.gz
7475
COPY $framework_support_installable .
75-
76-
RUN pip install --no-cache-dir -U \
77-
keras==2.2.4 \
78-
mpi4py==3.0.1 \
79-
"sagemaker-tensorflow>=1.13,<1.14" && \
76+
ARG TF_URL="https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.13/AmazonLinux/cpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl"
77+
78+
RUN ${PIP} --no-cache-dir install --upgrade pip setuptools
79+
80+
# Some TF tools expect a "python" binary
81+
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
82+
83+
RUN ${PIP} install --no-cache-dir -U \
84+
numpy==1.16.2 \
85+
scipy==1.2.1 \
86+
scikit-learn==0.20.3 \
87+
pandas==0.24.2 \
88+
Pillow==5.4.1 \
89+
h5py==2.9.0 \
90+
keras_applications==1.0.7 \
91+
keras_preprocessing==1.0.9 \
92+
keras==2.2.4 \
93+
requests==2.21.0 \
94+
awscli==1.16.130 \
95+
mpi4py==3.0.1 \
96+
"sagemaker-tensorflow>=1.13,<1.14" && \
8097
# Let's install TensorFlow separately in the end to avoid
8198
# the library version being overwritten
82-
pip install --force-reinstall --no-cache-dir -U \
83-
tensorflow-1.13.1-py2.py3-none-any.whl \
84-
horovod && \
85-
pip install --no-cache-dir -U $framework_support_installable && \
86-
rm -f tensorflow-1.13.1-py2.py3-none-any.whl && \
87-
rm -f $framework_support_installable && \
88-
pip uninstall -y --no-cache-dir \
89-
markdown \
90-
tensorboard
99+
${PIP} install --force-reinstall --no-cache-dir -U \
100+
${TF_URL} \
101+
horovod==0.16.4 && \
102+
${PIP} install --no-cache-dir -U $framework_support_installable && \
103+
rm -f $framework_support_installable && \
104+
${PIP} uninstall -y --no-cache-dir \
105+
markdown \
106+
tensorboard
91107

92108
# key=value form; the space-separated ENV syntax is deprecated.
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
109+
110+
# Default to a shell. The path is absolute so the command still resolves when
# the working directory is not / (e.g. `docker run -w` or a later WORKDIR).
CMD ["/bin/bash"]

docker/1.13.1/Dockerfile.gpu

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@ FROM nvidia/cuda:10.0-base-ubuntu16.04
22

33
LABEL maintainer="Amazon AI"
44

5-
RUN apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \
6-
software-properties-common && \
7-
add-apt-repository ppa:deadsnakes/ppa -y && \
8-
rm -rf /var/lib/apt/lists/*
9-
105
RUN apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \
116
ca-certificates \
127
cuda-command-line-tools-10-0 \
@@ -17,18 +12,22 @@ RUN apt-get update && apt-get install -y --no-install-recommends --allow-unauthe
1712
cuda-cusolver-dev-10-0 \
1813
cuda-cusparse-dev-10-0 \
1914
curl \
20-
libcudnn7=7.4.1.5-1+cuda10.0 \
15+
libcudnn7=7.5.1.10-1+cuda10.0 \
2116
# TensorFlow doesn't require libnccl anymore but Open MPI still depends on it
22-
libnccl2 \
23-
libnccl-dev \
17+
libnccl2=2.4.7-1+cuda10.0 \
18+
libgomp1 \
19+
libnccl-dev=2.4.7-1+cuda10.0 \
2420
libfreetype6-dev \
2521
libhdf5-serial-dev \
2622
libpng12-dev \
2723
libzmq3-dev \
24+
git \
2825
wget \
26+
vim \
27+
build-essential \
2928
openssh-client \
3029
openssh-server \
31-
build-essential && \
30+
zlib1g-dev && \
3231
# The 'apt-get install' of nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0
3332
# adds a new list which contains libnvinfer library, so it needs another
3433
# 'apt-get update' to retrieve that list before it can actually install the
@@ -42,7 +41,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends --allow-unauthe
4241
rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* && \
4342
rm /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* && \
4443
rm /usr/lib/x86_64-linux-gnu/libnvparsers* && \
45-
rm -rf /var/lib/apt/lists/*
44+
rm -rf /var/lib/apt/lists/* && \
45+
mkdir -p /var/run/sshd
4646

4747
###########################################################################
4848
# Horovod & its dependencies
@@ -60,14 +60,17 @@ RUN mkdir /tmp/openmpi && \
6060
ldconfig && \
6161
rm -rf /tmp/openmpi
6262

63-
ARG py_version
64-
ARG framework_installable
65-
ARG framework_support_installable=sagemaker_tensorflow_container-2.0.0.tar.gz
63+
ARG PYTHON=python3
64+
ARG PYTHON_PIP=python3-pip
65+
ARG PIP=pip3
66+
ARG PYTHON_VERSION=3.6.6
6667

67-
RUN if [ $py_version -eq 3 ]; then PYTHON_VERSION=python3.6; else PYTHON_VERSION=python2.7; fi && \
68-
apt-get update && apt-get install -y --no-install-recommends $PYTHON_VERSION-dev --allow-unauthenticated && \
69-
ln -s -f /usr/bin/$PYTHON_VERSION /usr/bin/python && \
70-
rm -rf /var/lib/apt/lists/*
68+
# Build CPython $PYTHON_VERSION from source and expose pip3 as /usr/bin/pip.
# The -dev headers are installed before ./configure so a single make pass is
# enough, and the apt package lists are removed in the same layer so they do
# not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libbz2-dev \
        libc6-dev \
        libgdbm-dev \
        libncursesw5-dev \
        libreadline-gplv2-dev \
        libsqlite3-dev \
        libssl-dev \
        tk-dev && \
    rm -rf /var/lib/apt/lists/* && \
    wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
    tar -xvf Python-$PYTHON_VERSION.tgz && cd Python-$PYTHON_VERSION && \
    ./configure && make && make install && \
    # Remove both the extracted source tree and the downloaded tarball.
    rm -rf ../Python-$PYTHON_VERSION* && \
    ln -s /usr/local/bin/pip3 /usr/bin/pip
7174

7275
# Create a wrapper for OpenMPI to allow running as root by default
7376
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
@@ -100,33 +103,51 @@ RUN mkdir -p /root/.ssh/ && \
100103
# Python won’t try to write .pyc or .pyo files on the import of source modules
101104
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8
102105

103-
RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \
104-
python get-pip.py --disable-pip-version-check --no-cache-dir "pip==18.1" && \
105-
rm get-pip.py
106-
107106
WORKDIR /
108107

109-
COPY $framework_installable tensorflow-1.13.1-py2.py3-none-any.whl
108+
ARG TF_URL="https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.13/AmazonLinux/gpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl"
109+
110+
RUN ${PIP} --no-cache-dir install --upgrade pip setuptools
111+
112+
# Some TF tools expect a "python" binary
113+
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
114+
115+
ARG framework_support_installable=sagemaker_tensorflow_container-2.0.0.tar.gz
110116
COPY $framework_support_installable .
111117

112-
RUN pip install --no-cache-dir -U \
118+
RUN ${PIP} install --no-cache-dir -U \
119+
numpy==1.16.2 \
120+
scipy==1.2.1 \
121+
scikit-learn==0.20.3 \
122+
pandas==0.24.2 \
123+
Pillow==5.4.1 \
124+
h5py==2.9.0 \
125+
keras_applications==1.0.7 \
126+
keras_preprocessing==1.0.9 \
127+
requests==2.21.0 \
113128
keras==2.2.4 \
129+
awscli==1.16.130 \
114130
mpi4py==3.0.1 \
115-
$framework_support_installable \
116131
"sagemaker-tensorflow>=1.13,<1.14" \
117132
# Let's install TensorFlow separately in the end to avoid
118133
# the library version being overwritten
119-
&& pip install --force-reinstall --no-cache-dir -U tensorflow-1.13.1-py2.py3-none-any.whl \
120-
\
121-
&& rm -f tensorflow-1.13.1-py2.py3-none-any.whl \
122-
&& rm -f $framework_support_installable \
123-
&& pip uninstall -y --no-cache-dir \
134+
&& ${PIP} install --force-reinstall --no-cache-dir -U ${TF_URL} \
135+
&& ${PIP} install --no-cache-dir -U $framework_support_installable && \
136+
rm -f $framework_support_installable \
137+
&& ${PIP} uninstall -y --no-cache-dir \
124138
markdown \
125139
tensorboard
126140

127141
# Install Horovod, temporarily using CUDA stubs
128142
RUN ldconfig /usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs && \
129-
HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir horovod && \
143+
HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_TENSORFLOW=1 ${PIP} install --no-cache-dir horovod==0.16.4 && \
130144
ldconfig
131145

132-
ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main
146+
# Allow OpenSSH to talk to containers without asking for confirmation
147+
RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
148+
echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
149+
mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
150+
151+
# key=value form; the space-separated ENV syntax is deprecated.
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
152+
153+
# Default to a shell. The path is absolute so the command still resolves when
# the working directory is not / (e.g. `docker run -w` or a later WORKDIR).
CMD ["/bin/bash"]

docker/1.14.0/py2/Dockerfile.cpu

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
FROM ubuntu:16.04
2+
3+
LABEL maintainer="Amazon AI"
4+
5+
RUN apt-get update && apt-get install -y --no-install-recommends \
6+
software-properties-common \
7+
build-essential \
8+
openssh-client \
9+
openssh-server \
10+
ca-certificates \
11+
curl \
12+
git \
13+
wget \
14+
vim \
15+
gcc-4.9 \
16+
g++-4.9 \
17+
gcc-4.9-base \
18+
zlib1g-dev \
19+
&& rm -rf /var/lib/apt/lists/*
20+
21+
# Install Open MPI
22+
RUN mkdir /tmp/openmpi && \
23+
cd /tmp/openmpi && \
24+
curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz && \
25+
tar zxf openmpi-4.0.1.tar.gz && \
26+
cd openmpi-4.0.1 && \
27+
./configure --enable-orterun-prefix-by-default && \
28+
make -j $(nproc) all && \
29+
make install && \
30+
ldconfig && \
31+
rm -rf /tmp/openmpi
32+
33+
# Create a wrapper for OpenMPI to allow running as root by default
34+
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
35+
echo '#!/bin/bash' > /usr/local/bin/mpirun && \
36+
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
37+
chmod a+x /usr/local/bin/mpirun
38+
39+
RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf && \
40+
echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
41+
42+
ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
43+
44+
# Prepend the Open MPI bin directory to PATH (key=value form; the
# space-separated ENV syntax is deprecated).
ENV PATH=/usr/local/openmpi/bin/:$PATH
45+
46+
# SSH login fix. Otherwise user is kicked off after login
47+
RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
48+
49+
# Create SSH key.
50+
RUN mkdir -p /root/.ssh/ && \
51+
mkdir -p /var/run/sshd && \
52+
ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa && \
53+
cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
54+
printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config
55+
56+
# Set environment variables for MKL
57+
# For more about MKL with TensorFlow see:
58+
# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
59+
ENV KMP_AFFINITY=granularity=fine,compact,1,0 KMP_BLOCKTIME=1 KMP_SETTINGS=0
60+
61+
WORKDIR /
62+
63+
ARG PYTHON=python
64+
ARG PYTHON_PIP=python-pip
65+
ARG PIP=pip
66+
67+
RUN apt-get update && apt-get install -y \
68+
${PYTHON} \
69+
${PYTHON_PIP}
70+
71+
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8
72+
73+
ARG framework_support_installable=sagemaker_tensorflow_container-2.0.0.tar.gz
74+
ARG sagemaker_tensorflow_extensions=sagemaker_tensorflow-1.14.0.1.0.0-cp27-cp27mu-manylinux1_x86_64.whl
75+
COPY $framework_support_installable .
76+
COPY $sagemaker_tensorflow_extensions .
77+
ARG TF_URL="https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.14/AmazonLinux/cpu/final/tensorflow-1.14.0-cp27-cp27mu-linux_x86_64.whl"
78+
79+
# Pin GCC to 4.9 (priority 200) to compile correctly against TensorFlow, PyTorch, and MXNet with horovod
80+
# Back up the existing GCC installation as priority 100, so that it can be recovered later.
81+
RUN update-alternatives --install /usr/bin/gcc gcc $(readlink -f $(which gcc)) 100 && \
82+
update-alternatives --install /usr/bin/x86_64-linux-gnu-gcc x86_64-linux-gnu-gcc $(readlink -f $(which gcc)) 100 && \
83+
update-alternatives --install /usr/bin/g++ g++ $(readlink -f $(which g++)) 100 && \
84+
update-alternatives --install /usr/bin/x86_64-linux-gnu-g++ x86_64-linux-gnu-g++ $(readlink -f $(which g++)) 100
85+
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.9 200 && \
86+
update-alternatives --install /usr/bin/x86_64-linux-gnu-gcc x86_64-linux-gnu-gcc /usr/bin/gcc-4.9 200 && \
87+
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.9 200 && \
88+
update-alternatives --install /usr/bin/x86_64-linux-gnu-g++ x86_64-linux-gnu-g++ /usr/bin/g++-4.9 200
89+
90+
RUN ${PIP} --no-cache-dir install --upgrade pip setuptools
91+
92+
# Some TF tools expect a "python" binary
93+
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
94+
95+
RUN ${PIP} install --no-cache-dir -U \
96+
numpy==1.16.4 \
97+
scipy==1.2.2 \
98+
scikit-learn==0.20.3 \
99+
pandas==0.24.2 \
100+
Pillow==6.1.0 \
101+
h5py==2.9.0 \
102+
keras_applications==1.0.8 \
103+
keras_preprocessing==1.1.0 \
104+
requests==2.22.0 \
105+
keras==2.2.4 \
106+
awscli==1.16.196 \
107+
mpi4py==3.0.2 \
108+
$sagemaker_tensorflow_extensions \
109+
# Let's install TensorFlow separately in the end to avoid
110+
# the library version being overwritten
111+
&& ${PIP} install --force-reinstall --no-cache-dir -U ${TF_URL} \
112+
&& ${PIP} install --no-cache-dir -U $framework_support_installable && \
113+
rm -f $framework_support_installable \
114+
&& ${PIP} install --no-cache-dir -U horovod==0.16.4 \
115+
&& ${PIP} uninstall -y --no-cache-dir \
116+
markdown
117+
118+
# Remove GCC pinning
119+
RUN update-alternatives --remove gcc /usr/bin/gcc-4.9 && \
120+
update-alternatives --remove x86_64-linux-gnu-gcc /usr/bin/gcc-4.9 && \
121+
update-alternatives --remove g++ /usr/bin/g++-4.9 && \
122+
update-alternatives --remove x86_64-linux-gnu-g++ /usr/bin/g++-4.9
123+
124+
125+
# key=value form; the space-separated ENV syntax is deprecated.
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
126+
127+
# Default to a shell. The path is absolute so the command still resolves when
# the working directory is not / (e.g. `docker run -w` or a later WORKDIR).
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)