Skip to content

Commit 0c2d69b

Browse files
authored
Add TensorFlow 1.6 (#24)
* Add Dockerfile for TF 1.6
* Try using tensorflow-model-server 1.5.0
* Attempt migration to tf.estimator.train_and_evaluate
* Install container support from PyPI
* Fix exporter naming and make serving_input_fn optional
* Use lambdas correctly to create input_fns
* Fix tests to return values instead of functions from user modules (needed for the 1.6 interface changes)
* Re-add the needed saves_training method
* Add GPU Dockerfile
* Fix tar name in GPU Dockerfile
* Split the 1.6 Dockerfiles into "base" and "final" Dockerfiles because the TF Serving builds are expensive
* Temporarily enable TF debug logging
* Add experiment_trainer.py to maintain existing functionality for 1.4 and 1.5
* Start adding unit tests
* Add more unit tests for trainer.py
* Change TF logging verbosity back to INFO
1 parent a55cd72 commit 0c2d69b

19 files changed

+1282
-547
lines changed

docker/1.6.0/base/Dockerfile.cpu

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Base image for the TensorFlow 1.6.0 CPU container: OS packages, pip,
# Python libraries and the prebuilt TensorFlow Serving model server.
FROM ubuntu:16.04

# OS-level dependencies. The apt caches are removed in the same layer that
# creates them so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libcurl3-dev \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        pkg-config \
        python-dev \
        rsync \
        software-properties-common \
        unzip \
        zip \
        zlib1g-dev \
        openjdk-8-jdk \
        openjdk-8-jre-headless \
        wget \
        vim \
        iputils-ping \
        nginx \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Bootstrap pip for the system Python. The unversioned get-pip.py no longer
# supports Python 2.7, so the 2.7-specific script is used.
RUN curl -fSsL -O https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Scientific Python stack. `scikit-learn` is the real distribution name;
# `sklearn` on PyPI is a deprecated alias.
RUN pip --no-cache-dir install \
        numpy \
        scipy \
        scikit-learn \
        pandas \
        h5py

WORKDIR /root

# Serving/runtime Python dependencies (numpy is already installed above).
RUN pip --no-cache-dir install \
        boto3 \
        six \
        awscli \
        flask==0.11 \
        Jinja2==2.9 \
        tensorflow-serving-api==1.5 \
        gevent \
        gunicorn

# install tensorflow-model-server 1.5. 1.6 is not working as of 3/29/2018 for unknown reasons.
# The package is fetched over HTTPS and deleted in the same layer after
# installation so the .deb does not stay in the image.
RUN wget -O /tmp/tensorflow-model-server_1.5.0_all.deb \
        'https://storage.googleapis.com/tensorflow-serving-apt/pool/tensorflow-model-server/t/tensorflow-model-server/tensorflow-model-server_1.5.0_all.deb' && \
    dpkg -i /tmp/tensorflow-model-server_1.5.0_all.deb && \
    rm /tmp/tensorflow-model-server_1.5.0_all.deb
46+

docker/1.6.0/base/Dockerfile.gpu

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# Base image for the TensorFlow 1.6.0 GPU container, built on the CUDA 9.0
# Ubuntu 16.04 base image.
FROM nvidia/cuda:9.0-base-ubuntu16.04

# OS, CUDA and cuDNN packages (alphabetical). In the same layer: drop the apt
# lists, delete every static CUDA library except libcudart_static.a, and
# remove the static cuDNN archive.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        cuda-command-line-tools-9-0 \
        cuda-cublas-dev-9-0 \
        cuda-cudart-dev-9-0 \
        cuda-cufft-dev-9-0 \
        cuda-curand-dev-9-0 \
        cuda-cusolver-dev-9-0 \
        cuda-cusparse-dev-9-0 \
        curl \
        git \
        iputils-ping \
        libcudnn7=7.0.5.15-1+cuda9.0 \
        libcudnn7-dev=7.0.5.15-1+cuda9.0 \
        libcurl3-dev \
        libfreetype6-dev \
        libpng12-dev \
        libzmq3-dev \
        nginx \
        pkg-config \
        python-dev \
        rsync \
        software-properties-common \
        unzip \
        vim \
        wget \
        zip \
        zlib1g-dev \
 && rm -rf /var/lib/apt/lists/* \
 && find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete \
 && rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
35+
36+
# Bootstrap pip for the system Python. The unversioned get-pip.py no longer
# supports Python 2.7, so the 2.7-specific script is used.
RUN curl -fSsL -O https://bootstrap.pypa.io/pip/2.7/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Scientific Python stack. `scikit-learn` is the real distribution name;
# `sklearn` on PyPI is a deprecated alias.
RUN pip --no-cache-dir install \
        numpy \
        scipy \
        scikit-learn \
        pandas \
        h5py

# TensorFlow Serving client API (numpy is already installed above).
RUN pip --no-cache-dir install tensorflow-serving-api==1.5

# Set up grpc
# NOTE(review): grpcio is requested from the TestPyPI index, which is not a
# trusted package source; confirm whether the regular index can be used.
RUN pip --no-cache-dir install enum34 futures mock six && \
    pip --no-cache-dir install --pre 'protobuf>=3.0.0a3' && \
    pip --no-cache-dir install -i https://testpypi.python.org/simple --pre grpcio
53+
54+
# Bazel installation.
#
# Bazel causes trouble when run inside a `docker build` command, see
# https://github.com/bazelbuild/bazel/issues/134. Forcing --batch through a
# bazelrc file is the easiest workaround.
RUN echo "startup --batch" >>/etc/bazel.bazelrc
# Sandboxing needs a similar workaround, see
# https://github.com/bazelbuild/bazel/issues/418.
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
    >>/etc/bazel.bazelrc
# Bazel release to install.
ENV BAZEL_VERSION=0.8.0
WORKDIR /
# Download the installer and the Bazel license into /bazel, run the
# installer, then delete the installer script in the same layer.
RUN mkdir /bazel \
 && cd /bazel \
 && browser_ua="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" \
 && curl -H "User-Agent: $browser_ua" -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
 && curl -H "User-Agent: $browser_ua" -fSsL -o /bazel/LICENSE.txt https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE \
 && chmod +x bazel-*.sh \
 && ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
 && cd / \
 && rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
75+
76+
# Environment for the CUDA-enabled build: the Python interpreter name, the
# CUPTI library directory prepended to the library search path, and the
# CUDA/cuDNN settings exported as TF_* variables.
ENV CI_BUILD_PYTHON=python \
    LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH \
    TF_NEED_CUDA=1 \
    TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.1 \
    TF_CUDA_VERSION=9.0 \
    TF_CUDNN_VERSION=7 \
    CUDNN_INSTALL_PATH=/usr/lib/x86_64-linux-gnu

# TensorFlow Serving release checked out and built further down.
ENV TF_SERVING_VERSION=1.5.0
86+
87+
# Download TensorFlow Serving, configure TensorFlow for the GPU, build
# tensorflow_model_server and install it in /usr/local/bin.
#
# Everything happens in a single RUN on purpose: files deleted by a later
# instruction still occupy space in the earlier layers, so removing /serving
# and /bazel in a separate RUN would not make the image any smaller.
# The working directory is left at / because /serving no longer exists once
# this step finishes.
WORKDIR /
RUN git clone --recurse-submodules https://github.com/tensorflow/serving /serving && \
    cd /serving && \
    git checkout $TF_SERVING_VERSION && \
    git clone --recursive https://github.com/tensorflow/tensorflow.git && \
    ( cd tensorflow && \
      git checkout v$TF_SERVING_VERSION && \
      tensorflow/tools/ci_build/builds/configured GPU ) && \
    bazel build -c opt --config=cuda \
        --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
        --crosstool_top=@local_config_cuda//crosstool:toolchain \
        tensorflow_serving/model_servers:tensorflow_model_server && \
    cp bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server /usr/local/bin/ && \
    bazel clean --expunge && \
    cd / && \
    rm -rf /serving /bazel
111+

docker/1.6.0/final/py2/Dockerfile.cpu

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Use local version of image built from Dockerfile.cpu in /docker/1.6.0/base
FROM tensorflow-base:1.6.0-cpu-py2
# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="Amazon AI"

# Build-context paths of the two packages that are copied in and pip-installed.
ARG framework_installable
ARG framework_support_installable=sagemaker_tensorflow_container-1.0.0.tar.gz

WORKDIR /root

# Will install from pypi once packages are released there. For now, copy from local file system.
COPY $framework_installable .
COPY $framework_support_installable .

# Install both packages by file name, then delete the copied archives.
# Variables are quoted so paths containing spaces or glob characters work.
RUN framework_installable_local="$(basename "$framework_installable")" && \
    framework_support_installable_local="$(basename "$framework_support_installable")" && \
    pip install --no-cache-dir "$framework_installable_local" && \
    pip install --no-cache-dir "$framework_support_installable_local" && \
    rm "$framework_installable_local" && \
    rm "$framework_support_installable_local"

# entry.py comes from sagemaker-container-support
ENTRYPOINT ["entry.py"]

docker/1.6.0/final/py2/Dockerfile.gpu

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Use local version of image built from Dockerfile.gpu in /docker/1.6.0/base
FROM tensorflow-base:1.6.0-gpu-py2
# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="Amazon AI"

# Build-context paths of the two packages that are copied in and pip-installed.
ARG framework_installable
ARG framework_support_installable=sagemaker_tensorflow_container-1.0.0.tar.gz

WORKDIR /root

# Will install from pypi once packages are released there. For now, copy from local file system.
COPY $framework_installable .
COPY $framework_support_installable .

# Install both packages by file name, then delete the copied archives.
# Variables are quoted so paths containing spaces or glob characters work.
RUN framework_installable_local="$(basename "$framework_installable")" && \
    framework_support_installable_local="$(basename "$framework_support_installable")" && \
    pip install --no-cache-dir "$framework_installable_local" && \
    pip install --no-cache-dir "$framework_support_installable_local" && \
    rm "$framework_installable_local" && \
    rm "$framework_support_installable_local"

# entry.py comes from sagemaker-container-support
ENTRYPOINT ["entry.py"]
25+

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def read(fname):
3636

3737
install_requires=['sagemaker-container-support'],
3838
extras_require={
39-
'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist', 'mock', 'run',
39+
'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist', 'mock',
4040
'sagemaker', 'tensorflow']
4141
},
4242
)

0 commit comments

Comments
 (0)