Change model server to TorchServe for PyTorch inference

dk19y · dk19y · commit 8225190dcd09 · 2020-07-10T13:36:04.000Z
diff --git a/artifacts/ts-entrypoint.py b/artifacts/ts-entrypoint.py
diff --git a/src/sagemaker_pytorch_serving_container/serving.py b/src/sagemaker_pytorch_serving_container/serving.py
@@ -15,11 +15,11 @@
 from subprocess import CalledProcessError
 
 from retrying import retry
-from sagemaker_inference import model_server
+from sagemaker_inference import torchserve
 
 from sagemaker_pytorch_serving_container import handler_service
 
-HANDLER_SERVICE = handler_service.__name__
+HANDLER_SERVICE = handler_service.__file__
 
 
 def _retry_if_error(exception):
@@ -32,7 +32,7 @@ def _start_model_server():
     # there's a race condition that causes the model server command to
     # sometimes fail with 'bad address'. more investigation needed
     # retry starting mms until it's ready
-    model_server.start_model_server(handler_service=HANDLER_SERVICE)
+    torchserve.start_model_server(handler_service=HANDLER_SERVICE)
 
 
 def main():
diff --git a/test/container/1.5.0/Dockerfile.pytorch b/test/container/1.5.0/Dockerfile.pytorch
@@ -3,25 +3,29 @@ FROM pytorch/pytorch:1.5-cuda10.1-cudnn7-runtime
 LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
 LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
 
-ARG MMS_VERSION=1.0.8
+ARG TS_VERSION=0.1.1
 
 ENV SAGEMAKER_SERVING_MODULE sagemaker_pytorch_serving_container.serving:main
 ENV TEMP=/home/model-server/tmp
 
 RUN apt-get update \
+    && apt-get install -y --no-install-recommends software-properties-common \
+    && add-apt-repository ppa:openjdk-r/ppa \
+    && apt-get update \
     && apt-get install -y --no-install-recommends \
     libgl1-mesa-glx \
     libglib2.0-0 \
     libsm6 \
     libxext6 \
     libxrender-dev \
-    openjdk-8-jdk-headless \
+    openjdk-11-jdk \
     && rm -rf /var/lib/apt/lists/*
 
 RUN conda install -c conda-forge opencv==4.0.1 \
     && ln -s /opt/conda/bin/pip /usr/local/bin/pip3
 
-RUN pip install mxnet-model-server==$MMS_VERSION
+RUN pip install torchserve==$TS_VERSION \
+ && pip install torch-model-archiver==$TS_VERSION
 
 COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
 RUN pip install --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
@@ -31,11 +35,11 @@ RUN useradd -m model-server \
     && mkdir -p /home/model-server/tmp \
     && chown -R model-server /home/model-server
 
-COPY artifacts/mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY artifacts/ts-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
 COPY artifacts/config.properties /home/model-server
 
 RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
 
 EXPOSE 8080 8081
 ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
-CMD ["mxnet-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]
+CMD ["torchserve", "--start", "--ts-config", "/home/model-server/config.properties", "--model-store", "/home/model-server/"]
diff --git a/test/unit/test_serving.py b/test/unit/test_serving.py
@@ -15,11 +15,9 @@
 from mock import patch
 
 
-@patch('sagemaker_inference.model_server.start_model_server')
+@patch('sagemaker_inference.torchserve.start_model_server')
 def test_hosting_start(start_model_server):
     from sagemaker_pytorch_serving_container import serving
 
     serving.main()
-
-    start_model_server.assert_called_with(
-        handler_service='sagemaker_pytorch_serving_container.handler_service')
+    start_model_server.assert_called()