Skip to content

Upgrading sklearn to 1.0.2 #107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ If you want to run local integration tests, then use:

# Required arguments for integration tests are found in test/conftest.py

pytest test/integration/local --docker-base-name <your_docker_image> \
pytest test/integration --docker-base-name <your_docker_image> \
--tag <your_docker_image_tag> \
--py-version <2_or_3> \
--framework-version <Scikit-learn_version>
Expand All @@ -183,8 +183,8 @@ If you want to run local integration tests, then use:
.. parsed-literal::

# Example
pytest test/integration/local --docker-base-name preprod-sklearn ``\``
--tag 1.0 ``\``
pytest test/integration --docker-base-name preprod-sklearn ``\``
--tag 1.0-1-cpu-py3 ``\``
--py-version 3 ``\``
--framework-version |FRAMEWORK_VERSION|

Expand Down Expand Up @@ -242,4 +242,4 @@ SageMaker Scikit-learn Container is licensed under the Apache 2.0 License. It is
.com, Inc. or its affiliates. All Rights Reserved. The license is available at:
http://aws.amazon.com/apache2.0/

.. |FRAMEWORK_VERSION| replace:: 0.23-1
.. |FRAMEWORK_VERSION| replace:: 1.0-1
2 changes: 1 addition & 1 deletion ci/buildspec-extension.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: 0.2

env:
variables:
SKLEARN_FRAMEWORK_VERSION: "0.23-1"
SKLEARN_FRAMEWORK_VERSION: "1.0-1"
EXTENSION_FRAMEWORK_VERSION: "2.5-1"

phases:
Expand Down
20 changes: 18 additions & 2 deletions ci/buildspec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,26 @@ version: 0.2

env:
variables:
FRAMEWORK_VERSION: "0.23-1"
FRAMEWORK_VERSION: "1.0-1"

phases:
install:
runtime-versions:
docker: 18
python: 3.7
python: 3.9
pre_build:
commands:
- echo Logging in to Amazon ECR...
- $(aws ecr get-login --no-include-email --region $AWS_DEFAULT_REGION)
- echo Installing dependencies...
- curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
- bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3
- export PATH=/miniconda3/bin:${PATH}
- conda install python=3.7
- conda update -y conda
- conda install -y 'requests<2.21' # sagemaker-python-sdk requires requests<2.21
- python3 -m pip install pip==20.1 # The new pip dependency resolver in 20.2+ can't resolve 1.0-1 and 0.90 dependencies
- python3 -m pip install .[test]
build:
commands:
- echo Build started on `date`
Expand All @@ -22,6 +31,13 @@ phases:
- pip install wheel setuptools
- python setup.py bdist_wheel
- docker build -t preprod-sklearn:$FRAMEWORK_VERSION-cpu-py3 -f docker/$FRAMEWORK_VERSION/final/Dockerfile.cpu .
- echo Running tox...
- printf "FROM preprod-sklearn:$FRAMEWORK_VERSION-cpu-py3\nADD . /app\nWORKDIR /app\nRUN python3 -m pip install .[test]" > Dockerfile.test
- docker build -t test-sklearn -f Dockerfile.test .
- docker run --rm -t test-sklearn sh -c 'pytest --cov=sagemaker_sklearn_container --cov-fail-under=60 test/unit'
- docker run --rm -t test-sklearn sh -c 'flake8 setup.py src test'
- echo Running container tests...
- pytest test/integration --docker-base-name preprod-sklearn --tag $FRAMEWORK_VERSION-cpu-py3 --py-version 3 --framework-version $FRAMEWORK_VERSION
- docker tag preprod-sklearn:$FRAMEWORK_VERSION-cpu-py3 515193369038.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:$FRAMEWORK_VERSION-cpu-py3
post_build:
commands:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ ARG UBUNTU_IMAGE_DIGEST=98706f0f213dbd440021993a82d2f70451a73698315370ae8615cc46

FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}

ARG MINICONDA_VERSION=4.8.3
ARG CONDA_PY_VERSION=37
ARG MINICONDA_VERSION=4.11.0
ARG CONDA_PY_VERSION=39
ARG CONDA_PKG_VERSION=4.9.0
ARG PYTHON_VERSION=3.7.10
ARG PYARROW_VERSION=0.16.0
Expand Down Expand Up @@ -105,4 +105,4 @@ ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.U
# Install Scikit-Learn
# Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4.
# Scikit-learn now requires Python 3.6 or newer.
RUN python -m pip install --no-cache -I scikit-learn==0.23.2
RUN python -m pip install --no-cache -I scikit-learn==1.0.2
115 changes: 115 additions & 0 deletions docker/1.0-1/base/Dockerfile_aarm64.cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
ARG UBUNTU_VERSION=18.04
ARG UBUNTU_IMAGE_DIGEST=646942475da61b4ce9cc5b3fadb42642ea90e5d0de46111458e100ff2c7031e6

FROM ubuntu:${UBUNTU_VERSION}@sha256:${UBUNTU_IMAGE_DIGEST}

# Build-time version pins for this image; each can be overridden with --build-arg.
# Dockerfiles do not support trailing comments: a '#' that follows an instruction
# is passed to that instruction as an argument, so comments live on their own lines.

# Miniconda installer release (upgraded version); part of the installer download URL.
ARG MINICONDA_VERSION=4.9.2
# Python tag of the Miniconda installer; part of the installer download URL.
ARG CONDA_PY_VERSION=39
# conda package version that is pinned and installed into /miniconda3.
ARG CONDA_PKG_VERSION=4.10.1
# Python interpreter version that is pinned and installed from conda-forge.
ARG PYTHON_VERSION=3.7.10
# pyarrow version installed from conda-forge before MLIO is built.
ARG PYARROW_VERSION=1.0
# Branch of the awslabs/ml-io repository that is cloned and built.
ARG MLIO_VERSION=arch-agnostic

# Install python and other scikit-learn runtime dependencies
# Dependency list from http://scikit-learn.org/stable/developers/advanced_installation.html#installing-build-dependencies
RUN apt-get update && \
apt-get -y upgrade && \
apt-get -y install --no-install-recommends \
build-essential \
curl \
git \
jq \
libatlas-base-dev \
nginx \
openjdk-8-jdk-headless \
unzip \
wget \
&& \

apt-get -y install --no-install-recommends \
apt-transport-https \
ca-certificates \
gnupg \
software-properties-common \
autoconf \
automake \
build-essential \
libssl-dev \
&& \
# MLIO build dependencies
# Official Ubuntu APT repositories do not contain an up-to-date version of CMake required to build MLIO.
# Kitware contains the latest version of CMake.
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \
gpg --dearmor - | \
tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' && \
wget https://cmake.org/files/v3.18/cmake-3.18.4.tar.gz && \
tar -xzvf cmake-3.18.4.tar.gz && \
cd cmake-3.18.4 && \
./configure && \
make -j$(nproc) && \
make install && \
apt-get update && \
apt-get install -y --no-install-recommends \

cmake-data=3.18.4-0kitware1 \
doxygen \
kitware-archive-keyring \
libcurl4-openssl-dev \
libtool \
ninja-build \
python3-dev \
python3-distutils \
python3-pip \
zlib1g-dev \
&& \
rm /etc/apt/trusted.gpg.d/kitware.gpg && \
rm -rf /var/lib/apt/lists/*

RUN cd /tmp && \
curl -L --output /tmp/Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-aarch64.sh && \
bash /tmp/Miniconda3.sh -bfp /miniconda3 && \
rm /tmp/Miniconda3.sh

ENV PATH=/miniconda3/bin:${PATH}

# Install MLIO with Apache Arrow integration
# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size
# which increases training time. We build from source to minimize the image size.
RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
# Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html
conda config --system --set auto_update_conda false && \
conda config --system --set show_channel_urls true && \
echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \
conda install -c conda-forge python=${PYTHON_VERSION} && \
conda install conda=${CONDA_PKG_VERSION} && \
conda update -y conda && \
conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \
cd /tmp && \
git clone --branch ${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \
cd mlio && \
build-tools/build-dependency build/third-party all && \
mkdir -p build/release && \
cd build/release && \
cmake -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH="$(pwd)/../third-party" ../.. && \
cmake --build . && \
cmake --build . --target install && \
cmake -DMLIO_INCLUDE_PYTHON_EXTENSION=ON -DMLIO_INCLUDE_ARROW_INTEGRATION=ON ../.. && \
cmake --build . --target mlio-py && \
cmake --build . --target mlio-arrow && \
cd ../../src/mlio-py && \
python3 setup.py bdist_wheel && \
python3 -m pip install --upgrade pip && \
python3 -m pip install dist/*.whl && \
cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \
ldconfig && \
rm -rf /tmp/mlio

# Python won’t try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8

# Install Scikit-Learn
# Scikit-learn 0.20 was the last version to support Python 2.7 and Python 3.4.
# Scikit-learn 1.0 requires Python 3.7 or newer.
RUN python -m pip install --no-cache -I scikit-learn==1.0.2
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
FROM preprod-sklearn:0.23-1-cpu-py3
FROM preprod-sklearn:1.0-1-cpu-py3

RUN pip freeze | grep -q 'scikit-learn==0.23.2'; \
RUN pip freeze | grep -q 'scikit-learn==1.0.2'; \
if [ $? -eq 0 ]; \
then echo 'scikit-learn version 0.23.2 requirement met'; \
else echo 'ERROR: Expected scikit-learn version is 0.23.2, check base images for scikit-learn version' && \
then echo 'scikit-learn version 1.0.2 requirement met'; \
else echo 'ERROR: Expected scikit-learn version is 1.0.2, check base images for scikit-learn version' && \
exit 1; fi

RUN pip install --upgrade --no-cache --no-deps sagemaker-scikit-learn-extension==2.5.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ The SageMaker Scikit-learn Extension Container is built in 3 steps. The first 2

The "base" Dockerfile encompasses the installation of the framework and all of the dependencies needed.

Tagging scheme is based on <Scikit-learn_version>-<SageMaker_version>-cpu-py<python_version>. (e.g. 0.23-1-cpu-py3)
Tagging scheme is based on <Scikit-learn_version>-<SageMaker_version>-cpu-py<python_version>. (e.g. 1.0-1-cpu-py3)

All "final" Dockerfiles build images using base images that use the tagging scheme above.

```
docker build -t sklearn-base:0.23-1-cpu-py3 -f docker/0.23-1/base/Dockerfile.cpu .
docker build -t sklearn-base:1.0-1-cpu-py3 -f docker/1.0-1/base/Dockerfile.cpu .
```

Notice that this Dockerfile has the updated version of sklearn (0.23.2) installed.
Notice that this Dockerfile has the updated version of sklearn (1.0.2) installed.

### Step 2: Final Image

Expand All @@ -38,7 +38,7 @@ python setup.py bdist_wheel
Then build the final image, like in the sagemaker-sklearn-container

```
docker build -t preprod-sklearn:0.23-1-cpu-py3 -f docker/0.23-1/final/Dockerfile.cpu .
docker build -t preprod-sklearn:1.0-1-cpu-py3 -f docker/1.0-1/final/Dockerfile.cpu .
```

### Step 3: Build the extension image for SageMaker Scikit-learn Extension Container
Expand All @@ -47,10 +47,10 @@ The "extension" Dockerfiles encompass the installation of the SageMaker Autopilo

The "extension" Dockerfiles use final images for building.

Build the third additional Dockerfile needed for SageMaker Scikit-learn Extension Container. This Dockerfile specifies a hard dependency on a certain version of scikit-learn (i.e. v0.23.2).
Build the third additional Dockerfile needed for SageMaker Scikit-learn Extension Container. This Dockerfile specifies a hard dependency on a certain version of scikit-learn (i.e. v1.0.2).

Tagging scheme is based on extension-<Scikit-learn-Extension_version>-<SageMaker_version>-cpu-py<python_version>. (e.g. extension-2.5-1-cpu-py3). Make sure the "extension" image is tagged in accordance with the `extension` (i.e. `extension-2.5-1-cpu-py3`).

```
docker build -t preprod-sklearn-extension:2.5-1-cpu-py3 -f docker/0.23-1/extension/Dockerfile.cpu .
docker build -t preprod-sklearn-extension:2.5-1-cpu-py3 -f docker/1.0-1/extension/Dockerfile.cpu .
```
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FROM sklearn-base:0.23-1-cpu-py3
ENV SAGEMAKER_SKLEARN_VERSION 0.23-1
FROM sklearn-base:1.0-1-cpu-py3
ENV SAGEMAKER_SKLEARN_VERSION 1.0-1

LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

Expand Down Expand Up @@ -54,4 +54,3 @@ ENV TEMP=/home/model-server/tmp

# Required label for multi-model loading
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

56 changes: 56 additions & 0 deletions docker/1.0-1/final/Dockerfile_aarm64.cpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
FROM sklearn-base:1.0-1-arm64-cpu-py3
ENV SAGEMAKER_SKLEARN_VERSION 1.0-1

LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

COPY requirements.txt /requirements.txt
RUN python -m pip install -r /requirements.txt && \
rm /requirements.txt

COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_container-2.0-py3-none-any.whl
# # https://github.com/googleapis/google-cloud-python/issues/6647
RUN rm -rf /miniconda3/lib/python3.7/site-packages/numpy-1.19.4.dist-info && \
pip install --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \
rm /sagemaker_sklearn_container-2.0-py3-none-any.whl

ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main
ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main

#######
# MMS #
#######
# Create MMS user directory
RUN useradd -m model-server
RUN mkdir -p /home/model-server/tmp
RUN chown -R model-server /home/model-server

# Copy MMS configs
COPY docker/$SAGEMAKER_SKLEARN_VERSION/resources/mms/config.properties.tmp /home/model-server
ENV SKLEARN_MMS_CONFIG=/home/model-server/config.properties

# Copy execution parameters endpoint plugin for MMS
RUN mkdir -p /tmp/plugins
COPY docker/$SAGEMAKER_SKLEARN_VERSION/resources/mms/endpoints-1.0.jar /tmp/plugins
RUN chmod +x /tmp/plugins/endpoints-1.0.jar

# Create directory for models
RUN mkdir -p /opt/ml/models
RUN chmod +rwx /opt/ml/models

#####################
# Required ENV vars #
#####################
# SageMaker training environment: root input directory first, then the config
# files located under it. Each variable keeps its own ENV instruction so that
# $SM_INPUT is already defined when the following lines reference it.
ENV SM_INPUT=/opt/ml/input
ENV SM_INPUT_TRAINING_CONFIG_FILE=$SM_INPUT/config/hyperparameters.json
ENV SM_INPUT_DATA_CONFIG_FILE=$SM_INPUT/config/inputdataconfig.json
ENV SM_CHECKPOINT_CONFIG_FILE=$SM_INPUT/config/checkpointconfig.json

# SageMaker serving environment: directory the model is read from.
ENV SM_MODEL_DIR=/opt/ml/model

#EXPOSE 8080
ENV TEMP=/home/model-server/tmp

# Required label for multi-model loading
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ retrying==1.3.3
sagemaker-containers==2.8.6.post2
sagemaker-inference==1.2.0
sagemaker-training==4.0.1
scikit-learn==0.23.2
scikit-learn==1.0.2
scipy==1.5.3
six==1.15.0
jinja2==2.10.2
MarkupSafe==1.1.1
Werkzeug==0.15.6
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def read(fname):
"Programming Language :: Python",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.9",
],

install_requires=read("requirements.txt"),
Expand Down
2 changes: 1 addition & 1 deletion test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def pytest_addoption(parser):
parser.addoption('--install-container-support', '-C', action='store_true')
parser.addoption('--docker-base-name', default='sk-learn')
parser.addoption('--region', default='us-west-2')
parser.addoption('--framework-version', default='0.20.0')
parser.addoption('--framework-version', default='1.0.2')
parser.addoption('--py-version', choices=['2', '3'], default=str(sys.version_info.major))
parser.addoption('--processor', choices=['cpu'], default='cpu')
# If not specified, will default to {framework-version}-{processor}-py{py-version}
Expand Down
10 changes: 7 additions & 3 deletions test/integration/test_multiple_model_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,22 @@ def modulevolume():
@pytest.fixture(scope='module', autouse=True)
def container(request, docker_base_name, tag):
test_name = 'sagemaker-sklearn-serving-test'
module_dir = os.path.join(resource_path, 'module')
model_dir = os.path.join(resource_path, 'models')
try:
command = (
'docker run --name {} -p 8080:8080'
' --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/model,readonly'
' --mount type=volume,source=dynamic_endpoint_module_volume,target=/user_module,readonly'
# ' --mount type=volume,source=dynamic_endpoint_model_volume,target=/opt/ml/model,readonly'
# ' --mount type=volume,source=dynamic_endpoint_module_volume,target=/user_module,readonly'
' -v {}:/opt/ml/model'
' -v {}:/user_module'
' -e SAGEMAKER_BIND_TO_PORT=8080'
' -e SAGEMAKER_SAFE_PORT_RANGE=9000-9999'
' -e SAGEMAKER_MULTI_MODEL=true'
' -e SAGEMAKER_PROGRAM={}'
' -e SAGEMAKER_SUBMIT_DIRECTORY={}'
' {}:{} serve'
).format(test_name, 'script.py', "/user_module/user_code.tar.gz", docker_base_name, tag)
).format(test_name, model_dir, module_dir, 'script.py', "/user_module/user_code.tar.gz", docker_base_name, tag)

proc = subprocess.Popen(command.split(), stdout=sys.stdout, stderr=subprocess.STDOUT)

Expand Down
Binary file modified test/resources/models/pickled-model-1/sklearn-model
Binary file not shown.
Binary file modified test/resources/models/pickled-model-2/sklearn-model
Binary file not shown.
Binary file modified test/resources/module/user_code.tar.gz
Binary file not shown.
Loading