Skip to content

test: Vspecinteg2 #3249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions tests/data/marketplace/iris/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM public.ecr.aws/ubuntu/ubuntu:18.04

# Specify encoding
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

# Install python-pip
RUN apt-get update \
    && apt-get install -y python3.6 python3-pip \
    && ln -s /usr/bin/python3.6 /usr/bin/python \
    && ln -s /usr/bin/pip3 /usr/bin/pip;

# Install flask server and model dependencies.
# NOTE: the PyPI name "sklearn" is a deprecated alias whose installation now
# fails by design; the actual package is "scikit-learn".
RUN pip install -U flask gunicorn joblib scikit-learn;

# Copy scoring logic and model artifacts into the docker image
COPY scoring_logic.py /scoring_logic.py
COPY wsgi.py /wsgi.py
COPY model-artifacts.joblib /opt/ml/model/model-artifacts.joblib
COPY serve /opt/program/serve

RUN chmod 755 /opt/program/serve
ENV PATH=/opt/program:${PATH}
Binary file not shown.
108 changes: 108 additions & 0 deletions tests/data/marketplace/iris/scoring_logic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from enum import IntEnum
import json
import logging
import re
from flask import Flask
from flask import request
from joblib import dump, load
import numpy as np
import os

logger = logging.getLogger(__name__)


# Integer-coded labels for the three iris species the model can predict.
IrisLabel = IntEnum("IrisLabel", [("setosa", 0), ("versicolor", 1), ("virginica", 2)])


class IrisModel:
    """Iris classifier loaded lazily from a joblib artifact.

    Accepts inference payloads in CSV, JSON, and JSON-lines form and returns
    either label names or integer label codes.
    """

    LABELS = IrisLabel
    NUM_FEATURES = 4  # each observation is a vector of four measurements

    def __init__(self, model_path):
        self.model_path = model_path
        self._model = None

    # Cache the model to prevent repeatedly loading it for every request
    @property
    def model(self):
        if self._model is None:
            self._model = load(self.model_path)
        return self._model

    def predict_from_csv(self, lines, **kwargs):
        """Predict from a CSV payload with one observation per line.

        genfromtxt ignores blank lines, so trailing newlines are harmless.
        """
        data = np.genfromtxt(lines.split("\n"), delimiter=",")
        return self.predict(data, **kwargs)

    def predict_from_json(self, obj, **kwargs):
        """Predict from a JSON payload: {"instances": [{"features": [...]}, ...]}."""
        req = json.loads(obj)
        instances = req["instances"]
        x = np.array([instance["features"] for instance in instances])
        return self.predict(x, **kwargs)

    def predict_from_jsonlines(self, obj, **kwargs):
        """Predict from a JSON-lines payload: one {"features": [...]} per line.

        Blank lines (e.g. a trailing newline) are skipped; previously they
        caused json.loads("") to raise a JSONDecodeError.
        """
        x = np.array(
            [json.loads(line)["features"] for line in obj.split("\n") if line.strip()]
        )
        return self.predict(x, **kwargs)

    def predict(self, x, return_names=True):
        """Classify x (reshaped to (-1, NUM_FEATURES)).

        Returns a list of label names when return_names is True, otherwise a
        list of integer label codes.
        """
        label_codes = self.model.predict(x.reshape(-1, IrisModel.NUM_FEATURES))

        if return_names:
            predictions = [IrisModel.LABELS(code).name for code in label_codes]
        else:
            predictions = label_codes.tolist()

        return predictions


# MIME types accepted in requests to /invocations and offered for responses.
SUPPORTED_REQUEST_MIMETYPES = ["text/csv", "application/json", "application/jsonlines"]
SUPPORTED_RESPONSE_MIMETYPES = ["application/json", "application/jsonlines", "text/csv"]

# Flask app served by gunicorn (see wsgi.py); the artifact path matches where
# the Dockerfile copies model-artifacts.joblib into the image.
app = Flask(__name__)
model = IrisModel(model_path="/opt/ml/model/model-artifacts.joblib")

# Create a path for health checks
@app.route("/ping")
def endpoint_ping():
    """Respond with an empty 200 body for container health checks."""
    return ""


# Create a path for inference
@app.route("/invocations", methods=["POST"])
def endpoint_invocations():
    """Run inference on the POSTed payload.

    The request Content-Type selects the decoder (CSV, JSON, or JSON-lines)
    and the Accept header selects the response encoding. Failures are
    returned as plain-text error messages.
    """
    try:
        logger.info(f"Processing request: {request.headers}")
        # Fixed copy-paste bug: this previously logged request.headers again
        # instead of the payload.
        logger.debug(f"Payload: {request.get_data()}")

        if request.content_type not in SUPPORTED_REQUEST_MIMETYPES:
            logger.error(f"Unsupported Content-Type specified: {request.content_type}")
            # NOTE(review): this error is returned with HTTP 200; consider 415.
            return f"Invalid Content-Type. Supported Content-Types: {', '.join(SUPPORTED_REQUEST_MIMETYPES)}"
        elif request.content_type == "text/csv":
            # Step 1: Decode payload into input format expected by model
            data = request.get_data().decode("utf8")
            # Step 2: Perform inference with the loaded model
            predictions = model.predict_from_csv(data)
        elif request.content_type == "application/json":
            data = request.get_data().decode("utf8")
            predictions = model.predict_from_json(data)
        elif request.content_type == "application/jsonlines":
            data = request.get_data().decode("utf8")
            predictions = model.predict_from_jsonlines(data)

        # Step 3: Process predictions into the specified response type (if specified)
        response_mimetype = request.accept_mimetypes.best_match(
            SUPPORTED_RESPONSE_MIMETYPES, default="application/json"
        )

        if response_mimetype == "text/csv":
            response = "\n".join(predictions)
        elif response_mimetype == "application/jsonlines":
            response = "\n".join([json.dumps({"class": pred}) for pred in predictions])
        elif response_mimetype == "application/json":
            response = json.dumps({"predictions": [{"class": pred} for pred in predictions]})

        return response
    except Exception as e:
        # Surface the failure text to the caller to ease debugging of payloads.
        return f"Error during model invocation: {str(e)} for input: {request.get_data()}"
6 changes: 6 additions & 0 deletions tests/data/marketplace/iris/serve
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# List the model artifacts baked into the image (debugging aid in the logs).
ls -lah /opt/ml/model

# Run gunicorn server on port 8080 for SageMaker.
# exec replaces the shell so gunicorn becomes PID 1 and receives SIGTERM
# directly when the container is stopped, instead of the signal dying in bash.
exec gunicorn --worker-tmp-dir /dev/shm --bind 0.0.0.0:8080 wsgi:app
4 changes: 4 additions & 0 deletions tests/data/marketplace/iris/wsgi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# WSGI entry point: exposes the Flask app from scoring_logic for gunicorn
# (referenced as "wsgi:app" in the serve script). Running this module
# directly starts Flask's built-in development server instead.
from scoring_logic import app

if __name__ == "__main__":
    app.run()
142 changes: 141 additions & 1 deletion tests/integ/test_marketplace.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,31 @@
import itertools
import os
import time
import requests

import pandas
import pytest
import docker

import sagemaker
import tests.integ
from sagemaker import AlgorithmEstimator, ModelPackage
from sagemaker import AlgorithmEstimator, ModelPackage, Model
from sagemaker.serializers import CSVSerializer
from sagemaker.tuner import IntegerParameter, HyperparameterTuner
from sagemaker.utils import sagemaker_timestamp, _aws_partition, unique_name_from_base
from tests.integ import DATA_DIR
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
from tests.integ.marketplace_utils import REGION_ACCOUNT_MAP
from tests.integ.test_multidatamodel import (
_ecr_image_uri,
_ecr_login,
_create_repository,
_delete_repository,
)
from tests.integ.retry import retries
import logging

logger = logging.getLogger(__name__)

# All these tests require a manual 1 time subscription to the following Marketplace items:
# Algorithm: Scikit Decision Trees
Expand Down Expand Up @@ -186,6 +197,135 @@ def predict_wrapper(endpoint, session):
print(predictor.predict(test_x.values).decode("utf-8"))


@pytest.fixture(scope="module")
def iris_image(sagemaker_session):
    """Build the iris-classifier Docker image and push it to a fresh ECR repo.

    Yields the ECR image URI for the tests in this module; deletes the
    repository on teardown.
    """
    algorithm_name = unique_name_from_base("iris-classifier")
    ecr_image = _ecr_image_uri(sagemaker_session, algorithm_name)
    ecr_client = sagemaker_session.boto_session.client("ecr")
    username, password = _ecr_login(ecr_client)

    docker_client = docker.from_env()

    # Build and tag docker image locally
    path = os.path.join(DATA_DIR, "marketplace", "iris")
    image, build_logs = docker_client.images.build(
        path=path,
        tag=algorithm_name,
        rm=True,
    )
    image.tag(ecr_image, tag="latest")
    _create_repository(ecr_client, algorithm_name)

    # Retry docker image push
    for _ in retries(3, "Upload docker image to ECR repo", seconds_to_sleep=10):
        try:
            docker_client.images.push(
                ecr_image, auth_config={"username": username, "password": password}
            )
            # Push succeeded; stop retrying.
            break
        except requests.exceptions.ConnectionError:
            # This can happen when we try to create multiple repositories in parallel, so we retry
            pass

    yield ecr_image

    # Delete repository after the marketplace integration tests complete
    _delete_repository(ecr_client, algorithm_name)


def test_create_model_package(sagemaker_session, boto_session, iris_image):
    """Integration test: Model.register() creates a marketplace model package.

    Uploads a one-row CSV to S3 as the validation input, registers the iris
    container image as a model package with a ValidationSpecification, then
    asserts the package appears in list_model_packages (deleting it first so
    the account is left clean).
    """

    # Prepare
    s3_bucket = sagemaker_session.default_bucket()

    model_name = "my-flower-detection-model"
    model_description = "This model accepts petal length, petal width, sepal length, sepal width and predicts whether \
flower is of type setosa, versicolor, or virginica"

    # A single instance type is used for both realtime inference and batch
    # transform validation.
    supported_realtime_inference_instance_types = supported_batch_transform_instance_types = [
        "ml.m4.xlarge"
    ]
    supported_content_types = ["text/csv", "application/json", "application/jsonlines"]
    supported_response_MIME_types = ["application/json", "text/csv", "application/jsonlines"]

    validation_input_path = "s3://" + s3_bucket + "/validation-input-csv/"
    validation_output_path = "s3://" + s3_bucket + "/validation-output-csv/"

    iam = boto_session.resource("iam")
    role = iam.Role("SageMakerRole").arn
    sm_client = boto_session.client("sagemaker")
    s3_client = boto_session.client("s3")
    # One iris observation (four features) used as the validation payload.
    s3_client.put_object(
        Bucket=s3_bucket, Key="validation-input-csv/input.csv", Body="5.1, 3.5, 1.4, 0.2"
    )

    # Batch-transform validation profile SageMaker runs to certify the package.
    ValidationSpecification = {
        "ValidationRole": role,
        "ValidationProfiles": [
            {
                "ProfileName": "Validation-test",
                "TransformJobDefinition": {
                    "BatchStrategy": "SingleRecord",
                    "TransformInput": {
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": validation_input_path,
                            }
                        },
                        "ContentType": supported_content_types[0],
                    },
                    "TransformOutput": {
                        "S3OutputPath": validation_output_path,
                    },
                    "TransformResources": {
                        "InstanceType": supported_batch_transform_instance_types[0],
                        "InstanceCount": 1,
                    },
                },
            },
        ],
    }

    # get pre-existing model artifact stored in ECR
    model = Model(
        image_uri=iris_image,
        model_data=validation_input_path + "input.csv",
        role=role,
        sagemaker_session=sagemaker_session,
        enable_network_isolation=False,
    )

    # Call model.register() - the method under test - to create a model package
    model.register(
        supported_content_types,
        supported_response_MIME_types,
        supported_realtime_inference_instance_types,
        supported_batch_transform_instance_types,
        marketplace_cert=True,
        description=model_description,
        model_package_name=model_name,
        validation_specification=ValidationSpecification,
    )

    # wait for model execution to complete
    time.sleep(60 * 3)

    # query for all model packages with the name "my-flower-detection-model"
    response = sm_client.list_model_packages(
        MaxResults=10,
        NameContains="my-flower-detection-model",
        SortBy="CreationTime",
        SortOrder="Descending",
    )

    # Clean up the created package before asserting, so a passing run leaves
    # no model package behind in the account.
    if len(response["ModelPackageSummaryList"]) > 0:
        sm_client.delete_model_package(ModelPackageName=model_name)

    # assert that response is non-empty
    assert len(response["ModelPackageSummaryList"]) > 0


@pytest.mark.skipif(
tests.integ.test_region() in tests.integ.NO_MARKET_PLACE_REGIONS,
reason="Marketplace is not available in {}".format(tests.integ.test_region()),
Expand Down
3 changes: 1 addition & 2 deletions tests/integ/test_multidatamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import os
import requests

import botocore
import docker
import numpy
import pytest
Expand Down Expand Up @@ -116,7 +115,7 @@ def _delete_repository(ecr_client, repository_name):
try:
ecr_client.describe_repositories(repositoryNames=[repository_name])
ecr_client.delete_repository(repositoryName=repository_name, force=True)
except botocore.errorfactory.ResourceNotFoundException:
except ecr_client.exceptions.RepositoryNotFoundException:
pass


Expand Down
1 change: 1 addition & 0 deletions tests/unit/sagemaker/model/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ def test_script_mode_model_uses_proper_sagemaker_submit_dir(repack_model, sagema

@patch("sagemaker.get_model_package_args")
def test_register_calls_model_package_args(get_model_package_args, sagemaker_session):
"""model.register() should pass the ValidationSpecification to get_model_package_args()"""

source_dir = "s3://blah/blah/blah"
t = Model(
Expand Down