
change: reorganize test files for workflow #2960


Merged: 5 commits merged on Mar 3, 2022
Changes from 4 commits
2 changes: 1 addition & 1 deletion doc/doc_utils/jumpstart_doc_utils.py
@@ -78,7 +78,7 @@ def create_jumpstart_model_table():
file_content.append(" - Latest Version\n")
file_content.append(" - Min SDK Version\n")

for model in sorted(sdk_manifest, key=lambda elt: elt["model_id"]):
for model in sdk_manifest_top_versions_for_models.values():
model_spec = get_jumpstart_sdk_spec(model["spec_key"])
file_content.append(" * - {}\n".format(model["model_id"]))
file_content.append(" - {}\n".format(model_spec["training_supported"]))
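For context on the doc-utils change above: the table generator now iterates over a single latest entry per model instead of every manifest entry. A minimal sketch of how such a mapping could be built, assuming each manifest entry is a dict with "model_id" and "version" keys; the helper name and the dotted-numeric version comparison are illustrative assumptions, not code from this PR:

def build_top_versions_for_models(sdk_manifest):
    # Illustrative helper (assumption, not from this PR): keep only the
    # highest-versioned entry per model_id. Assumes purely numeric dotted
    # version strings such as "1.2.3".
    top_entries = {}
    for entry in sdk_manifest:
        model_id = entry["model_id"]
        version_key = tuple(int(part) for part in entry["version"].split("."))
        if model_id not in top_entries or version_key > top_entries[model_id][0]:
            top_entries[model_id] = (version_key, entry)
    return {model_id: entry for model_id, (_, entry) in top_entries.items()}

# Usage mirroring the changed loop:
# for model in build_top_versions_for_models(sdk_manifest).values():
#     model_spec = get_jumpstart_sdk_spec(model["spec_key"])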
36 changes: 35 additions & 1 deletion src/sagemaker/serializers.py
@@ -18,7 +18,6 @@
import csv
import io
import json

import numpy as np
from six import with_metaclass

@@ -357,3 +356,38 @@ def serialize(self, data):
return data.read()

raise ValueError("Unable to handle input format: %s" % type(data))


class DataSerializer(SimpleBaseSerializer):
"""Serialize data in any file by extracting raw bytes from the file."""

def __init__(self, content_type="file-path/raw-bytes"):
"""Initialize a ``DataSerializer`` instance.

Args:
content_type (str): The MIME type to signal to the inference endpoint when sending
request data (default: "file-path/raw-bytes").
"""
super(DataSerializer, self).__init__(content_type=content_type)

def serialize(self, data):
"""Serialize file data to a raw bytes.

Args:
data (object): Data to be serialized. The data can be a string
representing file-path or the raw bytes from a file.
Returns:
raw-bytes: The data serialized as raw-bytes from the input.
"""
if isinstance(data, str):
try:
dataFile = open(data, "rb")
dataFileInfo = dataFile.read()
dataFile.close()
return dataFileInfo
except Exception as e:
raise ValueError(f"Could not open/read file: {data}. {e}")
if isinstance(data, bytes):
return data

raise ValueError(f"Object of type {type(data)} is not Data serializable.")
Binary file added tests/data/cuteCat.raw
6 changes: 0 additions & 6 deletions tests/integ/__init__.py
@@ -148,12 +148,6 @@
"eu-west-2",
"us-east-1",
]
NO_SM_PIPELINE_MM_CLARIFY_CHECK_STEP_REGIONS = [
"ap-northeast-3",
"ap-south-1",
"eu-north-1",
"sa-east-1",
]
EDGE_PACKAGING_SUPPORTED_REGIONS = [
"us-east-2",
"us-west-2",
Empty file.
4 changes: 3 additions & 1 deletion tests/integ/sagemaker/lineage/conftest.py
@@ -26,7 +26,9 @@
artifact,
)
from sagemaker.model import ModelPackage
from tests.integ.test_workflow import test_end_to_end_pipeline_successful_execution
from tests.integ.sagemaker.workflow.test_workflow import (
test_end_to_end_pipeline_successful_execution,
)
from sagemaker.workflow.pipeline import _PipelineExecution
from sagemaker.session import get_execution_role
from smexperiments import trial_component, trial, experiment
Empty file.
118 changes: 118 additions & 0 deletions tests/integ/sagemaker/workflow/test_callback_steps.py
@@ -0,0 +1,118 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import re

import pytest

from sagemaker import get_execution_role, utils
from sagemaker.workflow.callback_step import CallbackOutput, CallbackStep, CallbackOutputTypeEnum
from sagemaker.workflow.parameters import ParameterInteger
from sagemaker.workflow.pipeline import Pipeline


@pytest.fixture
def role(sagemaker_session):
return get_execution_role(sagemaker_session)


@pytest.fixture
def pipeline_name():
return utils.unique_name_from_base("my-pipeline-callback")


@pytest.fixture
def region_name(sagemaker_session):
return sagemaker_session.boto_session.region_name


def test_one_step_callback_pipeline(sagemaker_session, role, pipeline_name, region_name):
instance_count = ParameterInteger(name="InstanceCount", default_value=2)

outputParam1 = CallbackOutput(output_name="output1", output_type=CallbackOutputTypeEnum.String)
step_callback = CallbackStep(
name="callback-step",
sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
inputs={"arg1": "foo"},
outputs=[outputParam1],
)

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
steps=[step_callback],
sagemaker_session=sagemaker_session,
)

try:
response = pipeline.create(role)
create_arn = response["PipelineArn"]
assert re.match(
rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
create_arn,
)

pipeline.parameters = [ParameterInteger(name="InstanceCount", default_value=1)]
response = pipeline.update(role)
update_arn = response["PipelineArn"]
assert re.match(
rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
update_arn,
)
finally:
try:
pipeline.delete()
except Exception:
pass


def test_two_step_callback_pipeline_with_output_reference(
sagemaker_session, role, pipeline_name, region_name
):
instance_count = ParameterInteger(name="InstanceCount", default_value=2)

outputParam1 = CallbackOutput(output_name="output1", output_type=CallbackOutputTypeEnum.String)
step_callback1 = CallbackStep(
name="callback-step1",
sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
inputs={"arg1": "foo"},
outputs=[outputParam1],
)

step_callback2 = CallbackStep(
name="callback-step2",
sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
inputs={"arg1": outputParam1},
outputs=[],
)

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
steps=[step_callback1, step_callback2],
sagemaker_session=sagemaker_session,
)

try:
response = pipeline.create(role)
create_arn = response["PipelineArn"]
assert re.match(
rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
create_arn,
)
finally:
try:
pipeline.delete()
except Exception:
pass
@@ -19,7 +19,6 @@
import pytest
from botocore.exceptions import WaiterError

import tests
from sagemaker.clarify import (
BiasConfig,
DataConfig,
@@ -129,10 +128,6 @@ def data_bias_check_config(data_config, bias_config):
)


@pytest.mark.skipif(
tests.integ.test_region() in tests.integ.NO_SM_PIPELINE_MM_CLARIFY_CHECK_STEP_REGIONS,
reason=f"ClarifyCheckStep is not fully deployed in {tests.integ.test_region()}.",
)
def test_one_step_data_bias_pipeline_happycase(
sagemaker_session,
role,
@@ -220,10 +215,6 @@ def test_one_step_data_bias_pipeline_happycase(
pass


@pytest.mark.skipif(
tests.integ.test_region() in tests.integ.NO_SM_PIPELINE_MM_CLARIFY_CHECK_STEP_REGIONS,
reason=f"ClarifyCheckStep is not fully deployed in {tests.integ.test_region()}.",
)
def test_one_step_data_bias_pipeline_constraint_violation(
sagemaker_session,
role,
82 changes: 82 additions & 0 deletions tests/integ/sagemaker/workflow/test_emr_steps.py
@@ -0,0 +1,82 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import re

import pytest

from sagemaker import get_execution_role, utils
from sagemaker.workflow.emr_step import EMRStep, EMRStepConfig
from sagemaker.workflow.parameters import ParameterInteger
from sagemaker.workflow.pipeline import Pipeline


@pytest.fixture
def role(sagemaker_session):
return get_execution_role(sagemaker_session)


@pytest.fixture
def pipeline_name():
return utils.unique_name_from_base("my-pipeline-emr")


@pytest.fixture
def region_name(sagemaker_session):
return sagemaker_session.boto_session.region_name


def test_two_steps_emr_pipeline(sagemaker_session, role, pipeline_name, region_name):
instance_count = ParameterInteger(name="InstanceCount", default_value=2)

emr_step_config = EMRStepConfig(
jar="s3://us-west-2.elasticmapreduce/libs/script-runner/script-runner.jar",
args=["dummy_emr_script_path"],
)

step_emr_1 = EMRStep(
name="emr-step-1",
cluster_id="j-1YONHTCP3YZKC",
display_name="emr_step_1",
description="MyEMRStepDescription",
step_config=emr_step_config,
)

step_emr_2 = EMRStep(
name="emr-step-2",
cluster_id=step_emr_1.properties.ClusterId,
display_name="emr_step_2",
description="MyEMRStepDescription",
step_config=emr_step_config,
)

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
steps=[step_emr_1, step_emr_2],
sagemaker_session=sagemaker_session,
)

try:
response = pipeline.create(role)
create_arn = response["PipelineArn"]
assert re.match(
rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
create_arn,
)
finally:
try:
pipeline.delete()
except Exception:
pass