Skip to content

Commit c48bba7

Browse files
evakravi and benieric
authored and committed
chore: add jumpstart gated model integration tests (#1415)
1 parent 435e545 commit c48bba7

File tree

3 files changed

+156
-39
lines changed

3 files changed

+156
-39
lines changed

tests/integ/sagemaker/jumpstart/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str:
4646
("huggingface-spc-bert-base-cased", "*"): ("training-datasets/QNLI-tiny/"),
4747
("js-trainable-model", "*"): ("training-datasets/QNLI-tiny/"),
4848
("meta-textgeneration-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
49+
("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"),
50+
("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"),
51+
("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
4952
}
5053

5154

tests/integ/sagemaker/jumpstart/estimator/test_jumpstart_estimator.py

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,19 @@
3535

3636
MAX_INIT_TIME_SECONDS = 5
3737

38-
GATED_TRAINING_MODEL_SUPPORTED_REGIONS = {
38+
GATED_TRAINING_MODEL_V1_SUPPORTED_REGIONS = {
3939
"us-west-2",
4040
"us-east-1",
4141
"eu-west-1",
4242
"ap-southeast-1",
4343
"us-east-2",
4444
"ap-southeast-2",
4545
}
46+
TRN2_SUPPORTED_REGIONS = {
47+
"us-west-2",
48+
"us-east-1",
49+
"us-east-2",
50+
}
4651

4752

4853
def test_jumpstart_estimator(setup):
@@ -87,15 +92,58 @@ def test_jumpstart_estimator(setup):
8792

8893
@x_fail_if_ice
8994
@pytest.mark.skipif(
90-
tests.integ.test_region() not in GATED_TRAINING_MODEL_SUPPORTED_REGIONS,
95+
tests.integ.test_region() not in GATED_TRAINING_MODEL_V1_SUPPORTED_REGIONS,
9196
reason=f"JumpStart gated training models unavailable in {tests.integ.test_region()}.",
9297
)
93-
def test_gated_model_training(setup):
98+
def test_gated_model_training_v1(setup):
99+
100+
model_id = "meta-textgeneration-llama-2-7b"
101+
model_version = "2.*" # model artifacts were retrieved using legacy workflow
102+
103+
estimator = JumpStartEstimator(
104+
model_id=model_id,
105+
model_version=model_version,
106+
role=get_sm_session().get_caller_identity_arn(),
107+
sagemaker_session=get_sm_session(),
108+
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
109+
environment={"accept_eula": "true"},
110+
max_run=259200, # avoid exceeding resource limits
111+
)
112+
113+
# uses ml.g5.12xlarge instance
114+
estimator.fit(
115+
{
116+
"training": f"s3://{get_jumpstart_content_bucket(JUMPSTART_DEFAULT_REGION_NAME)}/"
117+
f"{get_training_dataset_for_model_and_version(model_id, model_version)}",
118+
}
119+
)
120+
121+
# uses ml.g5.2xlarge instance
122+
predictor = estimator.deploy(
123+
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
124+
role=get_sm_session().get_caller_identity_arn(),
125+
sagemaker_session=get_sm_session(),
126+
)
127+
128+
payload = {
129+
"inputs": "some-payload",
130+
"parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
131+
}
132+
133+
response = predictor.predict(payload, custom_attributes="accept_eula=true")
134+
135+
assert response is not None
94136

95-
model_id, model_version = "meta-textgeneration-llama-2-7b", "*"
137+
138+
@x_fail_if_ice
139+
def test_gated_model_training_v2(setup):
140+
141+
model_id = "meta-textgeneration-llama-2-7b"
142+
model_version = "3.*" # model artifacts retrieved from jumpstart-private-cache-* buckets
96143

97144
estimator = JumpStartEstimator(
98145
model_id=model_id,
146+
model_version=model_version,
99147
role=get_sm_session().get_caller_identity_arn(),
100148
sagemaker_session=get_sm_session(),
101149
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
@@ -128,6 +176,48 @@ def test_gated_model_training(setup):
128176
assert response is not None
129177

130178

179+
@x_fail_if_ice
180+
@pytest.mark.skipif(
181+
tests.integ.test_region() not in TRN2_SUPPORTED_REGIONS,
182+
reason=f"TRN2 instances unavailable in {tests.integ.test_region()}.",
183+
)
184+
def test_gated_model_training_v2_neuron(setup):
185+
186+
model_id = "meta-textgenerationneuron-llama-2-7b"
187+
188+
estimator = JumpStartEstimator(
189+
model_id=model_id,
190+
role=get_sm_session().get_caller_identity_arn(),
191+
sagemaker_session=get_sm_session(),
192+
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
193+
environment={"accept_eula": "true"},
194+
max_run=259200, # avoid exceeding resource limits
195+
)
196+
197+
# uses ml.trn1.32xlarge instance
198+
estimator.fit(
199+
{
200+
"training": f"s3://{get_jumpstart_content_bucket(JUMPSTART_DEFAULT_REGION_NAME)}/"
201+
f"{get_training_dataset_for_model_and_version(model_id, '*')}",
202+
}
203+
)
204+
205+
# uses ml.inf2.xlarge instance
206+
predictor = estimator.deploy(
207+
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
208+
role=get_sm_session().get_caller_identity_arn(),
209+
sagemaker_session=get_sm_session(),
210+
)
211+
212+
payload = {
213+
"inputs": "some-payload",
214+
}
215+
216+
response = predictor.predict(payload, custom_attributes="accept_eula=true")
217+
218+
assert response is not None
219+
220+
131221
@mock.patch("sagemaker.jumpstart.cache.JUMPSTART_LOGGER.warning")
132222
def test_instatiating_estimator(mock_warning_logger, setup):
133223

tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py

Lines changed: 59 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from unittest import mock
1717

1818
import pytest
19+
1920
import tests.integ
2021

2122
from sagemaker.jumpstart.model import JumpStartModel
@@ -30,11 +31,15 @@
3031
get_tabular_data,
3132
)
3233

33-
from sagemaker.enums import EndpointType
34+
INF2_SUPPORTED_REGIONS = {
35+
"us-west-2",
36+
"us-east-1",
37+
"us-east-2",
38+
}
3439

3540
MAX_INIT_TIME_SECONDS = 5
3641

37-
GATED_INFERENCE_MODEL_SUPPORTED_REGIONS = {
42+
GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS = {
3843
"us-west-2",
3944
"us-east-1",
4045
"eu-west-1",
@@ -67,89 +72,108 @@ def test_non_prepacked_jumpstart_model(setup):
6772
assert response is not None
6873

6974

75+
def test_prepacked_jumpstart_model(setup):
76+
77+
model_id = "huggingface-txt2img-conflictx-complex-lineart"
78+
79+
model = JumpStartModel(
80+
model_id=model_id,
81+
role=get_sm_session().get_caller_identity_arn(),
82+
sagemaker_session=get_sm_session(),
83+
)
84+
85+
# uses ml.p3.2xlarge instance
86+
predictor = model.deploy(
87+
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
88+
)
89+
90+
response = predictor.predict("hello world!")
91+
92+
assert response is not None
93+
94+
7095
@pytest.mark.skipif(
71-
tests.integ.test_region() not in tests.integ.INFERENCE_COMPONENT_SUPPORTED_REGIONS,
72-
reason="inference component based endpoint is not supported in certain regions",
96+
tests.integ.test_region() not in GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS,
97+
reason=f"JumpStart model package inference models unavailable in {tests.integ.test_region()}.",
7398
)
74-
def test_non_prepacked_jumpstart_model_deployed_on_inference_component_based_endpoint(setup):
99+
def test_model_package_arn_jumpstart_model(setup):
75100

76-
model_id = "huggingface-llm-falcon-7b-instruct-bf16" # default g5.2xlarge
101+
model_id = "meta-textgeneration-llama-2-7b"
77102

78103
model = JumpStartModel(
79104
model_id=model_id,
105+
model_version="2.*", # version <3.0.0 uses model packages
80106
role=get_sm_session().get_caller_identity_arn(),
81107
sagemaker_session=get_sm_session(),
82108
)
83109

84-
predictor = model.deploy(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED)
85-
86-
inference_input = {
87-
"inputs": "Girafatron is obsessed with giraffes, the most glorious animal on the "
88-
+ "face of this Earth. Giraftron believes all other animals are irrelevant when compared "
89-
+ "to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
90-
"parameters": {
91-
"max_new_tokens": 50,
92-
"top_k": 10,
93-
"return_full_text": False,
94-
"do_sample": True,
95-
},
96-
}
110+
# uses ml.g5.2xlarge instance
111+
predictor = model.deploy(
112+
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
113+
)
97114

98-
response = predictor.predict(inference_input)
99-
assert response is not None
115+
payload = {
116+
"inputs": "some-payload",
117+
"parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
118+
}
100119

101-
# Delete predictor
102-
predictor.delete_predictor(wait=True)
120+
response = predictor.predict(payload, custom_attributes="accept_eula=true")
103121

104-
# Delete endpoint
105-
predictor.delete_endpoint()
122+
assert response is not None
106123

107124

108-
def test_prepacked_jumpstart_model(setup):
125+
@pytest.mark.skipif(
126+
tests.integ.test_region() not in INF2_SUPPORTED_REGIONS,
127+
reason=f"INF2 instances unavailable in {tests.integ.test_region()}.",
128+
)
129+
def test_jumpstart_gated_model_neuron(setup):
109130

110-
model_id = "huggingface-txt2img-conflictx-complex-lineart"
131+
model_id = "meta-textgenerationneuron-llama-2-7b"
111132

112133
model = JumpStartModel(
113134
model_id=model_id,
114135
role=get_sm_session().get_caller_identity_arn(),
115136
sagemaker_session=get_sm_session(),
116137
)
117138

118-
# uses ml.p3.2xlarge instance
139+
# uses ml.inf2.xlarge instance
119140
predictor = model.deploy(
120141
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
142+
accept_eula=True,
121143
)
122144

123-
response = predictor.predict("hello world!")
145+
payload = {
146+
"inputs": "some-payload",
147+
}
148+
149+
response = predictor.predict(payload)
124150

125151
assert response is not None
126152

127153

128-
@pytest.mark.skipif(
129-
tests.integ.test_region() not in GATED_INFERENCE_MODEL_SUPPORTED_REGIONS,
130-
reason=f"JumpStart gated inference models unavailable in {tests.integ.test_region()}.",
131-
)
132-
def test_model_package_arn_jumpstart_model(setup):
154+
def test_jumpstart_gated_model(setup):
133155

134156
model_id = "meta-textgeneration-llama-2-7b"
135157

136158
model = JumpStartModel(
137159
model_id=model_id,
160+
model_version="3.*", # version >=3.0.0 stores artifacts in jumpstart-private-cache-* buckets
138161
role=get_sm_session().get_caller_identity_arn(),
139162
sagemaker_session=get_sm_session(),
140163
)
141164

142165
# uses ml.g5.2xlarge instance
143166
predictor = model.deploy(
144167
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
168+
accept_eula=True,
145169
)
146170

147171
payload = {
148172
"inputs": "some-payload",
149173
"parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
150174
}
151175

152-
response = predictor.predict(payload, custom_attributes="accept_eula=true")
176+
response = predictor.predict(payload)
153177

154178
assert response is not None
155179

0 commit comments

Comments
 (0)