@@ -16,6 +16,7 @@
 from unittest import mock
 
 import pytest
+
 import tests.integ
 
 from sagemaker.jumpstart.model import JumpStartModel
@@ -30,11 +31,15 @@
     get_tabular_data,
 )
 
-from sagemaker.enums import EndpointType
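+# Regions where ml.inf2 (AWS Inferentia2) instances are available; gates the neuron test below.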
+INF2_SUPPORTED_REGIONS = {
+    "us-west-2",
+    "us-east-1",
+    "us-east-2",
+}
 
 MAX_INIT_TIME_SECONDS = 5
 
-GATED_INFERENCE_MODEL_SUPPORTED_REGIONS = {
+GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS = {
     "us-west-2",
     "us-east-1",
     "eu-west-1",
@@ -67,89 +72,108 @@ def test_non_prepacked_jumpstart_model(setup):
     assert response is not None
 
 
+def test_prepacked_jumpstart_model(setup):
+
+    model_id = "huggingface-txt2img-conflictx-complex-lineart"
+
+    model = JumpStartModel(
+        model_id=model_id,
+        role=get_sm_session().get_caller_identity_arn(),
+        sagemaker_session=get_sm_session(),
+    )
+
+    # uses ml.p3.2xlarge instance
+    predictor = model.deploy(
+        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
+    )
+
+    response = predictor.predict("hello world!")
+
+    assert response is not None
+
+
 @pytest.mark.skipif(
-    tests.integ.test_region() not in tests.integ.INFERENCE_COMPONENT_SUPPORTED_REGIONS,
-    reason="inference component based endpoint is not supported in certain regions",
+    tests.integ.test_region() not in GATED_INFERENCE_MODEL_PACKAGE_SUPPORTED_REGIONS,
+    reason=f"JumpStart model package inference models unavailable in {tests.integ.test_region()}.",
 )
-def test_non_prepacked_jumpstart_model_deployed_on_inference_component_based_endpoint(setup):
+def test_model_package_arn_jumpstart_model(setup):
 
-    model_id = "huggingface-llm-falcon-7b-instruct-bf16"  # default g5.2xlarge
+    model_id = "meta-textgeneration-llama-2-7b"
 
     model = JumpStartModel(
         model_id=model_id,
+        model_version="2.*",  # version <3.0.0 uses model packages
         role=get_sm_session().get_caller_identity_arn(),
         sagemaker_session=get_sm_session(),
    )
 
-    predictor = model.deploy(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED)
-
-    inference_input = {
-        "inputs": "Girafatron is obsessed with giraffes, the most glorious animal on the "
-        + "face of this Earth. Giraftron believes all other animals are irrelevant when compared "
-        + "to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
-        "parameters": {
-            "max_new_tokens": 50,
-            "top_k": 10,
-            "return_full_text": False,
-            "do_sample": True,
-        },
-    }
+    # uses ml.g5.2xlarge instance
+    predictor = model.deploy(
+        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
+    )
 
-    response = predictor.predict(inference_input)
-    assert response is not None
+    payload = {
+        "inputs": "some-payload",
+        "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
+    }
 
-    # Delete predictor
-    predictor.delete_predictor(wait=True)
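+    # model-package-backed gated models accept the EULA per request via custom_attributes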
+    response = predictor.predict(payload, custom_attributes="accept_eula=true")
 
-    # Delete endpoint
-    predictor.delete_endpoint()
+    assert response is not None
 
 
-def test_prepacked_jumpstart_model(setup):
+@pytest.mark.skipif(
+    tests.integ.test_region() not in INF2_SUPPORTED_REGIONS,
+    reason=f"INF2 instances unavailable in {tests.integ.test_region()}.",
+)
+def test_jumpstart_gated_model_neuron(setup):
 
-    model_id = "huggingface-txt2img-conflictx-complex-lineart"
+    model_id = "meta-textgenerationneuron-llama-2-7b"
 
     model = JumpStartModel(
         model_id=model_id,
         role=get_sm_session().get_caller_identity_arn(),
         sagemaker_session=get_sm_session(),
     )
 
-    # uses ml.p3.2xlarge instance
+    # uses ml.inf2.xlarge instance
     predictor = model.deploy(
         tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
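+        # gated models cannot be deployed without explicitly accepting the EULA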
+        accept_eula=True,
     )
 
-    response = predictor.predict("hello world!")
+    payload = {
+        "inputs": "some-payload",
+    }
+
+    response = predictor.predict(payload)
 
     assert response is not None
 
 
-@pytest.mark.skipif(
-    tests.integ.test_region() not in GATED_INFERENCE_MODEL_SUPPORTED_REGIONS,
-    reason=f"JumpStart gated inference models unavailable in {tests.integ.test_region()}.",
-)
-def test_model_package_arn_jumpstart_model(setup):
+def test_jumpstart_gated_model(setup):
 
     model_id = "meta-textgeneration-llama-2-7b"
 
     model = JumpStartModel(
         model_id=model_id,
+        model_version="3.*",  # version >=3.0.0 stores artifacts in jumpstart-private-cache-* buckets
         role=get_sm_session().get_caller_identity_arn(),
         sagemaker_session=get_sm_session(),
     )
 
     # uses ml.g5.2xlarge instance
     predictor = model.deploy(
         tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
+        accept_eula=True,
     )
 
     payload = {
         "inputs": "some-payload",
         "parameters": {"max_new_tokens": 256, "top_p": 0.9, "temperature": 0.6},
     }
 
-    response = predictor.predict(payload, custom_attributes="accept_eula=true")
+    response = predictor.predict(payload)
 
     assert response is not None
 
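For reference, a minimal sketch of the gated-model flow these tests exercise. Model ID, version, and EULA handling are taken from the diff; the prompt is a placeholder, and get_execution_role assumes the code runs with a SageMaker execution role available:

    from sagemaker import get_execution_role
    from sagemaker.jumpstart.model import JumpStartModel

    model = JumpStartModel(
        model_id="meta-textgeneration-llama-2-7b",
        model_version="3.*",  # >=3.0.0 pulls artifacts from jumpstart-private-cache-* buckets
        role=get_execution_role(),
    )
    # Gated models refuse to deploy until the EULA is explicitly accepted.
    predictor = model.deploy(accept_eula=True)
    response = predictor.predict({"inputs": "hello world!"})
    predictor.delete_endpoint()  # clean up the endpoint when done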