Skip to content

Commit 6ab07ac

Browse files
feat(Sagemaker): allow deploying llama 3.1 from Sagemaker Jumpstart (#692)
* feat(Sagemaker): add JumpStart support for new regions and take gatedBucket into consideration when selecting the content bucket. --------- Signed-off-by: arief hidayat <[email protected]> Co-authored-by: Alain Krok <[email protected]>
1 parent 9e91372 commit 6ab07ac

File tree

6 files changed

+56
-3
lines changed

6 files changed

+56
-3
lines changed

apidocs/interfaces/IJumpStartModelSpec.md

+6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@
3030

3131
***
3232

33+
### gatedBucket
34+
35+
> **gatedBucket**: `boolean`
36+
37+
***
38+
3339
### instanceAliases?
3440

3541
> `optional` **instanceAliases**: [`IInstanceAliase`](IInstanceAliase.md)[]

src/patterns/gen-ai/aws-model-deployment-sagemaker/code-generation/generate-jumpstart-models.ts

+5
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ interface JumpStartModelSpec {
3434
hosting_artifact_key?: string;
3535
hosting_script_key?: string;
3636
hosting_prepacked_artifact_key?: string;
37+
gated_bucket: boolean;
3738
hosting_eula_key?: string;
3839
inference_environment_variables: {
3940
name: string;
@@ -113,6 +114,7 @@ export async function download_data() {
113114
hosting_script_key,
114115
hosting_artifact_key,
115116
hosting_prepacked_artifact_key,
117+
gated_bucket,
116118
inference_environment_variables,
117119
hosting_instance_type_variants,
118120
hosting_eula_key,
@@ -149,6 +151,7 @@ export async function download_data() {
149151
hosting_artifact_key,
150152
hosting_script_key,
151153
hosting_prepacked_artifact_key,
154+
gated_bucket,
152155
inference_environment_variables,
153156
hosting_instance_type_variants,
154157
hosting_eula_key,
@@ -220,6 +223,7 @@ function generateCode() {
220223
instanceTypes: specSource.supported_inference_instance_types,
221224
modelPackageArns: specSource.hosting_model_package_arns,
222225
prepackedArtifactKey: specSource.hosting_prepacked_artifact_key,
226+
gatedBucket: specSource.gated_bucket,
223227
artifactKey: specSource.hosting_artifact_key,
224228
environment,
225229
instanceAliases: instanceAliasesArr,
@@ -270,6 +274,7 @@ export interface IJumpStartModelSpec {
270274
instanceTypes: string[];
271275
modelPackageArns?: { [region: string]: string };
272276
prepackedArtifactKey?: string;
277+
gatedBucket: boolean;
273278
artifactKey?: string;
274279
environment: { [key: string]: string | number | boolean };
275280
instanceAliases?: IInstanceAliase[];

src/patterns/gen-ai/aws-model-deployment-sagemaker/jumpstart-model.ts

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ export interface IJumpStartModelSpec {
3131
instanceTypes: string[];
3232
modelPackageArns?: { [region: string]: string };
3333
prepackedArtifactKey?: string;
34+
gatedBucket: boolean;
3435
artifactKey?: string;
3536
environment: { [key: string]: string | number | boolean };
3637
instanceAliases?: IInstanceAliase[];

src/patterns/gen-ai/aws-model-deployment-sagemaker/jumpstart-models.json

+1-1
Large diffs are not rendered by default.

src/patterns/gen-ai/aws-model-deployment-sagemaker/jumpstart-sagemaker-endpoint.ts

+19-2
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ export class JumpStartSageMakerEndpoint extends SageMakerEndpointBase {
183183
...this.environment,
184184
};
185185

186+
if (environment.SAGEMAKER_SUBMIT_DIRECTORY) {
187+
delete environment.SAGEMAKER_SUBMIT_DIRECTORY;
188+
}
189+
186190
return environment;
187191
}
188192

@@ -195,7 +199,8 @@ export class JumpStartSageMakerEndpoint extends SageMakerEndpointBase {
195199
vpcConfig: sagemaker.CfnModel.VpcConfigProperty | undefined,
196200
) {
197201
const key = this.spec.prepackedArtifactKey ?? this.spec.artifactKey;
198-
const bucket = JumpStartConstants.JUMPSTART_LAUNCHED_REGIONS[this.region]?.contentBucket;
202+
const bucket = this.spec.gatedBucket ? JumpStartConstants.JUMPSTART_LAUNCHED_REGIONS[this.region]?.gatedContentBucket :
203+
JumpStartConstants.JUMPSTART_LAUNCHED_REGIONS[this.region]?.contentBucket;
199204
if (!bucket) {
200205
throw new Error(`JumpStart is not available in the region ${this.region}.`);
201206
}
@@ -224,7 +229,7 @@ export class JumpStartSageMakerEndpoint extends SageMakerEndpointBase {
224229
executionRoleArn: this.role.roleArn,
225230
enableNetworkIsolation: true,
226231
primaryContainer: isArtifactCompressed ? {
227-
// True: Artifact is a tarball
232+
// True: Artifact is a tarball
228233
image,
229234
modelDataUrl: modelArtifactUrl,
230235
environment,
@@ -252,6 +257,18 @@ export class JumpStartSageMakerEndpoint extends SageMakerEndpointBase {
252257
key: 'modelVersion',
253258
value: this.spec.version,
254259
},
260+
{
261+
key: 'sagemaker-studio:jumpstart-model-id',
262+
value: this.spec.modelId,
263+
},
264+
{
265+
key: 'sagemaker-studio:jumpstart-model-version',
266+
value: this.spec.version,
267+
},
268+
{
269+
key: 'sagemaker-studio:jumpstart-hub-name',
270+
value: 'SageMakerPublicHub',
271+
},
255272
],
256273
vpcConfig: vpcConfig,
257274
});

src/patterns/gen-ai/aws-model-deployment-sagemaker/private/jumpstart-constants.ts

+24
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ export abstract class JumpStartConstants {
4444
contentBucket: 'jumpstart-cache-prod-eu-north-1',
4545
gatedContentBucket: 'jumpstart-private-cache-prod-eu-north-1',
4646
},
47+
'me-central-1': {
48+
contentBucket: 'jumpstart-cache-prod-me-central-1',
49+
gatedContentBucket: 'jumpstart-private-cache-prod-me-central-1',
50+
},
4751
'me-south-1': {
4852
contentBucket: 'jumpstart-cache-prod-me-south-1',
4953
gatedContentBucket: 'jumpstart-private-cache-prod-me-south-1',
@@ -72,6 +76,10 @@ export abstract class JumpStartConstants {
7276
contentBucket: 'jumpstart-cache-prod-ap-northeast-2',
7377
gatedContentBucket: 'jumpstart-private-cache-prod-ap-northeast-2',
7478
},
79+
'ap-northeast-3': {
80+
contentBucket: 'jumpstart-cache-prod-ap-northeast-3',
81+
gatedContentBucket: 'jumpstart-private-cache-prod-ap-northeast-3',
82+
},
7583
'eu-west-2': {
7684
contentBucket: 'jumpstart-cache-prod-eu-west-2',
7785
gatedContentBucket: 'jumpstart-private-cache-prod-eu-west-2',
@@ -96,13 +104,29 @@ export abstract class JumpStartConstants {
96104
contentBucket: 'jumpstart-cache-prod-ap-southeast-2',
97105
gatedContentBucket: 'jumpstart-private-cache-prod-ap-southeast-2',
98106
},
107+
'ap-southeast-3': {
108+
contentBucket: 'jumpstart-cache-prod-ap-southeast-3',
109+
gatedContentBucket: 'jumpstart-private-cache-prod-ap-southeast-3',
110+
},
99111
'ca-central-1': {
100112
contentBucket: 'jumpstart-cache-prod-ca-central-1',
101113
gatedContentBucket: 'jumpstart-private-cache-prod-ca-central-1',
102114
},
103115
'cn-north-1': {
104116
contentBucket: 'jumpstart-cache-prod-cn-north-1',
105117
},
118+
'il-central-1': {
119+
contentBucket: 'jumpstart-cache-prod-il-central-1',
120+
gatedContentBucket: 'jumpstart-private-cache-prod-il-central-1',
121+
},
122+
'us-gov-east-1': {
123+
contentBucket: 'jumpstart-cache-prod-us-gov-east-1',
124+
gatedContentBucket: 'jumpstart-private-cache-prod-us-gov-east-1',
125+
},
126+
'us-gov-west-1': {
127+
contentBucket: 'jumpstart-cache-prod-us-gov-west-1',
128+
gatedContentBucket: 'jumpstart-private-cache-prod-us-gov-west-1',
129+
},
106130
};
107131

108132
public static JUMPSTART_DEFAULT_MANIFEST_FILE_S3_KEY = 'models_manifest.json';

0 commit comments

Comments
 (0)