Skip to content

Commit 8de50d6

Browse files
authored
feat(glue): support Ray jobs (#23822)
Glue launched a new job type: `AWS Glue for Ray`. See https://aws.amazon.com/about-aws/whats-new/2022/11/aws-glue-ray-preview/

This PR adds support for the Ray job type to the AWS CDK.

----

### All Submissions:

* [x] Have you followed the guidelines in our [Contributing guide?](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md)

### Adding new Construct Runtime Dependencies:

* [ ] This PR adds new construct runtime dependencies following the process described [here](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md/#adding-construct-runtime-dependencies)

### New Features

* [ ] Have you added the new feature to an [integration test](https://github.com/aws/aws-cdk/blob/main/INTEGRATION_TESTS.md)?
* [ ] Did you use `yarn integ` to deploy the infrastructure and generate the snapshot (i.e. `yarn integ` without `--dry-run`)?

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 57770bb commit 8de50d6

File tree

10 files changed

+497
-16
lines changed

10 files changed

+497
-16
lines changed

packages/@aws-cdk/aws-glue/README.md

+17
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,23 @@ new glue.Job(this, 'PythonShellJob', {
9090
});
9191
```
9292

93+
### Ray Jobs
94+
95+
These jobs run in a Ray environment managed by AWS Glue.
96+
97+
```ts
98+
new glue.Job(this, 'RayJob', {
99+
executable: glue.JobExecutable.pythonRay({
100+
glueVersion: glue.GlueVersion.V4_0,
101+
pythonVersion: glue.PythonVersion.THREE_NINE,
102+
script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')),
103+
}),
104+
workerType: glue.WorkerType.Z_2X,
105+
workerCount: 2,
106+
description: 'an example Ray job'
107+
});
108+
```
109+
93110
See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue.
94111

95112
## Connection

packages/@aws-cdk/aws-glue/lib/job-executable.ts

+39-5
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ export enum PythonVersion {
9595
*/
9696
export class JobType {
9797
/**
98-
* Command for running a Glue ETL job.
98+
* Command for running a Glue Spark job.
9999
*/
100100
public static readonly ETL = new JobType('glueetl');
101101

102102
/**
103-
* Command for running a Glue streaming job.
103+
* Command for running a Glue Spark streaming job.
104104
*/
105105
public static readonly STREAMING = new JobType('gluestreaming');
106106

@@ -109,6 +109,11 @@ export class JobType {
109109
*/
110110
public static readonly PYTHON_SHELL = new JobType('pythonshell');
111111

112+
/**
113+
* Command for running a Glue Ray job.
114+
*/
115+
public static readonly RAY = new JobType('glueray');
116+
112117
/**
113118
* Custom type name
114119
* @param name type name
@@ -211,6 +216,11 @@ export interface PythonSparkJobExecutableProps extends SharedSparkJobExecutableP
211216
*/
212217
export interface PythonShellExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}
213218

219+
/**
220+
* Props for creating a Python Ray job executable
221+
*/
222+
export interface PythonRayExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}
223+
214224
/**
215225
* The executable properties related to the Glue job's GlueVersion, JobType and code
216226
*/
@@ -281,6 +291,19 @@ export class JobExecutable {
281291
});
282292
}
283293

294+
/**
295+
* Create Python executable props for Ray jobs.
296+
*
297+
* @param props Ray Job props.
298+
*/
299+
public static pythonRay(props: PythonRayExecutableProps): JobExecutable {
300+
return new JobExecutable({
301+
...props,
302+
type: JobType.RAY,
303+
language: JobLanguage.PYTHON,
304+
});
305+
}
306+
284307
/**
285308
* Create a custom JobExecutable.
286309
*
@@ -297,10 +320,18 @@ export class JobExecutable {
297320
if (config.language !== JobLanguage.PYTHON) {
298321
throw new Error('Python shell requires the language to be set to Python');
299322
}
300-
if ([GlueVersion.V0_9, GlueVersion.V2_0, GlueVersion.V3_0, GlueVersion.V4_0].includes(config.glueVersion)) {
323+
if ([GlueVersion.V0_9, GlueVersion.V3_0, GlueVersion.V4_0].includes(config.glueVersion)) {
301324
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support Python Shell`);
302325
}
303326
}
327+
if (JobType.RAY === config.type) {
328+
if (config.language !== JobLanguage.PYTHON) {
329+
throw new Error('Ray requires the language to be set to Python');
330+
}
331+
if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0, GlueVersion.V3_0].includes(config.glueVersion)) {
332+
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support Ray`);
333+
}
334+
}
304335
if (config.extraJarsFirst && [GlueVersion.V0_9, GlueVersion.V1_0].includes(config.glueVersion)) {
305336
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support extraJarsFirst`);
306337
}
@@ -310,8 +341,11 @@ export class JobExecutable {
310341
if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) {
311342
throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON');
312343
}
313-
if (config.pythonVersion === PythonVersion.THREE_NINE && config.type !== JobType.PYTHON_SHELL) {
314-
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell');
344+
if (config.pythonVersion === PythonVersion.THREE_NINE && config.type !== JobType.PYTHON_SHELL && config.type !== JobType.RAY) {
345+
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray');
346+
}
347+
if (config.pythonVersion === PythonVersion.THREE && config.type === JobType.RAY) {
348+
throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray');
315349
}
316350
this.config = config;
317351
}

packages/@aws-cdk/aws-glue/lib/job.ts

+12
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ export class WorkerType {
3232
*/
3333
public static readonly G_2X = new WorkerType('G.2X');
3434

35+
/**
36+
* Each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low-volume streaming jobs.
37+
*/
38+
public static readonly G_025X = new WorkerType('G.025X');
39+
40+
/**
41+
* Each worker maps to 2 high-memory DPUs (M-DPUs) (8 vCPU, 64 GB of memory, 128 GB disk). Supported in Ray jobs.
42+
*/
43+
public static readonly Z_2X = new WorkerType('Z.2X');
44+
3545
/**
3646
* Custom worker type
3747
* @param workerType custom worker type
@@ -726,6 +736,8 @@ export class Job extends JobBase {
726736
private setupSparkUI(executable: JobExecutableConfig, role: iam.IRole, props: SparkUIProps) {
727737
if (JobType.PYTHON_SHELL === executable.type) {
728738
throw new Error('Spark UI is not available for JobType.PYTHON_SHELL jobs');
739+
} else if (JobType.RAY === executable.type) {
740+
throw new Error('Spark UI is not available for JobType.RAY jobs');
729741
}
730742

731743
const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket');

packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.assets.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414
}
1515
}
1616
},
17-
"977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2": {
17+
"b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce": {
1818
"source": {
1919
"path": "aws-glue-job.template.json",
2020
"packaging": "file"
2121
},
2222
"destinations": {
2323
"current_account-current_region": {
2424
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
25-
"objectKey": "977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
25+
"objectKey": "b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce.json",
2626
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
2727
}
2828
}

packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.template.json

+136-3
Original file line numberDiff line numberDiff line change
@@ -350,9 +350,11 @@
350350
},
351351
"GlueVersion": "2.0",
352352
"Name": "StreamingJob2.0",
353+
"NumberOfWorkers": 10,
353354
"Tags": {
354355
"key": "value"
355-
}
356+
},
357+
"WorkerType": "G.025X"
356358
}
357359
},
358360
"EtlJob30ServiceRole8E675579": {
@@ -705,9 +707,11 @@
705707
},
706708
"GlueVersion": "3.0",
707709
"Name": "StreamingJob3.0",
710+
"NumberOfWorkers": 10,
708711
"Tags": {
709712
"key": "value"
710-
}
713+
},
714+
"WorkerType": "G.025X"
711715
}
712716
},
713717
"EtlJob40ServiceRoleBDD9998A": {
@@ -1060,9 +1064,11 @@
10601064
},
10611065
"GlueVersion": "4.0",
10621066
"Name": "StreamingJob4.0",
1067+
"NumberOfWorkers": 10,
10631068
"Tags": {
10641069
"key": "value"
1065-
}
1070+
},
1071+
"WorkerType": "G.025X"
10661072
}
10671073
},
10681074
"ShellJobServiceRoleCF97BC4B": {
@@ -1314,6 +1320,133 @@
13141320
"key": "value"
13151321
}
13161322
}
1323+
},
1324+
"RayJobServiceRole51433C3D": {
1325+
"Type": "AWS::IAM::Role",
1326+
"Properties": {
1327+
"AssumeRolePolicyDocument": {
1328+
"Statement": [
1329+
{
1330+
"Action": "sts:AssumeRole",
1331+
"Effect": "Allow",
1332+
"Principal": {
1333+
"Service": "glue.amazonaws.com"
1334+
}
1335+
}
1336+
],
1337+
"Version": "2012-10-17"
1338+
},
1339+
"ManagedPolicyArns": [
1340+
{
1341+
"Fn::Join": [
1342+
"",
1343+
[
1344+
"arn:",
1345+
{
1346+
"Ref": "AWS::Partition"
1347+
},
1348+
":iam::aws:policy/service-role/AWSGlueServiceRole"
1349+
]
1350+
]
1351+
}
1352+
]
1353+
}
1354+
},
1355+
"RayJobServiceRoleDefaultPolicyA615640D": {
1356+
"Type": "AWS::IAM::Policy",
1357+
"Properties": {
1358+
"PolicyDocument": {
1359+
"Statement": [
1360+
{
1361+
"Action": [
1362+
"s3:GetBucket*",
1363+
"s3:GetObject*",
1364+
"s3:List*"
1365+
],
1366+
"Effect": "Allow",
1367+
"Resource": [
1368+
{
1369+
"Fn::Join": [
1370+
"",
1371+
[
1372+
"arn:",
1373+
{
1374+
"Ref": "AWS::Partition"
1375+
},
1376+
":s3:::",
1377+
{
1378+
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
1379+
},
1380+
"/*"
1381+
]
1382+
]
1383+
},
1384+
{
1385+
"Fn::Join": [
1386+
"",
1387+
[
1388+
"arn:",
1389+
{
1390+
"Ref": "AWS::Partition"
1391+
},
1392+
":s3:::",
1393+
{
1394+
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
1395+
}
1396+
]
1397+
]
1398+
}
1399+
]
1400+
}
1401+
],
1402+
"Version": "2012-10-17"
1403+
},
1404+
"PolicyName": "RayJobServiceRoleDefaultPolicyA615640D",
1405+
"Roles": [
1406+
{
1407+
"Ref": "RayJobServiceRole51433C3D"
1408+
}
1409+
]
1410+
}
1411+
},
1412+
"RayJob2F7864D9": {
1413+
"Type": "AWS::Glue::Job",
1414+
"Properties": {
1415+
"Command": {
1416+
"Name": "glueray",
1417+
"PythonVersion": "3.9",
1418+
"ScriptLocation": {
1419+
"Fn::Join": [
1420+
"",
1421+
[
1422+
"s3://",
1423+
{
1424+
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
1425+
},
1426+
"/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py"
1427+
]
1428+
]
1429+
}
1430+
},
1431+
"Role": {
1432+
"Fn::GetAtt": [
1433+
"RayJobServiceRole51433C3D",
1434+
"Arn"
1435+
]
1436+
},
1437+
"DefaultArguments": {
1438+
"--job-language": "python",
1439+
"arg1": "value1",
1440+
"arg2": "value2"
1441+
},
1442+
"GlueVersion": "4.0",
1443+
"Name": "RayJob",
1444+
"NumberOfWorkers": 2,
1445+
"Tags": {
1446+
"key": "value"
1447+
},
1448+
"WorkerType": "Z.2X"
1449+
}
13171450
}
13181451
},
13191452
"Parameters": {

packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/manifest.json

+19-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"validateOnSynth": false,
1818
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
1919
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
20-
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
20+
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce.json",
2121
"requiresBootstrapStackVersion": 6,
2222
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
2323
"additionalDependencies": [
@@ -213,6 +213,24 @@
213213
"data": "ShellJob390C141361"
214214
}
215215
],
216+
"/aws-glue-job/RayJob/ServiceRole/Resource": [
217+
{
218+
"type": "aws:cdk:logicalId",
219+
"data": "RayJobServiceRole51433C3D"
220+
}
221+
],
222+
"/aws-glue-job/RayJob/ServiceRole/DefaultPolicy/Resource": [
223+
{
224+
"type": "aws:cdk:logicalId",
225+
"data": "RayJobServiceRoleDefaultPolicyA615640D"
226+
}
227+
],
228+
"/aws-glue-job/RayJob/Resource": [
229+
{
230+
"type": "aws:cdk:logicalId",
231+
"data": "RayJob2F7864D9"
232+
}
233+
],
216234
"/aws-glue-job/BootstrapVersion": [
217235
{
218236
"type": "aws:cdk:logicalId",

0 commit comments

Comments
 (0)