Skip to content

Commit be3bce3

Browse files
authored
feat(glue-alpha): include extra jars parameter in pyspark jobs (#33238)
### Issue # (if applicable)

Closes #33225.

### Reason for this change

PySpark jobs with extra JAR dependencies cannot be defined with the new L2 constructs introduced in [v2.177.0](https://github.com/aws/aws-cdk/releases/tag/v2.177.0).

### Description of changes

Add the `extraJars` parameter to the PySpark job L2 constructs.

### Checklist

- [x] My code adheres to the [CONTRIBUTING GUIDE](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md) and [DESIGN GUIDELINES](https://github.com/aws/aws-cdk/blob/main/docs/DESIGN_GUIDELINES.md)

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 6f1aa80 commit be3bce3

File tree

5 files changed

+44
-2
lines changed

5 files changed

+44
-2
lines changed

packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts

+10
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ export interface PySparkEtlJobProps extends JobProperties {
3939
*/
4040
readonly extraFiles?: Code[];
4141

42+
/**
43+
* Extra Jars S3 URL (optional)
44+
* S3 URL where additional jar dependencies are located
45+
* @default - no extra jar files
46+
*/
47+
readonly extraJars?: Code[];
48+
4249
/**
4350
* Specifies whether job run queuing is enabled for the job runs for this job.
4451
* A value of true means job run queuing is enabled for the job runs.
@@ -159,6 +166,9 @@ export class PySparkEtlJob extends Job {
159166
if (props.extraFiles && props.extraFiles.length > 0) {
160167
args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
161168
}
169+
if (props.extraJars && props.extraJars?.length > 0) {
170+
args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');
171+
}
162172

163173
return args;
164174
}

packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts

+10
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ export interface PySparkFlexEtlJobProps extends JobProperties {
4848
*/
4949
readonly extraFiles?: Code[];
5050

51+
/**
52+
* Extra Jars S3 URL (optional)
53+
* S3 URL where additional jar dependencies are located
54+
* @default - no extra jar files
55+
*/
56+
readonly extraJars?: Code[];
57+
5158
}
5259

5360
/**
@@ -160,6 +167,9 @@ export class PySparkFlexEtlJob extends Job {
160167
if (props.extraFiles && props.extraFiles.length > 0) {
161168
args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
162169
}
170+
if (props.extraJars && props.extraJars?.length > 0) {
171+
args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');
172+
}
163173

164174
return args;
165175
}

packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts

+10
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ export interface PySparkStreamingJobProps extends JobProperties {
3939
*/
4040
readonly extraFiles?: Code[];
4141

42+
/**
43+
* Extra Jars S3 URL (optional)
44+
* S3 URL where additional jar dependencies are located
45+
* @default - no extra jar files
46+
*/
47+
readonly extraJars?: Code[];
48+
4249
/**
4350
* Specifies whether job run queuing is enabled for the job runs for this job.
4451
* A value of true means job run queuing is enabled for the job runs.
@@ -159,6 +166,9 @@ export class PySparkStreamingJob extends Job {
159166
if (props.extraFiles && props.extraFiles.length > 0) {
160167
args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
161168
}
169+
if (props.extraJars && props.extraJars?.length > 0) {
170+
args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');
171+
}
162172

163173
return args;
164174
}

packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts

+7-1
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ describe('Job', () => {
365365
});
366366
});
367367

368-
describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => {
368+
describe('Create PySpark ETL Job with extraPythonFiles, extraFiles and extraJars', () => {
369369
beforeEach(() => {
370370
job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', {
371371
role,
@@ -381,6 +381,11 @@ describe('Job', () => {
381381
s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'),
382382
'prefix/file.txt'),
383383
],
384+
extraJars: [
385+
glue.Code.fromBucket(
386+
s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'),
387+
'prefix/file.jar'),
388+
],
384389
});
385390
});
386391

@@ -408,6 +413,7 @@ describe('Job', () => {
408413
'--enable-continuous-cloudwatch-log': 'true',
409414
'--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py',
410415
'--extra-files': 's3://extra-files-bucket/prefix/file.txt',
416+
'--extra-jars': 's3://extra-jars-bucket/prefix/file.jar',
411417
}),
412418
});
413419
});

packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts

+7-1
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ describe('Job', () => {
377377
});
378378
});
379379

380-
describe('Create PySpark Streaming Job with extraPythonFiles and extraFiles', () => {
380+
describe('Create PySpark Streaming Job with extraPythonFiles, extraFiles and extraJars', () => {
381381
beforeEach(() => {
382382
job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', {
383383
role,
@@ -393,6 +393,11 @@ describe('Job', () => {
393393
s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'),
394394
'prefix/file.txt'),
395395
],
396+
extraJars: [
397+
glue.Code.fromBucket(
398+
s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'),
399+
'prefix/file.jar'),
400+
],
396401
});
397402
});
398403

@@ -420,6 +425,7 @@ describe('Job', () => {
420425
'--enable-continuous-cloudwatch-log': 'true',
421426
'--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py',
422427
'--extra-files': 's3://extra-files-bucket/prefix/file.txt',
428+
'--extra-jars': 's3://extra-jars-bucket/prefix/file.jar',
423429
}),
424430
});
425431
});

0 commit comments

Comments
 (0)