Skip to content

Commit 42250f1

Browse files
authored
feat(glue): Job construct does not honor SparkUIProps S3 prefix when granting S3 access (#26696)
The `SparkUIProps.prefix` attribute of `glue.Job` now has a consistent format, is validated, and the bucket ReadWrite role is only given permissions to the folder of the bucket specified by the `prefix` if provided. Adds a suggested format for the prefix: `/foo/bar` and throws an error if the prefix does not start with a `/` or ends with a `/`. This may be a breaking change for users who configured their prefix in a style different from the one this enforces. However, I believe it is the correct standardization going forward. Closes #19862. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 68b96c6 commit 42250f1

File tree

3 files changed

+122
-7
lines changed

3 files changed

+122
-7
lines changed

Diff for: packages/@aws-cdk/aws-glue-alpha/README.md

+20
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,26 @@ new glue.Job(this, 'RayJob', {
105105
});
106106
```
107107

108+
### Enable Spark UI
109+
110+
Enable the Spark UI by setting the `sparkUI` property.
111+
112+
```ts
113+
new glue.Job(this, 'EnableSparkUI', {
114+
jobName: 'EtlJobWithSparkUIPrefix',
115+
sparkUI: {
116+
enabled: true,
117+
},
118+
executable: glue.JobExecutable.pythonEtl({
119+
glueVersion: glue.GlueVersion.V3_0,
120+
pythonVersion: glue.PythonVersion.THREE,
121+
script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')),
122+
}),
123+
});
124+
```
125+
126+
The `sparkUI` property also allows the specification of an S3 bucket and a bucket prefix.
127+
108128
See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue.
109129

110130
## Connection

Diff for: packages/@aws-cdk/aws-glue-alpha/lib/job.ts

+30-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { EOL } from 'os';
12
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
23
import * as events from 'aws-cdk-lib/aws-events';
34
import * as iam from 'aws-cdk-lib/aws-iam';
@@ -389,8 +390,9 @@ export interface SparkUIProps {
389390

390391
/**
391392
* The path inside the bucket (objects prefix) where the Glue job stores the logs.
393+
* Use format `'/foo/bar'`
392394
*
393-
* @default '/' - the logs will be written at the root of the bucket
395+
* @default - the logs will be written at the root of the bucket
394396
*/
395397
readonly prefix?: string;
396398
}
@@ -813,8 +815,9 @@ export class Job extends JobBase {
813815
throw new Error('Spark UI is not available for JobType.RAY jobs');
814816
}
815817

818+
this.validatePrefix(props.prefix);
816819
const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket');
817-
bucket.grantReadWrite(role);
820+
bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix));
818821
const args = {
819822
'--enable-spark-ui': 'true',
820823
'--spark-event-logs-path': bucket.s3UrlForObject(props.prefix),
@@ -829,6 +832,31 @@ export class Job extends JobBase {
829832
};
830833
}
831834

835+
private validatePrefix(prefix?: string): void {
836+
if (!prefix || cdk.Token.isUnresolved(prefix)) {
837+
// skip validation if prefix is not specified or is a token
838+
return;
839+
}
840+
841+
const errors: string[] = [];
842+
843+
if (!prefix.startsWith('/')) {
844+
errors.push('Prefix must begin with \'/\'');
845+
}
846+
847+
if (prefix.endsWith('/')) {
848+
errors.push('Prefix must not end with \'/\'');
849+
}
850+
851+
if (errors.length > 0) {
852+
throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`);
853+
}
854+
}
855+
856+
private cleanPrefixForGrant(prefix?: string): string | undefined {
857+
return prefix !== undefined ? prefix.slice(1) + '/*' : undefined;
858+
}
859+
832860
private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) {
833861
const args: {[key: string]: string} = {
834862
'--enable-continuous-cloudwatch-log': 'true',

Diff for: packages/@aws-cdk/aws-glue-alpha/test/job.test.ts

+72-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { EOL } from 'os';
12
import { Template } from 'aws-cdk-lib/assertions';
23
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
34
import * as events from 'aws-cdk-lib/aws-events';
@@ -629,29 +630,95 @@ describe('Job', () => {
629630
});
630631
});
631632
});
632-
633633
describe('with bucket and path provided', () => {
634634
const sparkUIBucketName = 'sparkbucketname';
635-
const prefix = 'some/path/';
635+
const prefix = '/foob/bart';
636+
const badPrefix = 'foob/bart/';
636637
let sparkUIBucket: s3.IBucket;
637638

639+
const expectedErrors = [
640+
`Invalid prefix format (value: ${badPrefix})`,
641+
'Prefix must begin with \'/\'',
642+
'Prefix must not end with \'/\'',
643+
].join(EOL);
644+
it('fails if path is mis-formatted', () => {
645+
expect(() => new glue.Job(stack, 'BadPrefixJob', {
646+
...defaultProps,
647+
sparkUI: {
648+
enabled: true,
649+
bucket: sparkUIBucket,
650+
prefix: badPrefix,
651+
},
652+
})).toThrow(expectedErrors);
653+
});
654+
638655
beforeEach(() => {
639656
sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName);
640657
job = new glue.Job(stack, 'Job', {
641658
...defaultProps,
642659
sparkUI: {
643660
enabled: true,
644661
bucket: sparkUIBucket,
645-
prefix,
662+
prefix: prefix,
646663
},
647664
});
648665
});
649666

650-
test('should set spark arguments on the job', () => {
667+
it('should grant the role read/write permissions spark ui bucket prefixed folder', () => {
668+
Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', {
669+
PolicyDocument: {
670+
Statement: [
671+
{
672+
Action: [
673+
's3:GetObject*',
674+
's3:GetBucket*',
675+
's3:List*',
676+
's3:DeleteObject*',
677+
's3:PutObject',
678+
's3:PutObjectLegalHold',
679+
's3:PutObjectRetention',
680+
's3:PutObjectTagging',
681+
's3:PutObjectVersionTagging',
682+
's3:Abort*',
683+
],
684+
Effect: 'Allow',
685+
Resource: [
686+
{
687+
'Fn::Join': [
688+
'',
689+
[
690+
'arn:',
691+
{ Ref: 'AWS::Partition' },
692+
':s3:::sparkbucketname',
693+
],
694+
],
695+
},
696+
{
697+
'Fn::Join': [
698+
'',
699+
[
700+
'arn:',
701+
{ Ref: 'AWS::Partition' },
702+
`:s3:::sparkbucketname${prefix}/*`,
703+
],
704+
],
705+
},
706+
],
707+
},
708+
codeBucketAccessStatement,
709+
],
710+
Version: '2012-10-17',
711+
},
712+
PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D',
713+
Roles: [{ Ref: 'JobServiceRole4F432993' }],
714+
});
715+
});
716+
717+
it('should set spark arguments on the job', () => {
651718
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', {
652719
DefaultArguments: {
653720
'--enable-spark-ui': 'true',
654-
'--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`,
721+
'--spark-event-logs-path': `s3://${sparkUIBucketName}${prefix}`,
655722
},
656723
});
657724
});

0 commit comments

Comments
 (0)