Skip to content

Commit 7994733

Browse files
authored
fix(glue): PythonRayExecutableProps has inaccurate properties (#28625)
Closes #28570.

- Added RayExecutableProps which supports s3PythonModules
- Added check to block extraPythonFiles usage for Ray jobs
- Added unit tests and integ tests

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent ba41996 commit 7994733

15 files changed

+514
-331
lines changed

packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -169,14 +169,32 @@ interface PythonExecutableProps {
169169
/**
170170
* Additional Python files that AWS Glue adds to the Python path before executing your script.
171171
* Only individual files are supported, directories are not supported.
172+
* Equivalent to a job parameter `--extra-py-files`.
172173
*
173174
* @default - no extra python files and argument is not set
174175
*
175-
* @see `--extra-py-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
176+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
176177
*/
177178
readonly extraPythonFiles?: Code[];
178179
}
179180

181+
interface RayExecutableProps {
182+
/**
183+
* The Python version to use.
184+
*/
185+
readonly pythonVersion: PythonVersion;
186+
187+
/**
188+
* Additional Python modules that AWS Glue adds to the Python path before executing your script.
189+
* Equivalent to a job parameter `--s3-py-modules`.
190+
*
191+
* @default - no extra python modules and argument is not set
192+
*
193+
* @see https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
194+
*/
195+
readonly s3PythonModules?: Code[];
196+
}
197+
180198
interface SharedJobExecutableProps {
181199
/**
182200
* Runtime. It is required for Ray jobs.
@@ -199,10 +217,11 @@ interface SharedJobExecutableProps {
199217
/**
200218
* Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it.
201219
* Only individual files are supported, directories are not supported.
220+
* Equivalent to a job parameter `--extra-files`.
202221
*
203222
* @default [] - no extra files are copied to the working directory
204223
*
205-
* @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
224+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
206225
*/
207226
readonly extraFiles?: Code[];
208227
}
@@ -211,19 +230,21 @@ interface SharedSparkJobExecutableProps extends SharedJobExecutableProps {
211230
/**
212231
* Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script.
213232
* Only individual files are supported, directories are not supported.
233+
* Equivalent to a job parameter `--extra-jars`.
214234
*
215235
* @default [] - no extra jars are added to the classpath
216236
*
217-
* @see `--extra-jars` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
237+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
218238
*/
219239
readonly extraJars?: Code[];
220240

221241
/**
222242
* Setting this value to true prioritizes the customer's extra JAR files in the classpath.
243+
* Equivalent to a job parameter `--user-jars-first`.
223244
*
224245
* @default false - priority is not given to user-provided jars
225246
*
226-
* @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
247+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
227248
*/
228249
readonly extraJarsFirst?: boolean;
229250
}
@@ -234,8 +255,9 @@ interface SharedSparkJobExecutableProps extends SharedJobExecutableProps {
234255
export interface ScalaJobExecutableProps extends SharedSparkJobExecutableProps {
235256
/**
236257
* The fully qualified Scala class name that serves as the entry point for the job.
258+
* Equivalent to a job parameter `--class`.
237259
*
238-
* @see `--class` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
260+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
239261
*/
240262
readonly className: string;
241263
}
@@ -253,7 +275,7 @@ export interface PythonShellExecutableProps extends SharedJobExecutableProps, Py
253275
/**
254276
* Props for creating a Python Ray job executable
255277
*/
256-
export interface PythonRayExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}
278+
export interface PythonRayExecutableProps extends SharedJobExecutableProps, RayExecutableProps {}
257279

258280
/**
259281
* The executable properties related to the Glue job's GlueVersion, JobType and code
@@ -377,14 +399,17 @@ export class JobExecutable {
377399
if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) {
378400
throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON');
379401
}
402+
if (config.extraPythonFiles && type === JobType.RAY.name) {
403+
throw new Error('extraPythonFiles is not supported for Ray jobs');
404+
}
380405
if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL.name && type !== JobType.RAY.name) {
381406
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray');
382407
}
383408
if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY.name) {
384409
throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray');
385410
}
386411
if (config.runtime === undefined && type === JobType.RAY.name) {
387-
throw new Error('Runtime is required for Ray jobs.');
412+
throw new Error('Runtime is required for Ray jobs');
388413
}
389414
this.config = config;
390415
}
@@ -410,8 +435,9 @@ export interface JobExecutableConfig {
410435

411436
/**
412437
* The language of the job (Scala or Python).
438+
* Equivalent to a job parameter `--job-language`.
413439
*
414-
* @see `--job-language` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
440+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
415441
*/
416442
readonly language: JobLanguage;
417443

@@ -441,46 +467,61 @@ export interface JobExecutableConfig {
441467

442468
/**
443469
* The Scala class that serves as the entry point for the job. This applies only if the job language is Scala.
470+
* Equivalent to a job parameter `--class`.
444471
*
445472
* @default - no scala className specified
446473
*
447-
* @see `--class` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
474+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
448475
*/
449476
readonly className?: string;
450477

451478
/**
452479
* Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script.
480+
* Equivalent to a job parameter `--extra-jars`.
453481
*
454482
* @default - no extra jars specified.
455483
*
456-
* @see `--extra-jars` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
484+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
457485
*/
458486
readonly extraJars?: Code[];
459487

460488
/**
461489
* Additional Python files that AWS Glue adds to the Python path before executing your script.
490+
* Equivalent to a job parameter `--extra-py-files`.
462491
*
463492
* @default - no extra python files specified.
464493
*
465-
* @see `--extra-py-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
494+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
466495
*/
467496
readonly extraPythonFiles?: Code[];
468497

498+
/**
499+
* Additional Python modules that AWS Glue adds to the Python path before executing your script.
500+
* Equivalent to a job parameter `--s3-py-modules`.
501+
*
502+
* @default - no extra python modules specified.
503+
*
504+
* @see https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html
505+
*/
506+
readonly s3PythonModules?: Code[];
507+
469508
/**
470509
* Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it.
510+
* Equivalent to a job parameter `--extra-files`.
471511
*
472512
* @default - no extra files specified.
473513
*
474-
* @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
514+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
475515
*/
476516
readonly extraFiles?: Code[];
477517

478518
/**
479519
* Setting this value to true prioritizes the customer's extra JAR files in the classpath.
520+
* Equivalent to a job parameter `--user-jars-first`.
480521
*
481522
* @default - extra jars are not prioritized.
482523
*
483-
* @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
524+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
484525
*/
485526
readonly extraJarsFirst?: boolean;
486527
}

packages/@aws-cdk/aws-glue-alpha/lib/job.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -598,10 +598,11 @@ export interface JobProps {
598598

599599
/**
600600
* Enables the collection of metrics for job profiling.
601+
* Equivalent to a job parameter `--enable-metrics`.
601602
*
602603
* @default - no profiling metrics emitted.
603604
*
604-
* @see `--enable-metrics` at https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
605+
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
605606
*/
606607
readonly enableProfilingMetrics? :boolean;
607608

@@ -801,6 +802,9 @@ export class Job extends JobBase {
801802
if (config.extraPythonFiles && config.extraPythonFiles.length > 0) {
802803
args['--extra-py-files'] = config.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
803804
}
805+
if (config.s3PythonModules && config.s3PythonModules.length > 0) {
806+
args['--s3-py-modules'] = config.s3PythonModules.map(code => this.codeS3ObjectUrl(code)).join(',');
807+
}
804808
if (config.extraFiles && config.extraFiles.length > 0) {
805809
args['--extra-files'] = config.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');
806810
}
@@ -886,8 +890,8 @@ export class Job extends JobBase {
886890
}
887891

888892
/**
889-
* Create a CloudWatch Metric that's based on Glue Job events
890-
* {@see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types}
893+
* Create a CloudWatch Metric that's based on Glue Job events.
894+
* @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types
891895
* The metric has namespace = 'AWS/Events', metricName = 'TriggeredRules' and RuleName = rule.ruleName dimension.
892896
*
893897
* @param rule for use in setting RuleName dimension value

packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/asset.9120cd4aa376310a46a09d707c5e0c75c164b9014f1740e4c5a3637a34dfafe7.py

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/@aws-cdk/aws-glue-alpha/test/integ.job.js.snapshot/aws-glue-job.assets.json

Lines changed: 29 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)