@@ -170,11 +170,11 @@ def test_run_name_vs_trial_component_name_edge_cases(sagemaker_session, input_na
170
170
171
171
def test_run_from_local_and_train_job_and_all_exp_cfg_match (sagemaker_session , dev_sdk_tar ):
172
172
# Notes:
173
- # 1. The 1st Run TC created locally and its exp config was auto passed to the job
173
+ # 1. The 1st Run created locally and its exp config was auto passed to the job
174
174
# 2. In training job, the same exp and run names are given in the Run constructor
175
- # which will load the 1st Run TC in training job and log parameters
175
+ # which will load the 1st Run in training job and log parameters
176
176
# and metrics there
177
- # 3. In a different training job, load the same Run TC and log more parameters there.
177
+ # 3. In a different training job, load the same Run and log more parameters there.
178
178
exp_name = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
179
179
estimator = _generate_estimator (
180
180
sdk_tar = dev_sdk_tar , sagemaker_session = sagemaker_session , exp_name = exp_name
@@ -253,12 +253,12 @@ def test_run_from_local_and_train_job_and_all_exp_cfg_match(sagemaker_session, d
253
253
254
254
def test_run_from_local_and_train_job_and_exp_cfg_not_match (sagemaker_session , dev_sdk_tar ):
255
255
# Notes:
256
- # 1. The 1st Run TC created locally and its exp config was auto passed to the job
257
- # 2. In training job, different exp and run names (i.e. 2nd Run TC ) are given
258
- # in the Run constructor which will create a Run TC according to the run_name
256
+ # 1. The 1st Run created locally and its exp config was auto passed to the job
257
+ # 2. In training job, different exp and run names (i.e. 2nd Run) are given
258
+ # in the Run constructor which will create a Run according to the run_name
259
259
# passed in there and ignore the exp config in the job
260
- # 3. Both metrics and parameters are logged in the Run TC created in job
261
- # 4. In a different training job, load the 2nd Run TC and log more parameters there.
260
+ # 3. Both metrics and parameters are logged in the Run created in job
261
+ # 4. In a different training job, load the 2nd Run and log more parameters there.
262
262
exp_name = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
263
263
exp_name2 = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
264
264
estimator = _generate_estimator (
@@ -328,11 +328,11 @@ def test_run_from_local_and_train_job_and_exp_cfg_not_match(sagemaker_session, d
328
328
329
329
def test_run_from_train_job_only (sagemaker_session , dev_sdk_tar ):
330
330
# Notes:
331
- # 1. No Run TC created locally or specified in experiment config
331
+ # 1. No Run created locally or specified in experiment config
332
332
# 2. In training job, Run is initialized
333
- # which will create a Run TC according to the run_name passed in there
334
- # 3. Both metrics and parameters are logged in the Run TC created in job
335
- # 4. In a different training job, load the same Run TC and log more parameters there.
333
+ # which will create a Run according to the run_name passed in there
334
+ # 3. Both metrics and parameters are logged in the Run created in job
335
+ # 4. In a different training job, load the same Run and log more parameters there.
336
336
exp_name = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
337
337
estimator = _generate_estimator (
338
338
sdk_tar = dev_sdk_tar ,
@@ -370,13 +370,13 @@ def test_run_from_processing_job_and_override_default_exp_config(
370
370
sagemaker_session , dev_sdk_tar , run_obj
371
371
):
372
372
# Notes:
373
- # 1. The 1st Run TC (run) created locally
374
- # 2. Within the 2nd Run TC (run_obj)'s context, invoke processor.run
375
- # but override the default experiment config in context of 2nd Run TC
376
- # with the experiment config of the 1st Run TC
377
- # 3. In the processing job script, load the 1st Run TC via the experiment config
373
+ # 1. The 1st Run (run) created locally
374
+ # 2. Within the 2nd Run (run_obj)'s context, invoke processor.run
375
+ # but override the default experiment config in context of 2nd Run
376
+ # with the experiment config of the 1st Run
377
+ # 3. In the processing job script, load the 1st Run via the experiment config
378
378
# fetched from the job env
379
- # 4. All data are logged in the Run TC either locally or in the processing job
379
+ # 4. All data are logged in the Run either locally or in the processing job
380
380
exp_name = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
381
381
processor = FrameworkProcessor (
382
382
estimator_cls = PyTorch ,
@@ -441,14 +441,15 @@ def test_run_from_processing_job_and_override_default_exp_config(
441
441
442
442
443
443
# dev_sdk_tar is required to trigger generating the dev SDK tar
444
- def test_run_from_transform_job (sagemaker_session , dev_sdk_tar , run_obj , xgboost_latest_version ):
444
+ def test_run_from_transform_job (sagemaker_session , dev_sdk_tar , xgboost_latest_version ):
445
445
# Notes:
446
- # 1. The 1st Run TC (run) created locally
447
- # 2. In the inference script running in a transform job, load the 1st Run TC
448
- # via explicitly passing the experiment_name and run_name of the 1st Run TC
446
+ # 1. The 1st Run (run) created locally
447
+ # 2. In the inference script running in a transform job, load the 1st Run
448
+ # via explicitly passing the experiment_name and run_name of the 1st Run
449
449
# TODO: once we're able to retrieve exp config from the transform job env,
450
450
# we should expand this test and add the load_run() without explicitly supplying the names
451
- # 3. All data are logged in the Run TC either locally or in the transform job
451
+ # 3. All data are logged in the Run either locally or in the transform job
452
+ exp_name = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
452
453
xgb_model_data_s3 = sagemaker_session .upload_data (
453
454
path = os .path .join (_TRANSFORM_MATERIALS , "xgb_model.tar.gz" ),
454
455
key_prefix = "integ-test-data/xgboost/model" ,
@@ -461,8 +462,8 @@ def test_run_from_transform_job(sagemaker_session, dev_sdk_tar, run_obj, xgboost
461
462
source_dir = _EXP_DIR ,
462
463
framework_version = xgboost_latest_version ,
463
464
env = {
464
- "EXPERIMENT_NAME" : run_obj . experiment_name ,
465
- "RUN_NAME" : run_obj . run_name ,
465
+ "EXPERIMENT_NAME" : exp_name ,
466
+ "RUN_NAME" : _RUN_NAME_IN_SCRIPT ,
466
467
},
467
468
)
468
469
transformer = xgboost_model .transformer (
@@ -481,25 +482,83 @@ def test_run_from_transform_job(sagemaker_session, dev_sdk_tar, run_obj, xgboost
481
482
os .path .join (_TRANSFORM_MATERIALS , "data.csv" ), uri , sagemaker_session = sagemaker_session
482
483
)
483
484
484
- with run_obj :
485
- _local_run_log_behaviors (is_complete_log = False , sagemaker_session = sagemaker_session )
486
- transformer .transform (
487
- data = input_data ,
488
- content_type = "text/libsvm" ,
489
- split_type = "Line" ,
490
- wait = True ,
491
- job_name = f"transform-job-{ name ()} " ,
485
+ with cleanup_exp_resources (exp_names = [exp_name ], sagemaker_session = sagemaker_session ):
486
+ with Run (
487
+ experiment_name = exp_name ,
488
+ run_name = _RUN_NAME_IN_SCRIPT ,
489
+ sagemaker_session = sagemaker_session ,
490
+ ) as run :
491
+ _local_run_log_behaviors (is_complete_log = False , sagemaker_session = sagemaker_session )
492
+ transformer .transform (
493
+ data = input_data ,
494
+ content_type = "text/libsvm" ,
495
+ split_type = "Line" ,
496
+ wait = True ,
497
+ job_name = f"transform-job-{ name ()} " ,
498
+ )
499
+
500
+ _check_run_from_local_end_result (
501
+ tc = run ._trial_component ,
502
+ sagemaker_session = sagemaker_session ,
503
+ is_complete_log = False ,
492
504
)
505
+ tc_name = Run ._generate_trial_component_name (
506
+ experiment_name = run .experiment_name , run_name = run .run_name
507
+ )
508
+ _check_run_from_job_result (
509
+ tc_name = tc_name , sagemaker_session = sagemaker_session , is_init = False
510
+ )
511
+
493
512
494
- _check_run_from_local_end_result (
495
- tc = run_obj ._trial_component ,
513
+ # dev_sdk_tar is required to trigger generating the dev SDK tar
514
+ def test_load_run_auto_pass_in_exp_config_to_job (sagemaker_session , dev_sdk_tar ):
515
+ # Notes:
516
+ # 1. On the local side, load the previously created Run and invoke a job within the load_run context
517
+ # 2. In the job script, load the 1st Run via exp config auto-passed to the job env
518
+ # 3. All data are logged in the Run either locally or in the processing job
519
+ exp_name = unique_name_from_base (_EXP_NAME_BASE_IN_SCRIPT )
520
+ processor = FrameworkProcessor (
521
+ estimator_cls = PyTorch ,
522
+ framework_version = "1.10" ,
523
+ py_version = "py38" ,
524
+ instance_count = 1 ,
525
+ instance_type = "ml.m5.xlarge" ,
526
+ role = EXECUTION_ROLE ,
496
527
sagemaker_session = sagemaker_session ,
497
- is_complete_log = False ,
498
528
)
499
- tc_name = Run ._generate_trial_component_name (
500
- experiment_name = run_obj .experiment_name , run_name = run_obj .run_name
501
- )
502
- _check_run_from_job_result (tc_name = tc_name , sagemaker_session = sagemaker_session , is_init = False )
529
+
530
+ with cleanup_exp_resources (exp_names = [exp_name ], sagemaker_session = sagemaker_session ):
531
+ with Run (
532
+ experiment_name = exp_name ,
533
+ run_name = _RUN_NAME_IN_SCRIPT ,
534
+ sagemaker_session = sagemaker_session ,
535
+ ) as run :
536
+ _local_run_log_behaviors (is_complete_log = False , sagemaker_session = sagemaker_session )
537
+
538
+ with load_run (
539
+ experiment_name = run .experiment_name ,
540
+ run_name = run .run_name ,
541
+ sagemaker_session = sagemaker_session ,
542
+ ):
543
+ processor .run (
544
+ code = _PYTHON_PROCESS_SCRIPT ,
545
+ source_dir = _EXP_DIR ,
546
+ job_name = f"process-job-{ name ()} " ,
547
+ wait = True , # wait for the job to finish
548
+ logs = False ,
549
+ )
550
+
551
+ _check_run_from_local_end_result (
552
+ tc = run ._trial_component ,
553
+ sagemaker_session = sagemaker_session ,
554
+ is_complete_log = False ,
555
+ )
556
+ tc_name = Run ._generate_trial_component_name (
557
+ experiment_name = run .experiment_name , run_name = run .run_name
558
+ )
559
+ _check_run_from_job_result (
560
+ tc_name = tc_name , sagemaker_session = sagemaker_session , is_init = False
561
+ )
503
562
504
563
505
564
def test_list (run_obj , sagemaker_session ):
0 commit comments