Skip to content

Commit 858628e

Browse files
keshav-chandak (Keshav Chandak)
authored and committed
fix: fixed create monitoring schedule failing after validation error (aws#4385)
Co-authored-by: Keshav Chandak <[email protected]>
1 parent dbf0a9e commit 858628e

File tree

3 files changed

+114
-24
lines changed

3 files changed

+114
-24
lines changed

src/sagemaker/model_monitor/clarify_model_monitoring.py

+2
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ def create_monitoring_schedule(
669669
self.monitoring_schedule_name = monitor_schedule_name
670670
except Exception:
671671
logger.exception("Failed to create monitoring schedule.")
672+
self.monitoring_schedule_name = None
672673
# noinspection PyBroadException
673674
try:
674675
self.sagemaker_session.sagemaker_client.delete_model_bias_job_definition(
@@ -1109,6 +1110,7 @@ def create_monitoring_schedule(
11091110
self.monitoring_schedule_name = monitor_schedule_name
11101111
except Exception:
11111112
logger.exception("Failed to create monitoring schedule.")
1113+
self.monitoring_schedule_name = None
11121114
# noinspection PyBroadException
11131115
try:
11141116
self.sagemaker_session.sagemaker_client.delete_model_explainability_job_definition(

src/sagemaker/model_monitor/model_monitoring.py

+30 -24
Original file line numberDiff line numberDiff line change
@@ -415,30 +415,34 @@ def create_monitoring_schedule(
415415
if arguments is not None:
416416
self.arguments = arguments
417417

418-
self.sagemaker_session.create_monitoring_schedule(
419-
monitoring_schedule_name=self.monitoring_schedule_name,
420-
schedule_expression=schedule_cron_expression,
421-
statistics_s3_uri=statistics_s3_uri,
422-
constraints_s3_uri=constraints_s3_uri,
423-
monitoring_inputs=[normalized_monitoring_input],
424-
monitoring_output_config=monitoring_output_config,
425-
instance_count=self.instance_count,
426-
instance_type=self.instance_type,
427-
volume_size_in_gb=self.volume_size_in_gb,
428-
volume_kms_key=self.volume_kms_key,
429-
image_uri=self.image_uri,
430-
entrypoint=self.entrypoint,
431-
arguments=self.arguments,
432-
record_preprocessor_source_uri=None,
433-
post_analytics_processor_source_uri=None,
434-
max_runtime_in_seconds=self.max_runtime_in_seconds,
435-
environment=self.env,
436-
network_config=network_config_dict,
437-
role_arn=self.sagemaker_session.expand_role(self.role),
438-
tags=self.tags,
439-
data_analysis_start_time=data_analysis_start_time,
440-
data_analysis_end_time=data_analysis_end_time,
441-
)
418+
try:
419+
self.sagemaker_session.create_monitoring_schedule(
420+
monitoring_schedule_name=self.monitoring_schedule_name,
421+
schedule_expression=schedule_cron_expression,
422+
statistics_s3_uri=statistics_s3_uri,
423+
constraints_s3_uri=constraints_s3_uri,
424+
monitoring_inputs=[normalized_monitoring_input],
425+
monitoring_output_config=monitoring_output_config,
426+
instance_count=self.instance_count,
427+
instance_type=self.instance_type,
428+
volume_size_in_gb=self.volume_size_in_gb,
429+
volume_kms_key=self.volume_kms_key,
430+
image_uri=self.image_uri,
431+
entrypoint=self.entrypoint,
432+
arguments=self.arguments,
433+
record_preprocessor_source_uri=None,
434+
post_analytics_processor_source_uri=None,
435+
max_runtime_in_seconds=self.max_runtime_in_seconds,
436+
environment=self.env,
437+
network_config=network_config_dict,
438+
role_arn=self.sagemaker_session.expand_role(self.role),
439+
tags=self.tags,
440+
data_analysis_start_time=data_analysis_start_time,
441+
data_analysis_end_time=data_analysis_end_time,
442+
)
443+
except Exception:
444+
self.monitoring_schedule_name = None
445+
raise
442446

443447
def update_monitoring_schedule(
444448
self,
@@ -2054,6 +2058,7 @@ def create_monitoring_schedule(
20542058
self.monitoring_schedule_name = monitor_schedule_name
20552059
except Exception:
20562060
logger.exception("Failed to create monitoring schedule.")
2061+
self.monitoring_schedule_name = None
20572062
# noinspection PyBroadException
20582063
try:
20592064
self.sagemaker_session.sagemaker_client.delete_data_quality_job_definition(
@@ -3173,6 +3178,7 @@ def create_monitoring_schedule(
31733178
self.monitoring_schedule_name = monitor_schedule_name
31743179
except Exception:
31753180
logger.exception("Failed to create monitoring schedule.")
3181+
self.monitoring_schedule_name = None
31763182
# noinspection PyBroadException
31773183
try:
31783184
self.sagemaker_session.sagemaker_client.delete_model_quality_job_definition(

tests/integ/test_model_monitor.py

+82
Original file line numberDiff line numberDiff line change
@@ -2488,3 +2488,85 @@ def test_one_time_monitoring_schedule(sagemaker_session):
24882488
my_default_monitor.stop_monitoring_schedule()
24892489
my_default_monitor.delete_monitoring_schedule()
24902490
raise e
2491+
2492+
2493+
def test_create_monitoring_schedule_with_validation_error(sagemaker_session):
    """Check that a monitor can still create a schedule after a rejected attempt.

    The first ``create_monitoring_schedule`` call uses a schedule name longer
    than 63 characters and is expected to fail with a ``ValidationException``.
    The same monitor object is then reused with a valid name; that second call
    must succeed, after which the schedule is stopped and deleted.

    Args:
        sagemaker_session: Session fixture used for the monitor, the default
            bucket lookup, and loading the baseline statistics/constraints.
    """
    my_default_monitor = DefaultModelMonitor(
        role=ROLE,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        volume_size_in_gb=VOLUME_SIZE_IN_GB,
        max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
        sagemaker_session=sagemaker_session,
        env=ENVIRONMENT,
        tags=TAGS,
        network_config=NETWORK_CONFIG,
    )

    output_s3_uri = os.path.join(
        "s3://",
        sagemaker_session.default_bucket(),
        "integ-test-monitoring-output-bucket",
        str(uuid.uuid4()),
    )

    data_captured_destination_s3_uri = os.path.join(
        "s3://",
        sagemaker_session.default_bucket(),
        "sagemaker-serving-batch-transform",
        str(uuid.uuid4()),
    )

    batch_transform_input = BatchTransformInput(
        data_captured_destination_s3_uri=data_captured_destination_s3_uri,
        destination="/opt/ml/processing/output",
        dataset_format=MonitoringDatasetFormat.csv(header=False),
    )

    statistics = Statistics.from_file_path(
        statistics_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/statistics.json"),
        sagemaker_session=sagemaker_session,
    )

    constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"),
        sagemaker_session=sagemaker_session,
    )

    # First attempt: the over-long name must be rejected.
    try:
        my_default_monitor.create_monitoring_schedule(
            monitor_schedule_name="schedule-name-more-than-63-characters-to-get-a-validation-exception",
            batch_transform_input=batch_transform_input,
            output_s3_uri=output_s3_uri,
            statistics=statistics,
            constraints=constraints,
            schedule_cron_expression=CronExpressionGenerator.now(),
            data_analysis_start_time="-PT1H",
            data_analysis_end_time="-PT0H",
            enable_cloudwatch_metrics=ENABLE_CLOUDWATCH_METRICS,
        )
    except Exception as e:
        assert "ValidationException" in str(e)
    else:
        # Without this branch the test would pass even if no validation error
        # were raised, leaving the scenario under test unexercised.
        raise AssertionError(
            "Expected a ValidationException for a schedule name longer than 63 characters."
        )

    # Second attempt on the same monitor object with a valid name.
    my_default_monitor.create_monitoring_schedule(
        monitor_schedule_name=unique_name_from_base("valid-schedule-name"),
        batch_transform_input=batch_transform_input,
        output_s3_uri=output_s3_uri,
        statistics=statistics,
        constraints=constraints,
        schedule_cron_expression=CronExpressionGenerator.now(),
        data_analysis_start_time="-PT1H",
        data_analysis_end_time="-PT0H",
        enable_cloudwatch_metrics=ENABLE_CLOUDWATCH_METRICS,
    )

    try:
        _wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
    finally:
        # Single cleanup path: runs whether or not the wait succeeded, and the
        # original exception (if any) propagates unchanged afterwards.
        my_default_monitor.stop_monitoring_schedule()
        my_default_monitor.delete_monitoring_schedule()

0 commit comments

Comments
 (0)