Skip to content

change: change s3UploadMode of sagemaker clarify processing output for computer vision jobs. #3754

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Apr 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8a5fa72
change: s3_upload mode for CV jobs in clarify processing output
Satish615 Mar 24, 2023
fd48fa8
Merge branch 'master' into cv_s3_upload
Satish615 Mar 24, 2023
ee72153
fix: datasetype Enum value in clarify processing output
Satish615 Mar 27, 2023
0f7af1a
temp: update image_uri_config file for clarify
Satish615 Mar 27, 2023
a30014c
Merge branch 'master' into cv_s3_upload
Satish615 Mar 31, 2023
3ed37f2
fix: pytestdoc failures for s3_upload_mode changes
Satish615 Apr 3, 2023
bb8b3c7
Merge remote-tracking branch 'origin/cv_s3_upload'
Satish615 Apr 3, 2023
1b159d3
fix: update image_config_uri to prod account for clarify pdx region
Satish615 Apr 5, 2023
480ba14
Merge branch 'master' into cv_s3_upload
Satish615 Apr 5, 2023
d110015
Merge remote-tracking branch 'origin/cv_s3_upload'
Satish615 Apr 5, 2023
5984ea4
fix: unit tests, clarify_check_step.py with s3_upload_mode value
Satish615 Apr 5, 2023
aec3ded
Merge branch 'master' into cv_s3_upload
Satish615 Apr 5, 2023
ceee655
Merge branch 'master' into cv_s3_upload
Satish615 Apr 7, 2023
5c8f1be
Merge branch 'master' into cv_s3_upload
Satish615 Apr 11, 2023
33d1df6
Merge branch 'aws:master' into cv_s3_upload
Satish615 Apr 13, 2023
e3509b1
Merge branch 'master' into cv_s3_upload
Satish615 Apr 13, 2023
e331619
Merge branch 'master' into cv_s3_upload
Satish615 Apr 17, 2023
2a6b242
Merge branch 'master' into cv_s3_upload
Satish615 Apr 18, 2023
78df06c
Merge branch 'master' into cv_s3_upload
Satish615 Apr 19, 2023
afa4014
Merge branch 'master' into cv_s3_upload
Satish615 Apr 19, 2023
1a20bc1
Merge branch 'master' into cv_s3_upload
Satish615 Apr 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions src/sagemaker/clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import tempfile
from abc import ABC, abstractmethod
from typing import List, Union, Dict, Optional, Any

from enum import Enum
from schema import Schema, And, Use, Or, Optional as SchemaOptional, Regex

from sagemaker import image_uris, s3, utils
Expand Down Expand Up @@ -304,6 +304,16 @@
)


class DatasetType(Enum):
    """Content-type strings identifying the dataset formats of an analysis config.

    Each member's ``value`` is the string that is compared against the
    ``dataset_type`` entry of the analysis config.
    """

    TEXTCSV = "text/csv"
    JSONLINES = "application/jsonlines"
    JSON = "application/json"
    PARQUET = "application/x-parquet"
    IMAGE = "application/x-image"


class DataConfig:
"""Config object related to configurations of the input and output dataset."""

Expand Down Expand Up @@ -1451,7 +1461,7 @@ def _run(
source=self._CLARIFY_OUTPUT,
destination=data_config.s3_output_path,
output_name="analysis_result",
s3_upload_mode="EndOfJob",
s3_upload_mode=ProcessingOutputHandler.get_s3_upload_mode(analysis_config),
)

return super().run(
Expand Down Expand Up @@ -2171,6 +2181,33 @@ def _upload_analysis_config(analysis_config_file, s3_output_path, sagemaker_sess
)


class ProcessingOutputHandler:
    """Resolves parameter values for the processing output of an analysis job."""

    class S3UploadMode(Enum):
        """Enum values for the different upload modes to an S3 bucket."""

        CONTINUOUS = "Continuous"
        ENDOFJOB = "EndOfJob"

    @classmethod
    def get_s3_upload_mode(cls, analysis_config: Dict[str, Any]) -> str:
        """Select the S3 upload mode from the ``dataset_type`` of the analysis config.

        Args:
            analysis_config (dict): Config following the analysis_config.json format.
                Only its ``"dataset_type"`` entry is read.

        Returns:
            str: ``"Continuous"`` when ``dataset_type`` equals ``DatasetType.IMAGE.value``,
                ``"EndOfJob"`` otherwise, including when ``"dataset_type"`` is absent.
        """
        dataset_type = analysis_config.get("dataset_type")
        if dataset_type is None:
            # A config without a dataset type falls back to "EndOfJob" instead of
            # raising KeyError, so choosing an upload mode never aborts the job setup.
            return cls.S3UploadMode.ENDOFJOB.value

        if dataset_type == DatasetType.IMAGE.value:
            return cls.S3UploadMode.CONTINUOUS.value
        return cls.S3UploadMode.ENDOFJOB.value


def _set(value, key, dictionary):
"""Sets dictionary[key] = value if value is not None."""
if value is not None:
Expand Down
3 changes: 2 additions & 1 deletion src/sagemaker/workflow/clarify_check_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ModelConfig,
ModelPredictedLabelConfig,
SHAPConfig,
ProcessingOutputHandler,
_upload_analysis_config,
SageMakerClarifyProcessor,
_set,
Expand Down Expand Up @@ -391,7 +392,7 @@ def _generate_processing_job_parameters(
source=SageMakerClarifyProcessor._CLARIFY_OUTPUT,
destination=data_config.s3_output_path,
output_name="analysis_result",
s3_upload_mode="EndOfJob",
s3_upload_mode=ProcessingOutputHandler.get_s3_upload_mode(analysis_config),
)
return dict(config_input=config_input, data_input=data_input, result_output=result_output)

Expand Down
14 changes: 14 additions & 0 deletions tests/unit/test_clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
TextConfig,
ImageConfig,
_AnalysisConfigGenerator,
DatasetType,
ProcessingOutputHandler,
)

JOB_NAME_PREFIX = "my-prefix"
Expand Down Expand Up @@ -1786,3 +1788,15 @@ def test_invalid_analysis_config(data_config, data_bias_config, model_config):
pre_training_methods="all",
post_training_methods="all",
)


class TestProcessingOutputHandler:
    """Checks the upload mode chosen by ``ProcessingOutputHandler.get_s3_upload_mode``."""

    def test_get_s3_upload_mode_image(self):
        """An image dataset type yields the continuous upload mode."""
        expected_mode = ProcessingOutputHandler.S3UploadMode.CONTINUOUS.value
        config = {"dataset_type": DatasetType.IMAGE.value}
        assert ProcessingOutputHandler.get_s3_upload_mode(config) == expected_mode

    def test_get_s3_upload_mode_text(self):
        """A CSV dataset type yields the end-of-job upload mode."""
        expected_mode = ProcessingOutputHandler.S3UploadMode.ENDOFJOB.value
        config = {"dataset_type": DatasetType.TEXTCSV.value}
        assert ProcessingOutputHandler.get_s3_upload_mode(config) == expected_mode