From 69657587d2b3d831e969e502050038b58219ba98 Mon Sep 17 00:00:00 2001 From: pialidas Date: Wed, 10 Jun 2020 11:59:16 -0400 Subject: [PATCH 1/9] Adding selectable inference for automl generated model 1. Added attach() method to AutoML to allow attaching an AutoML object to an existing AutoMLJob 2. Added create_model() method to AutoML that returns a PipelineModel that can be used by the 3. Added a util function validate_and_update_inference_response() to update the inference container list based on the requested inference response keys 4. Updated and added unit tests --- src/sagemaker/automl/automl.py | 356 ++++++++++++++------ tests/data/automl/data/iris_transform.csv | 15 + tests/integ/test_auto_ml.py | 65 ++++ tests/unit/sagemaker/automl/test_auto_ml.py | 186 ++++++++-- 4 files changed, 492 insertions(+), 130 deletions(-) create mode 100644 tests/data/automl/data/iris_transform.csv diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index 794e03aee1..b060d7e369 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -100,6 +100,61 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None): if wait: self.latest_auto_ml_job.wait(logs=logs) + @classmethod + def attach(cls, job_name, sagemaker_session=None): + """Attach to an existing AutoML job. + + Args: + job_name (str): AutoML job name + sagemaker_session (sagemaker.session.Session): A SageMaker Session + object, used for SageMaker interactions (default: None). 
If not + specified, the one originally associated with the ``AutoML`` instance is used.: + + Returns: + + """ + sagemaker_session = sagemaker_session or Session() + + _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name) + automl_job_tags = sagemaker_session.sagemaker_client.list_tags( + _auto_ml_job_desc["AutoMLJobArn"] + ) + amlj = AutoML( + role=_auto_ml_job_desc["RoleArn"], + target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"], + problem_type=_auto_ml_job_desc["ProblemType"], + output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"], + output_kms_key=_auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"), + base_job_name=job_name, + sagemaker_session=sagemaker_session, + volume_kms_key=_auto_ml_job_desc["AutoMLJobConfig"] + .get("SecurityConfig", {}) + .get("VolumeKmsKeyId"), + encrypt_inter_container_traffic=_auto_ml_job_desc["AutoMLJobConfig"] + .get("SecurityConfig", {}) + .get("EnableInterContainerTrafficEncryption", False), + vpc_config=_auto_ml_job_desc["AutoMLJobConfig"] + .get("SecurityConfig", {}) + .get("VpcConfig"), + max_candidates=_auto_ml_job_desc["AutoMLJobConfig"]["CompletionCriteria"][ + "MaxCandidates" + ], + max_runtime_per_training_job_in_seconds=_auto_ml_job_desc["AutoMLJobConfig"][ + "CompletionCriteria" + ]["MaxRuntimePerTrainingJobInSeconds"], + total_job_runtime_in_seconds=_auto_ml_job_desc["AutoMLJobConfig"]["CompletionCriteria"][ + "MaxAutoMLJobRuntimeInSeconds" + ], + job_objective=_auto_ml_job_desc["AutoMLJobObjective"]["MetricName"], + generate_candidate_definitions_only=_auto_ml_job_desc[ + "GenerateCandidateDefinitionsOnly" + ], + tags=automl_job_tags, + ) + amlj.current_job_name = job_name + amlj._auto_ml_job_desc = _auto_ml_job_desc + return amlj + def describe_auto_ml_job(self, job_name=None): """Returns the job description of an AutoML job for the given job name. 
@@ -187,49 +242,29 @@ def list_candidates( return self.sagemaker_session.list_candidates(**list_candidates_args)["Candidates"] - def deploy( + def create_model( self, - initial_instance_count, - instance_type, - candidate=None, + name, sagemaker_session=None, - name=None, - endpoint_name=None, - tags=None, - wait=True, - update_endpoint=False, + candidate=None, vpc_config=None, enable_network_isolation=False, model_kms_key=None, predictor_cls=None, + inference_response_keys=None, ): - """Deploy a candidate to a SageMaker Inference Pipeline and return a Predictor + """Creates a model from a given candidate or the best candidate + from the automl job Args: - initial_instance_count (int): The initial number of instances to run - in the ``Endpoint`` created from this ``Model``. - instance_type (str): The EC2 instance type to deploy this Model to. - For example, 'ml.p2.xlarge'. + name (str): The pipeline model name. + sagemaker_session (sagemaker.session.Session): A SageMaker Session + object, used for SageMaker interactions (default: None). If not + specified, the one originally associated with the ``AutoML`` instance is used.: candidate (CandidateEstimator or dict): a CandidateEstimator used for deploying to a SageMaker Inference Pipeline. If None, the best candidate will be used. If the candidate input is a dict, a CandidateEstimator will be created from it. - sagemaker_session (sagemaker.session.Session): A SageMaker Session - object, used for SageMaker interactions (default: None). If not - specified, the one originally associated with the ``AutoML`` instance is used. - name (str): The pipeline model name. If None, a default model name will - be selected on each ``deploy``. - endpoint_name (str): The name of the endpoint to create (default: - None). If not specified, a unique endpoint name will be created. - tags (List[dict[str, str]]): The list of tags to attach to this - specific endpoint. 
- wait (bool): Whether the call should wait until the deployment of - model completes (default: True). - update_endpoint (bool): Flag to update the model in an existing - Amazon SageMaker endpoint. If True, this will deploy a new - EndpointConfig to an already existing endpoint and delete - resources corresponding to the previous EndpointConfig. If - False, a new endpoint will be created. Default: False vpc_config (dict): Specifies a VPC that your training jobs and hosted models have access to. Contents include "SecurityGroupIds" and "Subnets". enable_network_isolation (bool): Isolates the training container. No inbound or @@ -241,11 +276,12 @@ def deploy( function to call to create a predictor (default: None). If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. + inference_response_keys (list): List of keys for response content. The order of the + keys will dictate the content order in the response. Returns: - callable[string, sagemaker.session.Session] or ``None``: - If ``predictor_cls`` is specified, the invocation of ``self.predictor_cls`` on - the created endpoint name. Otherwise, ``None``. 
+ PipelineModel object + """ sagemaker_session = sagemaker_session or self.sagemaker_session @@ -256,50 +292,46 @@ def deploy( candidate = CandidateEstimator(candidate, sagemaker_session=sagemaker_session) inference_containers = candidate.containers - endpoint_name = endpoint_name or self.current_job_name - - return self._deploy_inference_pipeline( - inference_containers, - initial_instance_count=initial_instance_count, - instance_type=instance_type, - name=name, - sagemaker_session=sagemaker_session, - endpoint_name=endpoint_name, - tags=tags, - wait=wait, - update_endpoint=update_endpoint, - vpc_config=vpc_config, - enable_network_isolation=enable_network_isolation, - model_kms_key=model_kms_key, - predictor_cls=predictor_cls, - ) - def _check_problem_type_and_job_objective(self, problem_type, job_objective): - """Validate if problem_type and job_objective are both None or are both provided. + self.validate_and_update_inference_response(inference_containers, inference_response_keys) - Args: - problem_type (str): The type of problem of this AutoMLJob. Valid values are - "Regression", "BinaryClassification", "MultiClassClassification". - job_objective (dict): AutoMLJob objective, contains "AutoMLJobObjectiveType" (optional), - "MetricName" and "Value". + # construct Model objects + models = [] - Raises (ValueError): raises ValueError if one of problem_type and job_objective is provided - while the other is None. + for container in inference_containers: + image = container["Image"] + model_data = container["ModelDataUrl"] + env = container["Environment"] - """ - if not (problem_type and job_objective) and (problem_type or job_objective): - raise ValueError( - "One of problem type and objective metric provided. " - "Either both of them should be provided or none of them should be provided." 
+ model = Model( + image=image, + model_data=model_data, + role=self.role, + env=env, + vpc_config=vpc_config, + sagemaker_session=sagemaker_session or self.sagemaker_session, + enable_network_isolation=enable_network_isolation, + model_kms_key=model_kms_key, ) + models.append(model) + + pipeline = PipelineModel( + models=models, + role=self.role, + predictor_cls=predictor_cls, + name=name, + vpc_config=vpc_config, + sagemaker_session=sagemaker_session or self.sagemaker_session, + ) + return pipeline - def _deploy_inference_pipeline( + def deploy( self, - inference_containers, initial_instance_count, instance_type, - name=None, + candidate=None, sagemaker_session=None, + name=None, endpoint_name=None, tags=None, wait=True, @@ -308,21 +340,24 @@ def _deploy_inference_pipeline( enable_network_isolation=False, model_kms_key=None, predictor_cls=None, + inference_response_keys=None, ): - """Deploy a SageMaker Inference Pipeline. + """Deploy a candidate to a SageMaker Inference Pipeline and return a Predictor Args: - inference_containers (list): a list of inference container definitions initial_instance_count (int): The initial number of instances to run in the ``Endpoint`` created from this ``Model``. instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - name (str): The pipeline model name. If None, a default model name will - be selected on each ``deploy``. + candidate (CandidateEstimator or dict): a CandidateEstimator used for deploying + to a SageMaker Inference Pipeline. If None, the best candidate will + be used. If the candidate input is a dict, a CandidateEstimator will be + created from it. sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker interactions (default: None). If not - specified, one is created using the default AWS configuration - chain. + specified, the one originally associated with the ``AutoML`` instance is used. + name (str): The pipeline model name. 
If None, a default model name will + be selected on each ``deploy``. endpoint_name (str): The name of the endpoint to create (default: None). If not specified, a unique endpoint name will be created. tags (List[dict[str, str]]): The list of tags to attach to this @@ -334,44 +369,38 @@ def _deploy_inference_pipeline( EndpointConfig to an already existing endpoint and delete resources corresponding to the previous EndpointConfig. If False, a new endpoint will be created. Default: False - vpc_config (dict): information about vpc configuration, optionally - contains "SecurityGroupIds", "Subnets" + vpc_config (dict): Specifies a VPC that your training jobs and hosted models have + access to. Contents include "SecurityGroupIds" and "Subnets". + enable_network_isolation (bool): Isolates the training container. No inbound or + outbound network calls can be made, except for calls between peers within a + training cluster for distributed training. Default: False model_kms_key (str): KMS key ARN used to encrypt the repacked model archive file if the model is repacked predictor_cls (callable[string, sagemaker.session.Session]): A function to call to create a predictor (default: None). If specified, ``deploy()`` returns the result of invoking this function on the created endpoint name. - """ - # construct Model objects - models = [] - for container in inference_containers: - image = container["Image"] - model_data = container["ModelDataUrl"] - env = container["Environment"] + inference_response_keys (list): List of keys for response content. The order of the + keys will dictate the content order in the response. 
- model = Model( - image=image, - model_data=model_data, - role=self.role, - env=env, - vpc_config=vpc_config, - sagemaker_session=sagemaker_session or self.sagemaker_session, - enable_network_isolation=enable_network_isolation, - model_kms_key=model_kms_key, - ) - models.append(model) - - pipeline = PipelineModel( - models=models, - role=self.role, - predictor_cls=predictor_cls, + Returns: + callable[string, sagemaker.session.Session] or ``None``: + If ``predictor_cls`` is specified, the invocation of ``self.predictor_cls`` on + the created endpoint name. Otherwise, ``None``. + """ + sagemaker_session = sagemaker_session or self.sagemaker_session + model = self.create_model( name=name, + sagemaker_session=sagemaker_session, + candidate=candidate, + inference_response_keys=inference_response_keys, vpc_config=vpc_config, - sagemaker_session=sagemaker_session or self.sagemaker_session, + enable_network_isolation=enable_network_isolation, + model_kms_key=model_kms_key, + predictor_cls=predictor_cls, ) - return pipeline.deploy( + return model.deploy( initial_instance_count=initial_instance_count, instance_type=instance_type, endpoint_name=endpoint_name, @@ -380,6 +409,25 @@ def _deploy_inference_pipeline( update_endpoint=update_endpoint, ) + def _check_problem_type_and_job_objective(self, problem_type, job_objective): + """Validate if problem_type and job_objective are both None or are both provided. + + Args: + problem_type (str): The type of problem of this AutoMLJob. Valid values are + "Regression", "BinaryClassification", "MultiClassClassification". + job_objective (dict): AutoMLJob objective, contains "AutoMLJobObjectiveType" (optional), + "MetricName" and "Value". + + Raises (ValueError): raises ValueError if one of problem_type and job_objective is provided + while the other is None. + + """ + if not (problem_type and job_objective) and (problem_type or job_objective): + raise ValueError( + "One of problem type and objective metric provided. 
" + "Either both of them should be provided or none of them should be provided." + ) + def _prepare_for_auto_ml_job(self, job_name=None): """Set any values in the AutoMLJob that need to be set before creating request. @@ -400,6 +448,114 @@ def _prepare_for_auto_ml_job(self, job_name=None): if self.output_path is None: self.output_path = "s3://{}/".format(self.sagemaker_session.default_bucket()) + @classmethod + def _get_supported_inference_keys(cls, container, default=None): + """Returns the inference keys supported by the container. + + Args: + container (dict): Dictionary representing container + default (object): The value to be returned if the container definition + has no marker environment variable + + Returns: + List of keys the container support or default + + Raises: + Value error if the default is None and the container definition has + no marker environment variable. + """ + try: + return [ + x.strip() + for x in container["Environment"]["SAGEMAKER_INFERENCE_SUPPORT"].split(",") + ] + except KeyError: + if default is None: + raise + return default + + @classmethod + def _check_inference_keys(cls, inference_response_keys, containers): + """Given an inference container list, checks if the pipeline supports the + requested inference keys + + Args: + inference_response_keys (list): List of keys for inference response content + containers (list): list of inference container + + Raises: + ValueError, if one or more keys in inference_response_keys are not supported + the inference pipeline. + + """ + if not inference_response_keys: + return + try: + supported_inference_keys = cls._get_supported_inference_keys(container=containers[-1]) + except KeyError: + raise ValueError( + "The inference model does not support selection of inference content beyond " + "it's default content. Please retry without setting " + "inference_response_keys key word argument." 
+ ) + bad_keys = [] + for key in inference_response_keys: + if key not in supported_inference_keys: + bad_keys.append(key) + + if bad_keys: + raise ValueError( + "Requested inference output keys [{bad_keys_str}] are unsupported. " + "The supported inference keys are [{allowed_keys_str}]".format( + bad_keys_str=", ".join(bad_keys), + allowed_keys_str=", ".join(supported_inference_keys), + ) + ) + + @classmethod + def validate_and_update_inference_response(cls, inference_containers, inference_response_keys): + """Validates the requested inference keys and updates inference containers to emit the + requested content in the inference response. + + Args: + inference_containers (list): list of inference containers + inference_response_keys (list): list of inference response keys + + Raises: + ValueError: if one or more of inference_response_keys are unsupported by the model + + """ + if not inference_response_keys: + return + + cls._check_inference_keys(inference_response_keys, inference_containers) + + previous_container_output = None + + for container in inference_containers: + supported_inference_keys_container = cls._get_supported_inference_keys( + container, default=[] + ) + if not supported_inference_keys_container: + previous_container_output = None + continue + current_container_output = None + for key in inference_response_keys: + if key in supported_inference_keys_container: + current_container_output = ( + current_container_output + "," + key if current_container_output else key + ) + + if previous_container_output: + container["Environment"].update( + {"SAGEMAKER_INFERENCE_INPUT": previous_container_output} + ) + if current_container_output: + container["Environment"].update( + {"SAGEMAKER_INFERENCE_OUTPUT": current_container_output} + ) + previous_container_output = current_container_output + class AutoMLInput(object): """Accepts parameters that specify an S3 input for an auto ml job and provides diff --git a/tests/data/automl/data/iris_transform.csv 
b/tests/data/automl/data/iris_transform.csv new file mode 100644 index 0000000000..114f823d28 --- /dev/null +++ b/tests/data/automl/data/iris_transform.csv @@ -0,0 +1,15 @@ +6.4,2.8,5.6,2.2 +5.0,2.3,3.3,1.0 +4.9,2.5,4.5,1.7 +4.9,3.1,1.5,0.1 +5.7,3.8,1.7,0.3 +4.4,3.2,1.3,0.2 +5.4,3.4,1.5,0.4 +6.9,3.1,5.1,2.3 +6.7,3.1,4.4,1.4 +5.1,3.7,1.5,0.4 +5.2,2.7,3.9,1.4 +6.9,3.1,4.9,1.5 +5.8,4.0,1.2,0.2 +5.4,3.9,1.7,0.4 +7.7,3.8,6.7,2.2 \ No newline at end of file diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py index 26e90ecf74..ed27492f16 100644 --- a/tests/integ/test_auto_ml.py +++ b/tests/integ/test_auto_ml.py @@ -32,6 +32,7 @@ DATA_DIR = os.path.join(DATA_DIR, "automl", "data") TRAINING_DATA = os.path.join(DATA_DIR, "iris_training.csv") TEST_DATA = os.path.join(DATA_DIR, "iris_test.csv") +TRANSFORM_DATA = os.path.join(DATA_DIR, "iris_transform.csv") PROBLEM_TYPE = "MultiClassClassification" BASE_JOB_NAME = "auto-ml" @@ -180,6 +181,42 @@ def test_auto_ml_describe_auto_ml_job(sagemaker_session): assert desc["OutputDataConfig"] == expected_default_output_config +@pytest.mark.skipif( + tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS, + reason="AutoML is not supported in the region yet.", +) +def test_auto_ml_attach(sagemaker_session): + expected_default_input_config = [ + { + "DataSource": { + "S3DataSource": { + "S3DataType": "S3Prefix", + "S3Uri": "s3://{}/{}/input/iris_training.csv".format( + sagemaker_session.default_bucket(), PREFIX + ), + } + }, + "TargetAttributeName": TARGET_ATTRIBUTE_NAME, + } + ] + expected_default_output_config = { + "S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket()) + } + + auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session) + + attached_automl_job = AutoML.attach( + job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session + ) + attached_desc = attached_automl_job.describe_auto_ml_job() + assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME + assert 
attached_desc["AutoMLJobStatus"] == "Completed" + assert isinstance(attached_desc["BestCandidate"], dict) + assert attached_desc["InputDataConfig"] == expected_default_input_config + assert attached_desc["AutoMLJobConfig"] == EXPECTED_DEFAULT_JOB_CONFIG + assert attached_desc["OutputDataConfig"] == expected_default_output_config + + @pytest.mark.skipif( tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS, reason="AutoML is not supported in the region yet.", @@ -240,6 +277,34 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type): sagemaker_session.sagemaker_client.delete_endpoint(EndpointName=endpoint_name) +@pytest.mark.canary_quick +def test_create_model_best_candidate(sagemaker_session, cpu_instance_type): + auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session) + + auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME) + best_candidate = auto_ml.best_candidate() + + with timeout(minutes=2): + pipeline_model = auto_ml.create_model( + candidate=best_candidate, + sagemaker_session=sagemaker_session, + vpc_config=None, + enable_network_isolation=False, + model_kms_key=None, + predictor_cls=None, + ) + inputs = sagemaker_session.upload_data( + path=TRANSFORM_DATA, key_prefix=PREFIX + "/transform_input" + ) + pipeline_model.transformer( + instance_count=1, + instance_type=cpu_instance_type, + assemble_with="Line", + output_path="s3://{}/{}".format(sagemaker_session.default_bucket(), "transform_test"), + accept="text/csv", + ).transform(data=inputs, content_type="text/csv", split_type="Line", join_source="Input") + + @pytest.mark.skipif( tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS, reason="AutoML is not supported in the region yet.", diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py index 8ef0cd31da..5ff99e3b8d 100644 --- a/tests/unit/sagemaker/automl/test_auto_ml.py +++ b/tests/unit/sagemaker/automl/test_auto_ml.py @@ -14,7 +14,7 @@ import pytest from mock import Mock, 
patch -from sagemaker import AutoML, AutoMLJob, AutoMLInput, CandidateEstimator +from sagemaker import AutoML, AutoMLJob, AutoMLInput, CandidateEstimator, PipelineModel from sagemaker.predictor import RealTimePredictor MODEL_DATA = "s3://bucket/model.tar.gz" @@ -37,12 +37,14 @@ JOB_NAME = "default-job-name" JOB_NAME_2 = "banana-auto-ml-job" +JOB_NAME_3 = "descriptive-auto-ml-job" VOLUME_KMS_KEY = "volume-kms-key-id-string" OUTPUT_KMS_KEY = "output-kms-key-id-string" OUTPUT_PATH = "s3://my_other_bucket/" BASE_JOB_NAME = "banana" PROBLEM_TYPE = "BinaryClassification" BLACKLISTED_ALGORITHM = ["xgboost"] +LIST_TAGS_RESULT = {"Tags": [{"Key": "key1", "Value": "value1"}]} MAX_CANDIDATES = 10 MAX_RUNTIME_PER_TRAINING_JOB = 3600 TOTAL_JOB_RUNTIME = 36000 @@ -57,6 +59,33 @@ BEST_CANDIDATE_2 = {"best-candidate": "best-trial-2"} AUTO_ML_DESC = {"AutoMLJobName": JOB_NAME, "BestCandidate": BEST_CANDIDATE} AUTO_ML_DESC_2 = {"AutoMLJobName": JOB_NAME_2, "BestCandidate": BEST_CANDIDATE_2} +AUTO_ML_DESC_3 = { + "AutoMLJobArn": "automl_job_arn", + "AutoMLJobConfig": { + "CompletionCriteria": { + "MaxAutoMLJobRuntimeInSeconds": 3000, + "MaxCandidates": 28, + "MaxRuntimePerTrainingJobInSeconds": 100, + }, + "SecurityConfig": {"EnableInterContainerTrafficEncryption": True}, + }, + "AutoMLJobName": "mock_automl_job_name", + "AutoMLJobObjective": {"MetricName": "Auto"}, + "AutoMLJobSecondaryStatus": "Completed", + "AutoMLJobStatus": "Completed", + "GenerateCandidateDefinitionsOnly": False, + "InputDataConfig": [ + { + "DataSource": { + "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": "s3://input/prefix"} + }, + "TargetAttributeName": "y", + } + ], + "OutputDataConfig": {"KmsKeyId": "string", "S3OutputPath": "s3://output_prefix"}, + "ProblemType": "Auto", + "RoleArn": "mock_role_arn", +} INFERENCE_CONTAINERS = [ { @@ -76,6 +105,33 @@ }, ] +CLASSIFICATION_INFERENCE_CONTAINERS = [ + { + "Environment": {"SAGEMAKER_PROGRAM": "sagemaker_serve"}, + "Image": 
"account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-data-processing:1.0-cpu-py3", + "ModelDataUrl": "s3://sagemaker-us-west-2-account/sagemaker-auto-ml-gamma/data-processing/output", + }, + { + "Environment": { + "MAX_CONTENT_LENGTH": "20000000", + "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label", + "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label", + }, + "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-training:1.0-cpu-py3", + "ModelDataUrl": "s3://sagemaker-us-west-2-account/sagemaker-auto-ml-gamma/training/output", + }, + { + "Environment": { + "INVERSE_LABEL_TRANSFORM": "1", + "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label,labels", + "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label", + "SAGEMAKER_INFERENCE_INPUT": "predicted_label", + }, + "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-transform:1.0-cpu-py3", + "ModelDataUrl": "s3://sagemaker-us-west-2-account/sagemaker-auto-ml-gamma/transform/output", + }, +] + CANDIDATE_STEPS = [ { "CandidateStepName": "training-job/sagemaker-auto-ml-gamma/data-processing", @@ -97,6 +153,12 @@ "CandidateSteps": CANDIDATE_STEPS, } +CLASSIFICATION_CANDIDATE_DICT = { + "CandidateName": "candidate_mock", + "InferenceContainers": CLASSIFICATION_INFERENCE_CONTAINERS, + "CandidateSteps": CANDIDATE_STEPS, +} + TRAINING_JOB = { "AlgorithmSpecification": { "AlgorithmName": "string", @@ -143,6 +205,8 @@ def describe_auto_ml_job_mock(job_name=None): return AUTO_ML_DESC elif job_name == JOB_NAME_2: return AUTO_ML_DESC_2 + elif job_name == JOB_NAME_3: + return AUTO_ML_DESC_3 @pytest.fixture() @@ -168,7 +232,7 @@ def sagemaker_session(): name="describe_transform_job", return_value=TRANSFORM_JOB ) sms.list_candidates = Mock(name="list_candidates", return_value={"Candidates": []}) - + sms.sagemaker_client.list_tags = Mock(name="list_tags", return_value=LIST_TAGS_RESULT) return sms @@ -452,29 +516,17 @@ def test_deploy(sagemaker_session, 
candidate_mock): auto_ml = AutoML( role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session ) + mock_pipeline = Mock(name="pipeline_model") + mock_pipeline.deploy = Mock(name="model_deploy") auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT) - auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline", return_value=None) + auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline) auto_ml.deploy( initial_instance_count=INSTANCE_COUNT, instance_type=INSTANCE_TYPE, sagemaker_session=sagemaker_session, ) - auto_ml._deploy_inference_pipeline.assert_called_once() - auto_ml._deploy_inference_pipeline.assert_called_with( - candidate_mock.containers, - initial_instance_count=INSTANCE_COUNT, - instance_type=INSTANCE_TYPE, - name=None, - sagemaker_session=sagemaker_session, - endpoint_name=None, - tags=None, - wait=True, - update_endpoint=False, - vpc_config=None, - enable_network_isolation=False, - model_kms_key=None, - predictor_cls=None, - ) + auto_ml.create_model.assert_called_once() + mock_pipeline.deploy.assert_called_once() @patch("sagemaker.automl.automl.CandidateEstimator") @@ -484,7 +536,10 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_ auto_ml = AutoML( role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session ) - auto_ml._deploy_inference_pipeline = Mock("_deploy_inference_pipeline", return_value=None) + mock_pipeline = Mock(name="pipeline_model") + mock_pipeline.deploy = Mock(name="model_deploy") + auto_ml.best_candidate = Mock(name="best_candidate", return_value=CANDIDATE_DICT) + auto_ml.create_model = Mock(name="create_model", return_value=mock_pipeline) auto_ml.deploy( initial_instance_count=INSTANCE_COUNT, @@ -500,25 +555,31 @@ def test_deploy_optional_args(candidate_estimator, sagemaker_session, candidate_ enable_network_isolation=True, model_kms_key=OUTPUT_KMS_KEY, 
predictor_cls=RealTimePredictor, + inference_response_keys=None, ) - auto_ml._deploy_inference_pipeline.assert_called_once() - auto_ml._deploy_inference_pipeline.assert_called_with( - candidate_mock.containers, - initial_instance_count=INSTANCE_COUNT, - instance_type=INSTANCE_TYPE, + + auto_ml.create_model.assert_called_once() + auto_ml.create_model.assert_called_with( name=JOB_NAME, sagemaker_session=sagemaker_session, - endpoint_name=JOB_NAME, - tags=TAGS, - wait=False, - update_endpoint=True, + candidate=CANDIDATE_DICT, + inference_response_keys=None, vpc_config=VPC_CONFIG, enable_network_isolation=True, model_kms_key=OUTPUT_KMS_KEY, predictor_cls=RealTimePredictor, ) - candidate_estimator.assert_called_with(CANDIDATE_DICT, sagemaker_session=sagemaker_session) + mock_pipeline.deploy.assert_called_once() + + mock_pipeline.deploy.assert_called_with( + initial_instance_count=INSTANCE_COUNT, + instance_type=INSTANCE_TYPE, + endpoint_name=JOB_NAME, + tags=TAGS, + wait=False, + update_endpoint=True, + ) def test_candidate_estimator_get_steps(sagemaker_session): @@ -536,3 +597,68 @@ def test_candidate_estimator_fit(sagemaker_session): candidate_estimator.fit(inputs) sagemaker_session.train.assert_called() sagemaker_session.transform.assert_called() + + +def test_validate_and_update_inference_response(): + cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy() + + AutoML.validate_and_update_inference_response( + inference_containers=cic, + inference_response_keys=["predicted_label", "labels", "probabilities", "probability"], + ) + + assert ( + cic[2]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"] + == "predicted_label,labels,probabilities,probability" + ) + assert ( + cic[2]["Environment"]["SAGEMAKER_INFERENCE_INPUT"] + == "predicted_label,probabilities,probability" + ) + assert ( + cic[1]["Environment"]["SAGEMAKER_INFERENCE_OUTPUT"] + == "predicted_label,probabilities,probability" + ) + + +def test_validate_and_update_inference_response_wrong_input(): + cic = 
CLASSIFICATION_INFERENCE_CONTAINERS.copy() + + with pytest.raises( + ValueError, + message="Requested inference output keys [wrong_key, wrong_label] are unsupported. " + "The supported inference keys are [probability, probabilities, predicted_label, labels]", + ): + AutoML.validate_and_update_inference_response( + inference_containers=cic, + inference_response_keys=["wrong_key", "wrong_label", "probabilities", "probability"], + ) + + +def test_create_model(sagemaker_session): + auto_ml = AutoML( + role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session + ) + + pipeline_model = auto_ml.create_model( + name=JOB_NAME, + sagemaker_session=sagemaker_session, + candidate=CLASSIFICATION_CANDIDATE_DICT, + vpc_config=VPC_CONFIG, + enable_network_isolation=True, + model_kms_key=None, + predictor_cls=None, + inference_response_keys=None, + ) + + assert isinstance(pipeline_model, PipelineModel) + + +def test_attach(sagemaker_session): + aml = AutoML.attach(job_name=JOB_NAME_3, sagemaker_session=sagemaker_session) + assert aml.current_job_name == JOB_NAME_3 + assert aml.role == "mock_role_arn" + assert aml.target_attribute_name == "y" + assert aml.problem_type == "Auto" + assert aml.output_path == "s3://output_prefix" + assert aml.tags == LIST_TAGS_RESULT From 333cdbf371571c9f831f3157bc6c5eb0349be458 Mon Sep 17 00:00:00 2001 From: pialidas Date: Wed, 10 Jun 2020 12:14:26 -0400 Subject: [PATCH 2/9] Allow selecting inference response for Autopilot generated models 1. Added attach() method to AutoML to allow attaching an AutoML object to an existing AutoMLJob 2. Added create_model() method to AutoML that returns a PipelineModel that can be used by the 3. Added a util function validate_and_update_inference_response() to update the inference container list based on the requested inference response keys 4. 
Updated and added unit tests --- src/sagemaker/automl/automl.py | 2 +- tests/unit/sagemaker/automl/test_auto_ml.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index b060d7e369..334badfa9c 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -467,7 +467,7 @@ def _get_supported_inference_keys(cls, container, default=None): try: return [ x.strip() - for x in container["Environment"]["SAGEMAKER_INFERENCE_SUPPORT"].split(",") + for x in container["Environment"]["SAGEMAKER_INFERENCE_SUPPORTED"].split(",") ] except KeyError: if default is None: diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py index 5ff99e3b8d..b7df53bfe3 100644 --- a/tests/unit/sagemaker/automl/test_auto_ml.py +++ b/tests/unit/sagemaker/automl/test_auto_ml.py @@ -114,7 +114,7 @@ { "Environment": { "MAX_CONTENT_LENGTH": "20000000", - "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label", + "SAGEMAKER_INFERENCE_SUPPORTED": "probability,probabilities,predicted_label", "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label", }, "Image": "account.dkr.ecr.us-west-2.amazonaws.com/sagemaker-auto-ml-training:1.0-cpu-py3", @@ -123,7 +123,7 @@ { "Environment": { "INVERSE_LABEL_TRANSFORM": "1", - "SAGEMAKER_INFERENCE_SUPPORT": "probability,probabilities,predicted_label,labels", + "SAGEMAKER_INFERENCE_SUPPORTED": "probability,probabilities,predicted_label,labels", "SAGEMAKER_INFERENCE_OUTPUT": "predicted_label", "SAGEMAKER_INFERENCE_INPUT": "predicted_label", }, From 5f21aa2e94c75c9f462f53805e2653293ba75b83 Mon Sep 17 00:00:00 2001 From: pialidas Date: Wed, 10 Jun 2020 13:14:29 -0400 Subject: [PATCH 3/9] fixed unit tests for py27 and integ test --- src/sagemaker/automl/automl.py | 4 ++-- tests/integ/test_auto_ml.py | 5 ++++- tests/unit/sagemaker/automl/test_auto_ml.py | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) 
diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index 334badfa9c..8b1a69173f 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -117,8 +117,8 @@ def attach(cls, job_name, sagemaker_session=None): _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name) automl_job_tags = sagemaker_session.sagemaker_client.list_tags( - _auto_ml_job_desc["AutoMLJobArn"] - ) + ResourceArn=_auto_ml_job_desc["AutoMLJobArn"])["Tags"] + amlj = AutoML( role=_auto_ml_job_desc["RoleArn"], target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"], diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py index ed27492f16..1dd25da0bf 100644 --- a/tests/integ/test_auto_ml.py +++ b/tests/integ/test_auto_ml.py @@ -277,7 +277,10 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type): sagemaker_session.sagemaker_client.delete_endpoint(EndpointName=endpoint_name) -@pytest.mark.canary_quick +@pytest.mark.skipif( + tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS, + reason="AutoML is not supported in the region yet.", +) def test_create_model_best_candidate(sagemaker_session, cpu_instance_type): auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session) diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py index b7df53bfe3..e62635bd18 100644 --- a/tests/unit/sagemaker/automl/test_auto_ml.py +++ b/tests/unit/sagemaker/automl/test_auto_ml.py @@ -12,6 +12,8 @@ # language governing permissions and limitations under the License. 
from __future__ import absolute_import +import copy + import pytest from mock import Mock, patch from sagemaker import AutoML, AutoMLJob, AutoMLInput, CandidateEstimator, PipelineModel @@ -600,7 +602,7 @@ def test_candidate_estimator_fit(sagemaker_session): def test_validate_and_update_inference_response(): - cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy() + cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS) AutoML.validate_and_update_inference_response( inference_containers=cic, @@ -622,7 +624,7 @@ def test_validate_and_update_inference_response(): def test_validate_and_update_inference_response_wrong_input(): - cic = CLASSIFICATION_INFERENCE_CONTAINERS.copy() + cic = copy.copy(CLASSIFICATION_INFERENCE_CONTAINERS) with pytest.raises( ValueError, @@ -661,4 +663,4 @@ def test_attach(sagemaker_session): assert aml.target_attribute_name == "y" assert aml.problem_type == "Auto" assert aml.output_path == "s3://output_prefix" - assert aml.tags == LIST_TAGS_RESULT + assert aml.tags == LIST_TAGS_RESULT["Tags"] From 450ceb760980c75033bec12c19659b7290227419 Mon Sep 17 00:00:00 2001 From: pialidas Date: Wed, 10 Jun 2020 14:02:32 -0400 Subject: [PATCH 4/9] black formatting --- src/sagemaker/automl/automl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index 8b1a69173f..648281699e 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -117,7 +117,8 @@ def attach(cls, job_name, sagemaker_session=None): _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name) automl_job_tags = sagemaker_session.sagemaker_client.list_tags( - ResourceArn=_auto_ml_job_desc["AutoMLJobArn"])["Tags"] + ResourceArn=_auto_ml_job_desc["AutoMLJobArn"] + )["Tags"] amlj = AutoML( role=_auto_ml_job_desc["RoleArn"], From 68b38d053d6f7bbd691863ab5699cb65c742f71a Mon Sep 17 00:00:00 2001 From: pialidas Date: Wed, 10 Jun 2020 19:10:56 -0400 Subject: [PATCH 5/9] fix the test --- 
src/sagemaker/automl/automl.py | 38 ++++++++++++++++++---------------- tests/integ/test_auto_ml.py | 2 +- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index 648281699e..3bd20e7fc1 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -1,3 +1,4 @@ +# %load /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/automl/automl.py # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You @@ -123,33 +124,34 @@ def attach(cls, job_name, sagemaker_session=None): amlj = AutoML( role=_auto_ml_job_desc["RoleArn"], target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"], - problem_type=_auto_ml_job_desc["ProblemType"], - output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"], output_kms_key=_auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"), + output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"], base_job_name=job_name, + compression_type=_auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"), sagemaker_session=sagemaker_session, - volume_kms_key=_auto_ml_job_desc["AutoMLJobConfig"] + volume_kms_key=_auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("SecurityConfig", {}) .get("VolumeKmsKeyId"), - encrypt_inter_container_traffic=_auto_ml_job_desc["AutoMLJobConfig"] + encrypt_inter_container_traffic=_auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("SecurityConfig", {}) .get("EnableInterContainerTrafficEncryption", False), - vpc_config=_auto_ml_job_desc["AutoMLJobConfig"] + vpc_config=_auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("SecurityConfig", {}) .get("VpcConfig"), - max_candidates=_auto_ml_job_desc["AutoMLJobConfig"]["CompletionCriteria"][ - "MaxCandidates" - ], - max_runtime_per_training_job_in_seconds=_auto_ml_job_desc["AutoMLJobConfig"][ - "CompletionCriteria" - 
]["MaxRuntimePerTrainingJobInSeconds"], - total_job_runtime_in_seconds=_auto_ml_job_desc["AutoMLJobConfig"]["CompletionCriteria"][ - "MaxAutoMLJobRuntimeInSeconds" - ], - job_objective=_auto_ml_job_desc["AutoMLJobObjective"]["MetricName"], - generate_candidate_definitions_only=_auto_ml_job_desc[ - "GenerateCandidateDefinitionsOnly" - ], + problem_type=_auto_ml_job_desc.get("ProblemType"), + max_candidates=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + .get("CompletionCriteria", {}) + .get("MaxCandidates"), + max_runtime_per_training_job_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + .get("CompletionCriteria", {}) + .get("MaxRuntimePerTrainingJobInSeconds"), + total_job_runtime_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + .get("CompletionCriteria", {}) + .get("MaxAutoMLJobRuntimeInSeconds"), + job_objective=_auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"), + generate_candidate_definitions_only=_auto_ml_job_desc.get( + "GenerateCandidateDefinitionsOnly", False + ), tags=automl_job_tags, ) amlj.current_job_name = job_name diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py index 1dd25da0bf..45018307cd 100644 --- a/tests/integ/test_auto_ml.py +++ b/tests/integ/test_auto_ml.py @@ -284,7 +284,7 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type): def test_create_model_best_candidate(sagemaker_session, cpu_instance_type): auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session) - auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME) + auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session) best_candidate = auto_ml.best_candidate() with timeout(minutes=2): From baad38ec318cf8d7ced27481c620f4d01e9f3123 Mon Sep 17 00:00:00 2001 From: pialidas Date: Thu, 11 Jun 2020 00:44:14 -0400 Subject: [PATCH 6/9] fix the test --- tests/integ/test_auto_ml.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integ/test_auto_ml.py 
b/tests/integ/test_auto_ml.py index 45018307cd..5e1e7268ed 100644 --- a/tests/integ/test_auto_ml.py +++ b/tests/integ/test_auto_ml.py @@ -38,6 +38,8 @@ # use a succeeded AutoML job to test describe and list candidates method, otherwise tests will run too long AUTO_ML_JOB_NAME = "python-sdk-integ-test-base-job" +DEFAULT_MODEL_NAME = "python-sdk-automl" + EXPECTED_DEFAULT_JOB_CONFIG = { "CompletionCriteria": {"MaxCandidates": 3}, @@ -287,8 +289,9 @@ def test_create_model_best_candidate(sagemaker_session, cpu_instance_type): auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session) best_candidate = auto_ml.best_candidate() - with timeout(minutes=2): + with timeout(minutes=5): pipeline_model = auto_ml.create_model( + name=DEFAULT_MODEL_NAME, candidate=best_candidate, sagemaker_session=sagemaker_session, vpc_config=None, From 0324152599384ab0d50500640e21d2e94a3c76bd Mon Sep 17 00:00:00 2001 From: pialidas Date: Thu, 11 Jun 2020 19:08:55 -0400 Subject: [PATCH 7/9] PR comments addressed --- src/sagemaker/automl/automl.py | 49 +++++++++++---------- tests/integ/test_auto_ml.py | 2 +- tests/unit/sagemaker/automl/test_auto_ml.py | 2 +- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index 3bd20e7fc1..536065c21a 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -1,4 +1,3 @@ -# %load /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/automl/automl.py # Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You @@ -102,60 +101,64 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None): self.latest_auto_ml_job.wait(logs=logs) @classmethod - def attach(cls, job_name, sagemaker_session=None): + def attach(cls, auto_ml_job_name, sagemaker_session=None): """Attach to an existing AutoML job. 
+ Creates and returns an AutoML object bound to an existing AutoML job. + Args: - job_name (str): AutoML job name + auto_ml_job_name (str): AutoML job name sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker interactions (default: None). If not - specified, the one originally associated with the ``AutoML`` instance is used.: + specified, a new ``Session`` is created. Returns: + sagemaker.automl.AutoML: An ``AutoML`` instance with the attached automl job. """ sagemaker_session = sagemaker_session or Session() - _auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(job_name) + auto_ml_job_desc = sagemaker_session.describe_auto_ml_job(auto_ml_job_name) automl_job_tags = sagemaker_session.sagemaker_client.list_tags( - ResourceArn=_auto_ml_job_desc["AutoMLJobArn"] + ResourceArn=auto_ml_job_desc["AutoMLJobArn"] )["Tags"] amlj = AutoML( - role=_auto_ml_job_desc["RoleArn"], - target_attribute_name=_auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"], - output_kms_key=_auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"), - output_path=_auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"], - base_job_name=job_name, - compression_type=_auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"), + role=auto_ml_job_desc["RoleArn"], + target_attribute_name=auto_ml_job_desc["InputDataConfig"][0]["TargetAttributeName"], + output_kms_key=auto_ml_job_desc["OutputDataConfig"].get("KmsKeyId"), + output_path=auto_ml_job_desc["OutputDataConfig"]["S3OutputPath"], + base_job_name=auto_ml_job_name, + compression_type=auto_ml_job_desc["InputDataConfig"][0].get("CompressionType"), sagemaker_session=sagemaker_session, - volume_kms_key=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + volume_kms_key=auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("SecurityConfig", {}) .get("VolumeKmsKeyId"), - encrypt_inter_container_traffic=_auto_ml_job_desc.get("AutoMLJobConfig", {}) +
encrypt_inter_container_traffic=auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("SecurityConfig", {}) .get("EnableInterContainerTrafficEncryption", False), - vpc_config=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + vpc_config=auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("SecurityConfig", {}) .get("VpcConfig"), - problem_type=_auto_ml_job_desc.get("ProblemType"), - max_candidates=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + problem_type=auto_ml_job_desc.get("ProblemType"), + max_candidates=auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("CompletionCriteria", {}) .get("MaxCandidates"), - max_runtime_per_training_job_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + max_runtime_per_training_job_in_seconds=auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("CompletionCriteria", {}) .get("MaxRuntimePerTrainingJobInSeconds"), - total_job_runtime_in_seconds=_auto_ml_job_desc.get("AutoMLJobConfig", {}) + total_job_runtime_in_seconds=auto_ml_job_desc.get("AutoMLJobConfig", {}) .get("CompletionCriteria", {}) .get("MaxAutoMLJobRuntimeInSeconds"), - job_objective=_auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"), - generate_candidate_definitions_only=_auto_ml_job_desc.get( + job_objective=auto_ml_job_desc.get("AutoMLJobObjective", {}).get("MetricName"), + generate_candidate_definitions_only=auto_ml_job_desc.get( "GenerateCandidateDefinitionsOnly", False ), tags=automl_job_tags, ) - amlj.current_job_name = job_name - amlj._auto_ml_job_desc = _auto_ml_job_desc + amlj.current_job_name = auto_ml_job_name + amlj.latest_auto_ml_job = auto_ml_job_name # pylint: disable=W0201 + amlj._auto_ml_job_desc = auto_ml_job_desc return amlj def describe_auto_ml_job(self, job_name=None): diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py index 5e1e7268ed..0b86cf5551 100644 --- a/tests/integ/test_auto_ml.py +++ b/tests/integ/test_auto_ml.py @@ -286,7 +286,7 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type): def 
test_create_model_best_candidate(sagemaker_session, cpu_instance_type): auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session) - auto_ml = AutoML.attach(job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session) + auto_ml = AutoML.attach(auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session) best_candidate = auto_ml.best_candidate() with timeout(minutes=5): diff --git a/tests/unit/sagemaker/automl/test_auto_ml.py b/tests/unit/sagemaker/automl/test_auto_ml.py index e62635bd18..70adc840bc 100644 --- a/tests/unit/sagemaker/automl/test_auto_ml.py +++ b/tests/unit/sagemaker/automl/test_auto_ml.py @@ -657,7 +657,7 @@ def test_create_model(sagemaker_session): def test_attach(sagemaker_session): - aml = AutoML.attach(job_name=JOB_NAME_3, sagemaker_session=sagemaker_session) + aml = AutoML.attach(auto_ml_job_name=JOB_NAME_3, sagemaker_session=sagemaker_session) assert aml.current_job_name == JOB_NAME_3 assert aml.role == "mock_role_arn" assert aml.target_attribute_name == "y" From fdb858fd13d79a9aadd2eef900829da5126d90a6 Mon Sep 17 00:00:00 2001 From: pialidas Date: Thu, 11 Jun 2020 20:03:26 -0400 Subject: [PATCH 8/9] Few minor fixes to the comments --- src/sagemaker/automl/automl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/automl/automl.py b/src/sagemaker/automl/automl.py index 536065c21a..c204356e5b 100644 --- a/src/sagemaker/automl/automl.py +++ b/src/sagemaker/automl/automl.py @@ -467,8 +467,8 @@ def _get_supported_inference_keys(cls, container, default=None): List of keys the container support or default Raises: - Value error if the default is None and the container definition has - no marker environment variable. + KeyError if the default is None and the container definition has + no marker environment variable SAGEMAKER_INFERENCE_SUPPORTED. 
""" try: return [ From f263ae8013cc77509feca5be37916d67f7694667 Mon Sep 17 00:00:00 2001 From: pialidas Date: Thu, 11 Jun 2020 22:00:03 -0400 Subject: [PATCH 9/9] fix the arg name --- tests/integ/test_auto_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/test_auto_ml.py b/tests/integ/test_auto_ml.py index 0b86cf5551..fc547256c6 100644 --- a/tests/integ/test_auto_ml.py +++ b/tests/integ/test_auto_ml.py @@ -208,7 +208,7 @@ def test_auto_ml_attach(sagemaker_session): auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session) attached_automl_job = AutoML.attach( - job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session + auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session ) attached_desc = attached_automl_job.describe_auto_ml_job() assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME