From 70c95b6a065e0ee70fd620b512dbedf57435b6ff Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 10 Jul 2019 09:11:49 -0700 Subject: [PATCH 01/35] change: remove TODO comment on import-error Pylint check (#918) By running Pylint before any of the unit tests (and dependency installs), the import-error check will always fail since the dependencies are not yet installed. We could move Pylint to a later stage to resolve this, but there's value in this quick check occurring before the unit tests. As a result, this Pylint check is being disabled. --- .pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index 1e8b52303d..768ef7dfbf 100644 --- a/.pylintrc +++ b/.pylintrc @@ -83,7 +83,7 @@ disable= too-many-instance-attributes, line-too-long, # We let Flake8 take care of this # TODO: Fix these and stop relying on flake8 len-as-condition, # TODO: Enable this check once pylint 2.4.0 is released and consumed due to the fix in https://github.com/PyCQA/pylint/issues/2684 - import-error, # TODO: Fix import errors + import-error, # Since we run Pylint before any of our builds in tox, this will always fail attribute-defined-outside-init, # TODO: Fix scope protected-access, # TODO: Fix access abstract-method, # TODO: Fix abstract methods From 7acfb318238332a99b81b0cd19388555024b19f3 Mon Sep 17 00:00:00 2001 From: ci Date: Wed, 10 Jul 2019 17:17:06 +0000 Subject: [PATCH 02/35] prepare release v1.33.0 --- CHANGELOG.md | 18 ++++++++++++++++++ VERSION | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81778f5d32..dd3ccc77bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## v1.33.0 (2019-07-10) + +### Features + + * git support for hosting models + * allow custom model name during deploy + +### Bug fixes and other changes + + * remove TODO comment on import-error Pylint check + * enable wrong-import-position pylint check + * Revert "change: enable wrong-import-position pylint check (#907)" + * enable signature-differs pylint check + * enable wrong-import-position pylint check + * enable logging-not-lazy pylint check + * reset default output path in Transformer.transform + * Add ap-northeast-1 to Neo algorithms region map + ## v1.32.2 (2019-07-08) ### Bug fixes and other changes diff --git a/VERSION b/VERSION index 2127d310e8..7aa332e416 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.32.3.dev0 +1.33.0 From 1705b137742697030b0b01344937c29c71508e89 Mon Sep 17 00:00:00 2001 From: ci Date: Wed, 10 Jul 2019 17:39:25 +0000 Subject: [PATCH 03/35] update development version to v1.33.1.dev0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 7aa332e416..21f10e8e95 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.33.0 +1.33.1.dev0 From b1f95fb9c1cb264dc548314559083adb69a1d23e Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 10 Jul 2019 16:30:06 -0700 Subject: [PATCH 04/35] change: enable unidiomatic-typecheck pylint check (#921) --- .pylintrc | 1 - src/sagemaker/model.py | 2 +- src/sagemaker/vpc_utils.py | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.pylintrc b/.pylintrc index 768ef7dfbf..442b7307bd 100644 --- a/.pylintrc +++ b/.pylintrc @@ -87,7 +87,6 @@ disable= attribute-defined-outside-init, # TODO: Fix scope protected-access, # TODO: Fix access abstract-method, # TODO: Fix abstract methods - unidiomatic-typecheck, # TODO: Fix typechecks wrong-import-order, # TODO: Fix import order no-else-return, # 
TODO: Remove unnecessary elses useless-object-inheritance, # TODO: Remove unnecessary imports diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 27738f1f88..b89600576a 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -176,7 +176,7 @@ def _compilation_job_config( input_model_config = { "S3Uri": self.model_data, "DataInputConfig": input_shape - if type(input_shape) != dict + if not isinstance(input_shape, dict) else json.dumps(input_shape), "Framework": framework, } diff --git a/src/sagemaker/vpc_utils.py b/src/sagemaker/vpc_utils.py index 1be426b424..9e412db734 100644 --- a/src/sagemaker/vpc_utils.py +++ b/src/sagemaker/vpc_utils.py @@ -83,7 +83,7 @@ def sanitize(vpc_config): """ if vpc_config is None: return vpc_config - elif type(vpc_config) is not dict: + elif not isinstance(vpc_config, dict): raise ValueError("vpc_config is not a dict: {}".format(vpc_config)) elif not vpc_config: raise ValueError("vpc_config is empty") @@ -91,7 +91,7 @@ def sanitize(vpc_config): subnets = vpc_config.get(SUBNETS_KEY) if subnets is None: raise ValueError("vpc_config is missing key: {}".format(SUBNETS_KEY)) - if type(subnets) is not list: + if not isinstance(subnets, list): raise ValueError("vpc_config value for {} is not a list: {}".format(SUBNETS_KEY, subnets)) elif not subnets: raise ValueError("vpc_config value for {} is empty".format(SUBNETS_KEY)) @@ -99,7 +99,7 @@ def sanitize(vpc_config): security_group_ids = vpc_config.get(SECURITY_GROUP_IDS_KEY) if security_group_ids is None: raise ValueError("vpc_config is missing key: {}".format(SECURITY_GROUP_IDS_KEY)) - if type(security_group_ids) is not list: + if not isinstance(security_group_ids, list): raise ValueError( "vpc_config value for {} is not a list: {}".format( SECURITY_GROUP_IDS_KEY, security_group_ids From 0cf59021f7c14923bf2b7fd7695658bf755bcb64 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Thu, 11 Jul 2019 13:42:38 -0700 Subject: [PATCH 05/35] change: enable no-else-return and no-else-raise pylint checks (#925) --- .pylintrc | 2 - src/sagemaker/amazon/common.py | 4 +- src/sagemaker/estimator.py | 31 +++++++-------- src/sagemaker/fw_utils.py | 55 ++++++++++++--------------- src/sagemaker/job.py | 32 +++++++--------- src/sagemaker/local/data.py | 22 ++++------- src/sagemaker/local/image.py | 11 +++--- src/sagemaker/local/local_session.py | 17 +++------ src/sagemaker/model.py | 3 +- src/sagemaker/predictor.py | 9 ++--- src/sagemaker/rl/estimator.py | 19 +++++---- src/sagemaker/session.py | 6 +-- src/sagemaker/sklearn/estimator.py | 2 +- src/sagemaker/tensorflow/estimator.py | 7 ++-- src/sagemaker/tensorflow/predictor.py | 3 +- src/sagemaker/vpc_utils.py | 8 ++-- 16 files changed, 97 insertions(+), 134 deletions(-) diff --git a/.pylintrc b/.pylintrc index 442b7307bd..0a980d43b3 100644 --- a/.pylintrc +++ b/.pylintrc @@ -88,10 +88,8 @@ disable= protected-access, # TODO: Fix access abstract-method, # TODO: Fix abstract methods wrong-import-order, # TODO: Fix import order - no-else-return, # TODO: Remove unnecessary elses useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports - no-else-raise, # TODO: Remove unnecessary elses no-self-use, # TODO: Convert methods to functions where appropriate inconsistent-return-statements, # TODO: Make returns consistent consider-merging-isinstance, # TODO: Merge isinstance where appropriate diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 5a402a9c25..6bd5047a98 100644 --- 
a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -204,8 +204,8 @@ def read_recordio(f): def _resolve_type(dtype): if dtype == np.dtype(int): return "Int32" - elif dtype == np.dtype(float): + if dtype == np.dtype(float): return "Float64" - elif dtype == np.dtype("float32"): + if dtype == np.dtype("float32"): return "Float32" raise ValueError("Unsupported dtype {} on array".format(dtype)) diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index e558822dd7..287d203912 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -643,8 +643,7 @@ def get_vpc_config(self, vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT): """ if vpc_config_override is vpc_utils.VPC_CONFIG_DEFAULT: return vpc_utils.to_dict(self.subnets, self.security_group_ids) - else: - return vpc_utils.sanitize(vpc_config_override) + return vpc_utils.sanitize(vpc_config_override) def _ensure_latest_training_job( self, error_message="Estimator is not associated with a training job" @@ -1235,14 +1234,13 @@ def train_image(self): """ if self.image_name: return self.image_name - else: - return create_image_uri( - self.sagemaker_session.boto_region_name, - self.__framework_name__, - self.train_instance_type, - self.framework_version, # pylint: disable=no-member - py_version=self.py_version, # pylint: disable=no-member - ) + return create_image_uri( + self.sagemaker_session.boto_region_name, + self.__framework_name__, + self.train_instance_type, + self.framework_version, # pylint: disable=no-member + py_version=self.py_version, # pylint: disable=no-member + ) @classmethod def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="model"): @@ -1404,13 +1402,10 @@ def _s3_uri_without_prefix_from_input(input_data): for channel_name, channel_s3_uri in input_data.items(): response.update(_s3_uri_prefix(channel_name, channel_s3_uri)) return response - elif isinstance(input_data, str): + if isinstance(input_data, str): return _s3_uri_prefix("training", input_data) - elif isinstance(input_data, s3_input): + if isinstance(input_data, s3_input): return _s3_uri_prefix("training", input_data) - else: - raise ValueError( - "Unrecognized type for S3 input data config - not str or s3_input: {}".format( - input_data - ) - ) + raise ValueError( + "Unrecognized type for S3 input data config - not str or s3_input: {}".format(input_data) + ) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 9834d4eb65..3b90f9c006 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -87,8 +87,7 @@ def _is_merged_versions(framework, framework_version): lowest_version_list = MERGED_FRAMEWORKS_LOWEST_VERSIONS.get(framework) if lowest_version_list: return is_version_equal_or_higher(lowest_version_list, framework_version) - else: - return False + return False def _using_merged_images(region, framework, py_version, accelerator_type, framework_version): @@ -101,8 +100,7 @@ def _using_merged_images(region, framework, py_version, accelerator_type, framew def _registry_id(region, framework, py_version, account, accelerator_type, framework_version): if _using_merged_images(region, framework, py_version, accelerator_type, framework_version): return "763104351884" - else: - return VALID_ACCOUNTS_BY_REGION.get(region, account) + return VALID_ACCOUNTS_BY_REGION.get(region, account) def create_image_uri( @@ -182,10 +180,7 @@ def create_image_uri( return "{}/{}:{}".format( get_ecr_image_uri_prefix(account, region), MERGED_FRAMEWORKS_REPO_MAP[framework], tag ) - else: - 
return "{}/sagemaker-{}:{}".format( - get_ecr_image_uri_prefix(account, region), framework, tag - ) + return "{}/sagemaker-{}:{}".format(get_ecr_image_uri_prefix(account, region), framework, tag) def _accelerator_type_valid_for_framework( @@ -324,30 +319,28 @@ def framework_name_from_image(image_name): sagemaker_match = sagemaker_pattern.match(image_name) if sagemaker_match is None: return None, None, None, None - else: - # extract framework, python version and image tag - # We must support both the legacy and current image name format. - name_pattern = re.compile( - r"^(?:sagemaker(?:-rl)?-)?(tensorflow|mxnet|chainer|pytorch|scikit-learn)(?:-)?(scriptmode|training)?:(.*)-(.*?)-(py2|py3)$" # noqa: E501 + # extract framework, python version and image tag + # We must support both the legacy and current image name format. + name_pattern = re.compile( + r"^(?:sagemaker(?:-rl)?-)?(tensorflow|mxnet|chainer|pytorch|scikit-learn)(?:-)?(scriptmode|training)?:(.*)-(.*?)-(py2|py3)$" # noqa: E501 + ) + legacy_name_pattern = re.compile(r"^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$") + + name_match = name_pattern.match(sagemaker_match.group(9)) + legacy_match = legacy_name_pattern.match(sagemaker_match.group(9)) + + if name_match is not None: + fw, scriptmode, ver, device, py = ( + name_match.group(1), + name_match.group(2), + name_match.group(3), + name_match.group(4), + name_match.group(5), ) - legacy_name_pattern = re.compile(r"^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$") - - name_match = name_pattern.match(sagemaker_match.group(9)) - legacy_match = legacy_name_pattern.match(sagemaker_match.group(9)) - - if name_match is not None: - fw, scriptmode, ver, device, py = ( - name_match.group(1), - name_match.group(2), - name_match.group(3), - name_match.group(4), - name_match.group(5), - ) - return fw, py, "{}-{}-{}".format(ver, device, py), scriptmode - elif legacy_match is not None: - return (legacy_match.group(1), legacy_match.group(2), legacy_match.group(4), None) - else: - return None, None, None, None + return fw, py, "{}-{}-{}".format(ver, device, py), scriptmode + if legacy_match is not None: + return (legacy_match.group(1), legacy_match.group(2), legacy_match.group(4), None) + return None, None, None, None def framework_version_from_tag(image_tag): diff --git a/src/sagemaker/job.py b/src/sagemaker/job.py index ee2658fe02..ffe4ba31b0 100644 --- a/src/sagemaker/job.py +++ b/src/sagemaker/job.py @@ -140,25 +140,24 @@ def _convert_input_to_channel(channel_name, channel_s3_input): def _format_string_uri_input(uri_input, validate_uri=True, content_type=None, input_mode=None): if isinstance(uri_input, str) and validate_uri and uri_input.startswith("s3://"): return s3_input(uri_input, content_type=content_type, input_mode=input_mode) - elif isinstance(uri_input, str) and validate_uri and uri_input.startswith("file://"): + if isinstance(uri_input, str) and validate_uri and uri_input.startswith("file://"): return file_input(uri_input) - elif isinstance(uri_input, str) and validate_uri: + if isinstance(uri_input, str) and validate_uri: raise ValueError( 'URI input {} must be a valid S3 or FILE URI: must start with "s3://" or ' '"file://"'.format(uri_input) ) - elif isinstance(uri_input, str): + if isinstance(uri_input, str): return s3_input(uri_input, content_type=content_type, input_mode=input_mode) - elif isinstance(uri_input, s3_input): + if isinstance(uri_input, s3_input): return uri_input - elif isinstance(uri_input, file_input): + if isinstance(uri_input, file_input): return 
uri_input - else: - raise ValueError( - "Cannot format input {}. Expecting one of str, s3_input, or file_input".format( - uri_input - ) + raise ValueError( + "Cannot format input {}. Expecting one of str, s3_input, or file_input".format( + uri_input ) + ) @staticmethod def _prepare_channel( @@ -171,7 +170,7 @@ def _prepare_channel( ): if not channel_uri: return - elif not channel_name: + if not channel_name: raise ValueError( "Expected a channel name if a channel URI {} is specified".format(channel_uri) ) @@ -197,23 +196,20 @@ def _format_model_uri_input(model_uri, validate_uri=True): distribution="FullyReplicated", content_type="application/x-sagemaker-model", ) - elif ( - isinstance(model_uri, string_types) and validate_uri and model_uri.startswith("file://") - ): + if isinstance(model_uri, string_types) and validate_uri and model_uri.startswith("file://"): return file_input(model_uri) - elif isinstance(model_uri, string_types) and validate_uri: + if isinstance(model_uri, string_types) and validate_uri: raise ValueError( 'Model URI must be a valid S3 or FILE URI: must start with "s3://" or ' '"file://' ) - elif isinstance(model_uri, string_types): + if isinstance(model_uri, string_types): return s3_input( model_uri, input_mode="File", distribution="FullyReplicated", content_type="application/x-sagemaker-model", ) - else: - raise ValueError("Cannot format model URI {}. Expecting str".format(model_uri)) + raise ValueError("Cannot format model URI {}. Expecting str".format(model_uri)) @staticmethod def _format_record_set_list_input(inputs): diff --git a/src/sagemaker/local/data.py b/src/sagemaker/local/data.py index 88d5dde41b..62f41bf4e0 100644 --- a/src/sagemaker/local/data.py +++ b/src/sagemaker/local/data.py @@ -45,7 +45,7 @@ def get_data_source_instance(data_source, sagemaker_session): parsed_uri = urlparse(data_source) if parsed_uri.scheme == "file": return LocalFileDataSource(parsed_uri.netloc + parsed_uri.path) - elif parsed_uri.scheme == "s3": + if parsed_uri.scheme == "s3": return S3DataSource(parsed_uri.netloc, parsed_uri.path, sagemaker_session) @@ -62,12 +62,11 @@ def get_splitter_instance(split_type): """ if split_type is None: return NoneSplitter() - elif split_type == "Line": + if split_type == "Line": return LineSplitter() - elif split_type == "RecordIO": + if split_type == "RecordIO": return RecordIOSplitter() - else: - raise ValueError("Invalid Split Type: %s" % split_type) + raise ValueError("Invalid Split Type: %s" % split_type) def get_batch_strategy_instance(strategy, splitter): @@ -82,12 +81,9 @@ def get_batch_strategy_instance(strategy, splitter): """ if strategy == "SingleRecord": return SingleRecordStrategy(splitter) - elif strategy == "MultiRecord": + if strategy == "MultiRecord": return MultiRecordStrategy(splitter) - else: - raise ValueError( - 'Invalid Batch Strategy: %s - Valid Strategies: "SingleRecord", "MultiRecord"' - ) + raise ValueError('Invalid Batch Strategy: %s - Valid Strategies: "SingleRecord", "MultiRecord"') class DataSource(with_metaclass(ABCMeta, object)): @@ -129,8 +125,7 @@ def get_file_list(self): for f in os.listdir(self.root_path) if os.path.isfile(os.path.join(self.root_path, f)) ] - else: - return [self.root_path] + return [self.root_path] def get_root_dir(self): """Retrieve the absolute path to the root directory of this data source. 
@@ -140,8 +135,7 @@ def get_root_dir(self): """ if os.path.isdir(self.root_path): return self.root_path - else: - return os.path.dirname(self.root_path) + return os.path.dirname(self.root_path) class S3DataSource(DataSource): diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 30a6f47572..95a6277f87 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -665,7 +665,7 @@ def _aws_credentials(session): "AWS_ACCESS_KEY_ID=%s" % (str(access_key)), "AWS_SECRET_ACCESS_KEY=%s" % (str(secret_key)), ] - elif not _aws_credentials_available_in_metadata_service(): + if not _aws_credentials_available_in_metadata_service(): logger.warning( "Using the short-lived AWS credentials found in session. They might expire while running." ) @@ -674,11 +674,10 @@ def _aws_credentials(session): "AWS_SECRET_ACCESS_KEY=%s" % (str(secret_key)), "AWS_SESSION_TOKEN=%s" % (str(token)), ] - else: - logger.info( - "No AWS credentials found in session but credentials from EC2 Metadata Service are available." - ) - return None + logger.info( + "No AWS credentials found in session but credentials from EC2 Metadata Service are available." + ) + return None except Exception as e: # pylint: disable=broad-except logger.info("Could not get AWS credentials: %s", e) diff --git a/src/sagemaker/local/local_session.py b/src/sagemaker/local/local_session.py index 247dc17790..d2e6582ed4 100644 --- a/src/sagemaker/local/local_session.py +++ b/src/sagemaker/local/local_session.py @@ -107,8 +107,7 @@ def describe_training_job(self, TrainingJobName): } } raise ClientError(error_response, "describe_training_job") - else: - return LocalSagemakerClient._training_jobs[TrainingJobName].describe() + return LocalSagemakerClient._training_jobs[TrainingJobName].describe() def create_transform_job( self, @@ -132,8 +131,7 @@ def describe_transform_job(self, TransformJobName): } } raise ClientError(error_response, "describe_transform_job") - else: - return LocalSagemakerClient._transform_jobs[TransformJobName].describe() + return LocalSagemakerClient._transform_jobs[TransformJobName].describe() def create_model( self, ModelName, PrimaryContainer, *args, **kwargs @@ -152,13 +150,10 @@ def describe_model(self, ModelName): "Error": {"Code": "ValidationException", "Message": "Could not find local model"} } raise ClientError(error_response, "describe_model") - else: - return LocalSagemakerClient._models[ModelName].describe() + return LocalSagemakerClient._models[ModelName].describe() def describe_endpoint_config(self, EndpointConfigName): - if EndpointConfigName in LocalSagemakerClient._endpoint_configs: - return LocalSagemakerClient._endpoint_configs[EndpointConfigName].describe() - else: + if EndpointConfigName not in LocalSagemakerClient._endpoint_configs: error_response = { "Error": { "Code": "ValidationException", @@ -166,6 +161,7 @@ def describe_endpoint_config(self, EndpointConfigName): } } raise ClientError(error_response, "describe_endpoint_config") + return LocalSagemakerClient._endpoint_configs[EndpointConfigName].describe() def create_endpoint_config(self, EndpointConfigName, ProductionVariants, Tags=None): LocalSagemakerClient._endpoint_configs[EndpointConfigName] = _LocalEndpointConfig( @@ -178,8 +174,7 @@ def describe_endpoint(self, EndpointName): "Error": {"Code": "ValidationException", "Message": "Could not find local endpoint"} } raise ClientError(error_response, "describe_endpoint") - else: - return LocalSagemakerClient._endpoints[EndpointName].describe() + return 
LocalSagemakerClient._endpoints[EndpointName].describe() def create_endpoint(self, EndpointName, EndpointConfigName, Tags=None): endpoint = _LocalEndpoint(EndpointName, EndpointConfigName, Tags, self.sagemaker_session) diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index b89600576a..e2352c5f41 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -205,8 +205,7 @@ def check_neo_region(self, region): """ if region in NEO_IMAGE_ACCOUNT: return True - else: - return False + return False def _neo_image_account(self, region): if region not in NEO_IMAGE_ACCOUNT: diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index e7d43e1925..4bdf0ed665 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -195,10 +195,9 @@ def _serialize_row(data): if isinstance(data, np.ndarray): data = np.ndarray.flatten(data) if hasattr(data, "__len__"): - if len(data) > 0: - return _csv_serialize_python_array(data) - else: + if len(data) == 0: raise ValueError("Cannot serialize empty array") + return _csv_serialize_python_array(data) # files and buffers if hasattr(data, "read"): @@ -387,9 +386,9 @@ def __call__(self, stream, content_type=CONTENT_TYPE_NPY): return np.genfromtxt( codecs.getreader("utf-8")(stream), delimiter=",", dtype=self.dtype ) - elif content_type == CONTENT_TYPE_JSON: + if content_type == CONTENT_TYPE_JSON: return np.array(json.load(codecs.getreader("utf-8")(stream)), dtype=self.dtype) - elif content_type == CONTENT_TYPE_NPY: + if content_type == CONTENT_TYPE_NPY: return np.load(BytesIO(stream.read())) finally: stream.close() diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index 90e2e60a03..5836948c10 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -226,7 +226,7 @@ def create_model( from sagemaker.tensorflow.serving import Model as tfsModel return tfsModel(framework_version=self.framework_version, **base_args) - elif self.framework == RLFramework.MXNET.value: + if self.framework == RLFramework.MXNET.value: return MXNetModel( framework_version=self.framework_version, py_version=PYTHON_VERSION, **extended_args ) @@ -242,14 +242,13 @@ def train_image(self): """ if self.image_name: return self.image_name - else: - return fw_utils.create_image_uri( - self.sagemaker_session.boto_region_name, - self._image_framework(), - self.train_instance_type, - self._image_version(), - py_version=PYTHON_VERSION, - ) + return fw_utils.create_image_uri( + self.sagemaker_session.boto_region_name, + self._image_framework(), + self.train_instance_type, + self._image_version(), + py_version=PYTHON_VERSION, + ) @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): @@ -406,7 +405,7 @@ def default_metric_definitions(cls, toolkit): {"Name": "reward-training", "Regex": "^Training>.*Total reward=(.*?),"}, {"Name": "reward-testing", "Regex": "^Testing>.*Total reward=(.*?),"}, ] - elif toolkit is RLToolkit.RAY: + if toolkit is RLToolkit.RAY: float_regex = "[-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?" 
# noqa: W605, E501 return [ diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index d14454eb4a..efa56fc0f9 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -1231,8 +1231,7 @@ def expand_role(self, role): """ if "/" in role: return role - else: - return self.boto_session.resource("iam").Role(role).arn + return self.boto_session.resource("iam").Role(role).arn def get_caller_identity_arn(self): """Returns the ARN user or role whose credentials are used to call the API. @@ -1791,5 +1790,4 @@ def _vpc_config_from_training_job( ): if vpc_config_override is vpc_utils.VPC_CONFIG_DEFAULT: return training_job_desc.get(vpc_utils.VPC_CONFIG_KEY) - else: - return vpc_utils.sanitize(vpc_config_override) + return vpc_utils.sanitize(vpc_config_override) diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index e066980b75..56cdcd1f51 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -176,7 +176,7 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na training_job_name ) ) - elif not framework: + if not framework: # If we were unable to parse the framework name from the image it is not one of our # officially supported images, in this case just add the image to the init params. init_params["image_name"] = image_name diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 2026dbfca8..a24bf75dec 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -569,12 +569,11 @@ def _default_s3_path(self, directory, mpi=False): local_code = utils.get_config_value("local.local_code", self.sagemaker_session.config) if self.sagemaker_session.local_mode and local_code: return "/opt/ml/shared/{}".format(directory) - elif mpi: + if mpi: return "/opt/ml/model" - elif self._current_job_name: + if self._current_job_name: return os.path.join(self.output_path, self._current_job_name, directory) - else: - return None + return None def _script_mode_enabled(self): return self.py_version == "py3" or self.script_mode diff --git a/src/sagemaker/tensorflow/predictor.py b/src/sagemaker/tensorflow/predictor.py index c56f72ddc9..f9dd30d014 100644 --- a/src/sagemaker/tensorflow/predictor.py +++ b/src/sagemaker/tensorflow/predictor.py @@ -94,8 +94,7 @@ def __init__(self): def __call__(self, data): if isinstance(data, tensor_pb2.TensorProto): return json_format.MessageToJson(data) - else: - return json_serializer(data) + return json_serializer(data) tf_json_serializer = _TFJsonSerializer() diff --git a/src/sagemaker/vpc_utils.py b/src/sagemaker/vpc_utils.py index 9e412db734..5bf1fd687b 100644 --- a/src/sagemaker/vpc_utils.py +++ b/src/sagemaker/vpc_utils.py @@ -83,9 +83,9 @@ def sanitize(vpc_config): """ if vpc_config is None: return vpc_config - elif not isinstance(vpc_config, dict): + if not isinstance(vpc_config, dict): raise ValueError("vpc_config is not a dict: {}".format(vpc_config)) - elif not vpc_config: + if not vpc_config: raise ValueError("vpc_config is empty") subnets = vpc_config.get(SUBNETS_KEY) @@ -93,7 +93,7 @@ def sanitize(vpc_config): raise ValueError("vpc_config is missing key: {}".format(SUBNETS_KEY)) if not isinstance(subnets, list): raise ValueError("vpc_config value for {} is not a list: {}".format(SUBNETS_KEY, subnets)) - elif not subnets: + if not subnets: raise ValueError("vpc_config value for {} is empty".format(SUBNETS_KEY)) security_group_ids = vpc_config.get(SECURITY_GROUP_IDS_KEY) @@ -105,7 
+105,7 @@ def sanitize(vpc_config): SECURITY_GROUP_IDS_KEY, security_group_ids ) ) - elif not security_group_ids: + if not security_group_ids: raise ValueError("vpc_config value for {} is empty".format(SECURITY_GROUP_IDS_KEY)) return to_dict(subnets, security_group_ids) From 100d9067c4717e3568b13841098f570f9701e5cc Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 11 Jul 2019 14:51:00 -0700 Subject: [PATCH 06/35] change: fix list serialization for 1P algos (#922) --- src/sagemaker/amazon/hyperparameter.py | 8 +++++++- tests/integ/test_kmeans.py | 3 +++ tests/integ/test_randomcutforest.py | 1 + tests/unit/test_kmeans.py | 2 +- tests/unit/test_randomcutforest.py | 2 +- 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/amazon/hyperparameter.py b/src/sagemaker/amazon/hyperparameter.py index 06dc89e45c..15ae928810 100644 --- a/src/sagemaker/amazon/hyperparameter.py +++ b/src/sagemaker/amazon/hyperparameter.py @@ -12,6 +12,8 @@ # language governing permissions and limitations under the License. from __future__ import absolute_import +import json + class Hyperparameter(object): """An algorithm hyperparameter with optional validation. Implemented as a python @@ -67,4 +69,8 @@ def serialize_all(obj): """Return all non-None ``hyperparameter`` values on ``obj`` as a ``dict[str,str].``""" if "_hyperparameters" not in dir(obj): return {} - return {k: str(v) for k, v in obj._hyperparameters.items() if v is not None} + return { + k: json.dumps(v) if isinstance(v, list) else str(v) + for k, v in obj._hyperparameters.items() + if v is not None + } diff --git a/tests/integ/test_kmeans.py b/tests/integ/test_kmeans.py index 3323e961cd..65f2854179 100644 --- a/tests/integ/test_kmeans.py +++ b/tests/integ/test_kmeans.py @@ -13,6 +13,7 @@ from __future__ import absolute_import import gzip +import json import os import pickle import sys @@ -52,6 +53,7 @@ def test_kmeans(sagemaker_session): kmeans.half_life_time_size = 1 kmeans.epochs = 1 kmeans.center_factor = 1 + kmeans.eval_metrics = ["ssd", "msd"] assert kmeans.hyperparameters() == dict( init_method=kmeans.init_method, @@ -63,6 +65,7 @@ def test_kmeans(sagemaker_session): epochs=str(kmeans.epochs), extra_center_factor=str(kmeans.center_factor), k=str(kmeans.k), + eval_metrics=json.dumps(kmeans.eval_metrics), force_dense="True", ) diff --git a/tests/integ/test_randomcutforest.py b/tests/integ/test_randomcutforest.py index 0c74251fd1..2301cca786 100644 --- a/tests/integ/test_randomcutforest.py +++ b/tests/integ/test_randomcutforest.py @@ -34,6 +34,7 @@ def test_randomcutforest(sagemaker_session): train_instance_type="ml.c4.xlarge", num_trees=50, num_samples_per_tree=20, + eval_metrics=["accuracy", "precision_recall_fscore"], sagemaker_session=sagemaker_session, ) diff --git a/tests/unit/test_kmeans.py b/tests/unit/test_kmeans.py index 156920e787..0389f8c180 100644 --- a/tests/unit/test_kmeans.py +++ b/tests/unit/test_kmeans.py @@ -104,7 +104,7 @@ def test_all_hyperparameters(sagemaker_session): half_life_time_size="0", epochs="10", extra_center_factor="2", - eval_metrics="['msd', 'ssd']", + eval_metrics='["msd", "ssd"]', force_dense="True", ) diff --git a/tests/unit/test_randomcutforest.py b/tests/unit/test_randomcutforest.py index f182969c54..de3fda172c 100644 --- a/tests/unit/test_randomcutforest.py +++ b/tests/unit/test_randomcutforest.py @@ -100,7 +100,7 @@ def test_all_hyperparameters(sagemaker_session): assert randomcutforest.hyperparameters() == dict( num_samples_per_tree=str(NUM_SAMPLES_PER_TREE), num_trees=str(NUM_TREES), - 
eval_metrics="{}".format(EVAL_METRICS), + eval_metrics='["accuracy", "precision_recall_fscore"]', ) From 6061a8bfbf9302ce9dc17b1a96133523fbbb7b41 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Thu, 11 Jul 2019 15:46:09 -0700 Subject: [PATCH 07/35] change: enable simplifiable-if-expression pylint checks (#926) --- .pylintrc | 1 - src/sagemaker/session.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.pylintrc b/.pylintrc index 0a980d43b3..9a3ee55af3 100644 --- a/.pylintrc +++ b/.pylintrc @@ -94,7 +94,6 @@ disable= inconsistent-return-statements, # TODO: Make returns consistent consider-merging-isinstance, # TODO: Merge isinstance where appropriate consider-using-in, # TODO: Consider merging comparisons with "in" - simplifiable-if-expression, # TODO: Simplify expressions too-many-public-methods, # TODO: Resolve ungrouped-imports, # TODO: Group imports consider-using-ternary, # TODO: Consider ternary expressions diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index efa56fc0f9..019f9e4ef9 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -1292,9 +1292,7 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method client = self.boto_session.client("logs", config=config) log_group = "/aws/sagemaker/TrainingJobs" - job_already_completed = ( - True if status == "Completed" or status == "Failed" or status == "Stopped" else False - ) + job_already_completed = status == "Completed" or status == "Failed" or status == "Stopped" state = LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE dot = False From fb309bcd8d57d462bdceee58a00108d6d8e42490 Mon Sep 17 00:00:00 2001 From: GaryTu1020 <45720913+GaryTu1020@users.noreply.github.com> Date: Thu, 11 Jul 2019 16:30:06 -0700 Subject: [PATCH 08/35] feature: deal with credentials for Git support for GitHub (#914) add authentication info --- doc/overview.rst | 85 ++++++--- src/sagemaker/estimator.py | 21 ++- src/sagemaker/git_utils.py | 205 +++++++++++++++++---- src/sagemaker/model.py | 23 ++- tests/integ/test_git.py | 66 ++++++- tests/unit/test_estimator.py | 129 ++++++++++++-- tests/unit/test_git_utils.py | 334 ++++++++++++++++++++++++++++++----- tests/unit/test_model.py | 99 +++++++++++ 8 files changed, 830 insertions(+), 132 deletions(-) diff --git a/doc/overview.rst b/doc/overview.rst index d5bef646f5..7d39f746fb 100644 --- a/doc/overview.rst +++ b/doc/overview.rst @@ -183,38 +183,43 @@ Here is an example: # When you are done using your endpoint algo.delete_endpoint() -Git Support ------------ -If you have your training scripts in your GitHub repository, you can use them directly without the trouble to download -them to local machine. Git support can be enabled simply by providing ``git_config`` parameter when initializing an -estimator. If Git support is enabled, then ``entry_point``, ``source_dir`` and ``dependencies`` should all be relative -paths in the Git repo. Note that if you decided to use Git support, then everything you need for ``entry_point``, -``source_dir`` and ``dependencies`` should be in a single Git repo. +Use Scripts Stored in a Git Repository +-------------------------------------- +When you create an estimator, you can specify a training script that is stored in a GitHub or other Git repository as the entry point for the estimator, so that you don't have to download the scripts locally. +If you do so, source directory and dependencies should be in the same repo if they are needed. 
Git support can be enabled simply by providing the ``git_config`` parameter
when creating an ``Estimator`` object. If Git support is enabled, then ``entry_point``, ``source_dir`` and ``dependencies``
should be relative paths in the Git repo if provided.

The ``git_config`` parameter includes fields ``repo``, ``branch``, ``commit``, ``2FA_enabled``, ``username``,
``password`` and ``token``. The ``repo`` field is required. All other fields are optional. ``repo`` specifies the Git
repository where your training script is stored. If you don't provide ``branch``, the default value 'master' is used.
If you don't provide ``commit``, the latest commit in the specified branch is used.

``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. Set ``2FA_enabled`` to 'True' if
two-factor authentication is enabled for the GitHub (or other Git) account, otherwise set it to 'False'.
If you do not provide a value for ``2FA_enabled``, a default value of 'False' is used.

If ``repo`` is an SSH URL, you should either have no passphrase for the SSH key pairs, or have the ``ssh-agent`` configured
so that you are not prompted for the SSH passphrase when you run a ``git clone`` command with SSH URLs. For SSH URLs, it
does not matter whether two-factor authentication is enabled.

If ``repo`` is an HTTPS URL, 2FA matters. When 2FA is disabled, either ``token`` or ``username``+``password`` is used for
authentication if provided (``token`` takes priority). When 2FA is enabled, only ``token`` is used for authentication if
provided. If the required authentication information is not provided, the Python SDK tries to use the local credentials
storage to authenticate. If that also fails, an error is raised.

Here are some examples of creating estimators with Git support:

.. code:: python

    # Specifies the git_config parameter. This example does not provide Git credentials, so the Python SDK will try
    # to use local credential storage.
    git_config = {'repo': 'https://github.com/username/repo-with-training-scripts.git',
                  'branch': 'branch1',
                  'commit': '4893e528afa4a790331e1b5286954f073b0f14a2'}

    # In this example, the source directory 'pytorch' contains the entry point 'mnist.py' and other source code,
    # and it is a relative path inside the Git repo.
    pytorch_estimator = PyTorch(entry_point='mnist.py',
                                role='SageMakerRole',
                                source_dir='pytorch',
                                git_config=git_config,
                                train_instance_count=1,
                                train_instance_type='ml.c4.xlarge')

.. code:: python

    # You can also specify git_config by providing only 'repo' and 'branch'.
    # If this is the case, the latest commit in that branch will be used.
    git_config = {'repo': 'git@github.com:username/repo-with-training-scripts.git',
                  'branch': 'branch1'}

    # In this example, the entry point 'mnist.py' is all we need for source code.
    # We need to specify the path to it in the Git repo.
    mx_estimator = MXNet(entry_point='mxnet/mnist.py',
                         role='SageMakerRole',
                         git_config=git_config,
                         train_instance_count=1,
                         train_instance_type='ml.c4.xlarge')

.. code:: python

    # Only providing 'repo' is also allowed. If this is the case, the latest commit in the 'master' branch will be
    # used. This example does not provide '2FA_enabled', so 2FA is treated as disabled by default. 'username' and
    # 'password' are provided for authentication.
    git_config = {'repo': 'https://github.com/username/repo-with-training-scripts.git',
                  'username': 'username',
                  'password': 'passw0rd!'}

    # In this example, besides the entry point and other source code in the source directory, we also need some
    # dependencies for the training job. Dependencies should also be paths inside the Git repo.
    pytorch_estimator = PyTorch(entry_point='mnist.py',
                                role='SageMakerRole',
                                source_dir='pytorch',
                                dependencies=['dep.py'],
                                git_config=git_config,
                                train_instance_count=1,
                                train_instance_type='ml.c4.xlarge')

.. code:: python

    # This example specifies that 2FA is enabled, and a token is provided for authentication.
    git_config = {'repo': 'https://github.com/username/repo-with-training-scripts.git',
                  '2FA_enabled': True,
                  'token': 'your-token'}

    # In this example, besides the entry point, we also need some dependencies for the training job.
    pytorch_estimator = PyTorch(entry_point='pytorch/mnist.py',
                                role='SageMakerRole',
                                dependencies=['dep.py'],
                                git_config=git_config,
                                train_instance_count=1,
                                train_instance_type='local')

Git support can be used not only for training jobs, but also for hosting models. The usage is the same as the above,
and ``git_config`` should be provided when creating model objects, e.g. ``TensorFlowModel``, ``MXNetModel``,
``PyTorchModel``, as in the sketch below.
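A minimal sketch of the hosting case, assuming a hypothetical repo URL, inference script, and model artifact location:

.. code:: python

    git_config = {'repo': 'https://github.com/username/repo-with-inference-scripts.git',
                  'branch': 'master'}

    # 'inference.py' is a relative path inside the Git repo.
    pytorch_model = PyTorchModel(model_data='s3://my-bucket/my-model/model.tar.gz',
                                 role='SageMakerRole',
                                 entry_point='inference.py',
                                 git_config=git_config)
    predictor = pytorch_model.deploy(initial_instance_count=1,
                                     instance_type='ml.c4.xlarge')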
Training Metrics
----------------
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 287d203912..4a3aeb9649 100644
--- a/src/sagemaker/estimator.py
+++ b/src/sagemaker/estimator.py
@@ -20,10 +20,10 @@
 from abc import abstractmethod
 from six import with_metaclass
 from six import string_types
-
 import sagemaker
 from sagemaker import git_utils
 from sagemaker.analytics import TrainingJobAnalytics
+
 from sagemaker.fw_utils import (
     create_image_uri,
     tar_and_upload_dir,
@@ -975,10 +975,12 @@ def __init__(
            >>> |----- test.py

            You can assign entry_point='src/train.py'.
-        git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch'
-            and 'commit' (default: None).
-            'branch' and 'commit' are optional. If 'branch' is not specified, 'master' branch will be used. If
-            'commit' is not specified, the latest commit in the required branch will be used.
+        git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``,
+            ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token`` (default: None). The fields are
+            optional except ``repo``. If ``branch`` is not specified, master branch will be used. If ``commit``
+            is not specified, the latest commit in the required branch will be used.

            Example:

                The following config:

                >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git',
                >>>               'branch': 'test-branch-git-config',
                >>>               'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'}

                results in cloning the repo specified in 'repo', then checking out the specified branch and commit.

+            ``2FA_enabled``, ``username``, ``password`` and ``token`` are for authentication purposes.
+            ``2FA_enabled`` must be ``True`` or ``False`` if it is provided. If ``2FA_enabled`` is not provided,
+            we consider 2FA as disabled. For GitHub and other Git repos, when SSH URLs are provided, it does not
+            make a difference whether 2FA is enabled or disabled; an SSH passphrase should be in local storage.
+            When HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will
+            be used for authentication if provided (token prioritized); if 2FA is enabled, only token will
+            be used for authentication if provided. If the required authentication info is not provided, the
+            Python SDK will try to use local credentials storage to authenticate. If that also fails, an error
+            message is raised.
        source_dir (str): Path (absolute or relative) to a directory with any other training source code
            dependencies aside from the entry point file (default: None). Structure within this
            directory is preserved when training on Amazon SageMaker. If 'git_config' is provided,
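The clone-and-checkout behavior these docstrings describe is implemented by ``sagemaker.git_utils.git_clone_repo``,
shown in the next diff. A minimal sketch of how the SDK consumes it (the repo URL and file names are hypothetical):

.. code:: python

    from sagemaker.git_utils import git_clone_repo

    git_config = {'repo': 'https://github.com/username/repo-with-training-scripts.git',
                  'branch': 'branch1'}

    # Clones the repo into a temporary directory, checks out the requested
    # branch/commit, and returns updated paths pointing into the clone.
    updated = git_clone_repo(git_config, entry_point='mnist.py', source_dir='pytorch')

    # When source_dir is given, entry_point stays relative to it, while
    # source_dir becomes an absolute path inside the temporary clone.
    print(updated['entry_point'])  # 'mnist.py'
    print(updated['source_dir'])   # e.g. '/tmp/tmpXXXXXXXX/pytorch'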
diff --git a/src/sagemaker/git_utils.py b/src/sagemaker/git_utils.py index 7360c31e55..fa4e104cfc 100644
--- a/src/sagemaker/git_utils.py
+++ b/src/sagemaker/git_utils.py
@@ -16,6 +16,8 @@
 import six
 import subprocess
 import tempfile
+import warnings
+from six.moves import urllib


 def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None):
@@ -23,9 +25,18 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None):
     and set ``entry_point``, ``source_dir`` and ``dependencies`` to the right file or directory in the repo cloned.

     Args:
-        git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
-            and ``commit``. ``branch`` and ``commit`` are optional. If ``branch`` is not specified, master branch
-            will be used. If ``commit`` is not specified, the latest commit in the required branch will be used.
+        git_config (dict[str, object]): Git configurations used for cloning files, including ``repo``, ``branch``,
+            ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The fields are optional except
+            ``repo``. If ``branch`` is not specified, master branch will be used. If ``commit`` is not specified,
+            the latest commit in the required branch will be used. ``2FA_enabled``, ``username``, ``password`` and
+            ``token`` are for authentication purposes.
+            ``2FA_enabled`` must be ``True`` or ``False`` if it is provided. If ``2FA_enabled`` is not provided, we
+            consider 2FA as disabled. For GitHub and other Git repos, when SSH URLs are provided, it does not make a
+            difference whether 2FA is enabled or disabled; an SSH passphrase should be in local storage. When
+            HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will be used for
+            authentication if provided (token prioritized); if 2FA is enabled, only token will be used for
+            authentication if provided. If the required authentication info is not provided, the Python SDK will try
+            to use local credentials storage to authenticate. If that also fails, an error message is raised.
entry_point (str): A relative location to the Python source file which should be executed as the entry point to training or model hosting in the Git repo. source_dir (str): A relative location to a directory with other training or model hosting source code @@ -41,18 +52,18 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None): ValueError: If 1. entry point specified does not exist in the repo 2. source dir specified does not exist in the repo 3. dependencies specified do not exist in the repo - 4. git_config is in bad format + 4. wrong format is provided for git_config Returns: - dict: A dict that contains the updated values of entry_point, source_dir and dependencies + dict: A dict that contains the updated values of entry_point, source_dir and dependencies. """ if entry_point is None: raise ValueError("Please provide an entry point.") _validate_git_config(git_config) - repo_dir = tempfile.mkdtemp() - subprocess.check_call(["git", "clone", git_config["repo"], repo_dir]) + dest_dir = tempfile.mkdtemp() + _generate_and_run_clone_command(git_config, dest_dir) - _checkout_branch_and_commit(git_config, repo_dir) + _checkout_branch_and_commit(git_config, dest_dir) updated_paths = { "entry_point": entry_point, @@ -62,62 +73,180 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None): # check if the cloned repo contains entry point, source directory and dependencies if source_dir: - if not os.path.isdir(os.path.join(repo_dir, source_dir)): + if not os.path.isdir(os.path.join(dest_dir, source_dir)): raise ValueError("Source directory does not exist in the repo.") - if not os.path.isfile(os.path.join(repo_dir, source_dir, entry_point)): + if not os.path.isfile(os.path.join(dest_dir, source_dir, entry_point)): raise ValueError("Entry point does not exist in the repo.") - updated_paths["source_dir"] = os.path.join(repo_dir, source_dir) + updated_paths["source_dir"] = os.path.join(dest_dir, source_dir) else: - if os.path.isfile(os.path.join(repo_dir, entry_point)): - updated_paths["entry_point"] = os.path.join(repo_dir, entry_point) + if os.path.isfile(os.path.join(dest_dir, entry_point)): + updated_paths["entry_point"] = os.path.join(dest_dir, entry_point) else: raise ValueError("Entry point does not exist in the repo.") - - updated_paths["dependencies"] = [] - for path in dependencies: - if os.path.exists(os.path.join(repo_dir, path)): - updated_paths["dependencies"].append(os.path.join(repo_dir, path)) - else: - raise ValueError("Dependency {} does not exist in the repo.".format(path)) + if dependencies is not None: + updated_paths["dependencies"] = [] + for path in dependencies: + if os.path.exists(os.path.join(dest_dir, path)): + updated_paths["dependencies"].append(os.path.join(dest_dir, path)) + else: + raise ValueError("Dependency {} does not exist in the repo.".format(path)) return updated_paths def _validate_git_config(git_config): - """check if a git_config param is valid + if "repo" not in git_config: + raise ValueError("Please provide a repo for git_config.") + for key in git_config: + if key == "2FA_enabled": + if not isinstance(git_config["2FA_enabled"], bool): + raise ValueError("Please enter a bool type for 2FA_enabled'.") + elif not isinstance(git_config[key], six.string_types): + raise ValueError("'{}' must be a string.".format(key)) + + +def _generate_and_run_clone_command(git_config, dest_dir): + """check if a git_config param is valid, if it is, create the command to git clone the repo, and run it. 
Args: git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch`` and ``commit``. + dest_dir (str): The local directory to clone the Git repo into. Raises: - ValueError: If: - 1. git_config has no key 'repo' - 2. git_config['repo'] is in the wrong format. + CalledProcessError: If failed to clone git repo. """ - if "repo" not in git_config: - raise ValueError("Please provide a repo for git_config.") - allowed_keys = ["repo", "branch", "commit"] - for key in allowed_keys: - if key in git_config and not isinstance(git_config[key], six.string_types): - raise ValueError("'{}' should be a string".format(key)) - for key in git_config: - if key not in allowed_keys: - raise ValueError("Unexpected argument(s) provided for git_config!") + _clone_command_for_github_like(git_config, dest_dir) + + +def _clone_command_for_github_like(git_config, dest_dir): + """check if a git_config param representing a GitHub (or like) repo is valid, if it is, create the command to + git clone the repo, and run it. + + Args: + git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch`` + and ``commit``. + dest_dir (str): The local directory to clone the Git repo into. + + Raises: + ValueError: If git_config['repo'] is in the wrong format. + CalledProcessError: If failed to clone git repo. + """ + is_https = git_config["repo"].startswith("https://") + is_ssh = git_config["repo"].startswith("git@") + if not is_https and not is_ssh: + raise ValueError("Invalid Git url provided.") + if is_ssh: + _clone_command_for_github_like_ssh(git_config, dest_dir) + elif "2FA_enabled" in git_config and git_config["2FA_enabled"] is True: + _clone_command_for_github_like_https_2fa_enabled(git_config, dest_dir) + else: + _clone_command_for_github_like_https_2fa_disabled(git_config, dest_dir) + + +def _clone_command_for_github_like_ssh(git_config, dest_dir): + if "username" in git_config or "password" in git_config or "token" in git_config: + warnings.warn("SSH cloning, authentication information in git config will be ignored.") + _run_clone_command(git_config["repo"], dest_dir) + +def _clone_command_for_github_like_https_2fa_disabled(git_config, dest_dir): + updated_url = git_config["repo"] + if "token" in git_config: + if "username" in git_config or "password" in git_config: + warnings.warn("Using token for authentication, " "other credentials will be ignored.") + updated_url = _insert_token_to_repo_url(url=git_config["repo"], token=git_config["token"]) + elif "username" in git_config and "password" in git_config: + updated_url = _insert_username_and_password_to_repo_url( + url=git_config["repo"], username=git_config["username"], password=git_config["password"] + ) + elif "username" in git_config or "password" in git_config: + warnings.warn("Credentials provided in git config will be ignored.") + _run_clone_command(updated_url, dest_dir) -def _checkout_branch_and_commit(git_config, repo_dir): + +def _clone_command_for_github_like_https_2fa_enabled(git_config, dest_dir): + updated_url = git_config["repo"] + if "token" in git_config: + if "username" in git_config or "password" in git_config: + warnings.warn("Using token for authentication, " "other credentials will be ignored.") + updated_url = _insert_token_to_repo_url(url=git_config["repo"], token=git_config["token"]) + _run_clone_command(updated_url, dest_dir) + + +def _run_clone_command(repo_url, dest_dir): + """Run the 'git clone' command with the repo url and the directory to clone the repo into. 
+
+    Args:
+        repo_url (str): Git repo url to be cloned.
+        dest_dir (str): Local path where the repo should be cloned into.
+
+    Raises:
+        CalledProcessError: If failed to clone git repo.
+    """
+    my_env = os.environ.copy()
+    if repo_url.startswith("https://"):
+        my_env["GIT_TERMINAL_PROMPT"] = "0"
+        subprocess.check_call(["git", "clone", repo_url, dest_dir], env=my_env)
+    elif repo_url.startswith("git@"):
+        with tempfile.NamedTemporaryFile() as sshnoprompt:
+            write_pipe = open(sshnoprompt.name, "w")
+            write_pipe.write("ssh -oBatchMode=yes $@")
+            write_pipe.close()
+            # 511 in decimal is the same as 777 in octal
+            os.chmod(sshnoprompt.name, 511)
+            my_env["GIT_SSH"] = sshnoprompt.name
+            subprocess.check_call(["git", "clone", repo_url, dest_dir], env=my_env)
+
+
+def _insert_token_to_repo_url(url, token):
+    """Insert the token into the Git repo url, to make a component of the git clone command. This method can
+    only be called when repo_url is an https url.
+
+    Args:
+        url (str): Git repo url where the token should be inserted into.
+        token (str): Token to be inserted.
+
+    Returns:
+        str: the component needed for the git clone command.
+    """
+    index = len("https://")
+    if url.find(token) == index:
+        return url
+    return url.replace("https://", "https://" + token + "@")
+
+
+def _insert_username_and_password_to_repo_url(url, username, password):
+    """Insert the username and the password into the Git repo url, to make a component of the git clone command.
+    This method can only be called when repo_url is an https url.
+
+    Args:
+        url (str): Git repo url where the username and password should be inserted into.
+        username (str): Username to be inserted.
+        password (str): Password to be inserted.
+
+    Returns:
+        str: the component needed for the git clone command.
+    """
+    password = urllib.parse.quote_plus(password)
+    # urllib parses ' ' as '+', but what we need is '%20' here
+    password = password.replace("+", "%20")
+    index = len("https://")
+    return url[:index] + username + ":" + password + "@" + url[index:]
+
+
-def _checkout_branch_and_commit(git_config, repo_dir):
+def _checkout_branch_and_commit(git_config, dest_dir):
     """Checkout the required branch and commit.

     Args:
-        git_config: (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
+        git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
            and ``commit``.
-        repo_dir (str): the directory where the repo is cloned
+        dest_dir (str): the directory where the repo is cloned

     Raises:
         CalledProcessError: If 1. failed to checkout the required branch 2. failed to checkout the required commit
     """
     if "branch" in git_config:
-        subprocess.check_call(args=["git", "checkout", git_config["branch"]], cwd=str(repo_dir))
+        subprocess.check_call(args=["git", "checkout", git_config["branch"]], cwd=str(dest_dir))
     if "commit" in git_config:
-        subprocess.check_call(args=["git", "checkout", git_config["commit"]], cwd=str(repo_dir))
+        subprocess.check_call(args=["git", "checkout", git_config["commit"]], cwd=str(dest_dir))
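The two URL helpers above are easiest to understand by example. A short sketch (the URL, token, and credentials are
hypothetical; the expected strings follow directly from the implementations above):

.. code:: python

    from sagemaker.git_utils import (
        _insert_token_to_repo_url,
        _insert_username_and_password_to_repo_url,
    )

    url = "https://github.com/username/repo.git"

    # The token is spliced in right after the scheme; a URL that already
    # carries the token is returned unchanged.
    assert (_insert_token_to_repo_url(url=url, token="my-token")
            == "https://my-token@github.com/username/repo.git")

    # The password is percent-encoded, with ' ' mapped to '%20' rather than '+'.
    assert (_insert_username_and_password_to_repo_url(
                url=url, username="me", password="passw0rd@ %")
            == "https://me:passw0rd%40%20%25@github.com/username/repo.git")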
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index e2352c5f41..b49ab5e7f5 100644
--- a/src/sagemaker/model.py
+++ b/src/sagemaker/model.py
@@ -516,10 +516,12 @@ def __init__(
            >>> |----- test.py

            You can assign entry_point='src/inference.py'.
-        git_config (dict[str, str]): Git configurations used for cloning files, including 'repo', 'branch'
-            and 'commit' (default: None).
-            'branch' and 'commit' are optional. If 'branch' is not specified, 'master' branch will be used. If
-            'commit' is not specified, the latest commit in the required branch will be used.
+        git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``,
+            ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token`` (default: None). The fields are
+            optional except ``repo``. If ``branch`` is not specified, master branch will be used. If ``commit``
+            is not specified, the latest commit in the required branch will be used.

            Example:

                The following config:

                >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git',
                >>>               'branch': 'test-branch-git-config',
                >>>               'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'}

                results in cloning the repo specified in 'repo', then checking out the specified branch and commit.

+            ``2FA_enabled``, ``username``, ``password`` and ``token`` are for authentication purposes.
+            ``2FA_enabled`` must be ``True`` or ``False`` if it is provided. If ``2FA_enabled`` is not provided,
+            we consider 2FA as disabled. For GitHub and other Git repos, when SSH URLs are provided, it does not
+            make a difference whether 2FA is enabled or disabled; an SSH passphrase should be in local storage.
+            When HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will
+            be used for authentication if provided (token prioritized); if 2FA is enabled, only token will
+            be used for authentication if provided. If the required authentication info is not provided, the
+            Python SDK will try to use local credentials storage to authenticate. If that also fails, an error
+            message is raised.
        source_dir (str): Path (absolute or relative) to a directory with any other training source code
            dependencies aside from the entry point file (default: None). Structure within this directory
            will be preserved when training on SageMaker.
If 'git_config' is provided, @@ -554,13 +565,13 @@ def __init__( Example: The following call - >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) + >>> Estimator(entry_point='inference.py', dependencies=['my/libs/common', 'virtual-env']) results in the following inside the container: >>> $ ls >>> opt/ml/code - >>> |------ train.py + >>> |------ inference.py >>> |------ common >>> |------ virtual-env diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py index a941269122..da5579f9b9 100644 --- a/tests/integ/test_git.py +++ b/tests/integ/test_git.py @@ -16,6 +16,7 @@ import numpy import pytest +import subprocess import tempfile from tests.integ import lock as lock @@ -30,6 +31,20 @@ BRANCH = "test-branch-git-config" COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" +PRIVATE_GIT_REPO = "https://github.com/git-support-test/test-git.git" +PRIVATE_BRANCH = "master" +PRIVATE_COMMIT = "a46d6f9add3532ca3e4e231e4108b6bad15b7373" + +PRIVATE_GIT_REPO_2FA = "https://github.com/git-support-test-2fa/test-git.git" +PRIVATE_GIT_REPO_2FA_SSH = "git@github.com:git-support-test-2fa/test-git.git" +PRIVATE_BRANCH_2FA = "master" +PRIVATE_COMMIT_2FA = "52381dee030eb332a7e42d9992878d7261eb21d4" + +# Since personal access tokens will delete themselves if they are committed to GitHub repos, +# we cannot hard code them here, but have to encrypt instead +ENCRYPTED_PRIVATE_REPO_TOKEN = "e-4_1-1dc_71-f0e_f7b54a0f3b7db2757163da7b5e8c3" +PRIVATE_REPO_TOKEN = ENCRYPTED_PRIVATE_REPO_TOKEN.replace("-", "").replace("_", "") + # endpoint tests all use the same port, so we use this lock to prevent concurrent execution LOCK_PATH = os.path.join(tempfile.gettempdir(), "sagemaker_test_git_lock") @@ -56,7 +71,6 @@ def test_git_support_with_pytorch(sagemaker_local_session): with lock.lock(LOCK_PATH): try: predictor = pytorch.deploy(initial_instance_count=1, instance_type="local") - data = numpy.zeros(shape=(1, 1, 28, 28)).astype(numpy.float32) result = predictor.predict(data) assert result is not None @@ -66,9 +80,17 @@ def test_git_support_with_pytorch(sagemaker_local_session): @pytest.mark.local_mode def test_git_support_with_mxnet(sagemaker_local_session): + script_path = "mnist.py" data_path = os.path.join(DATA_DIR, "mxnet_mnist") - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "2FA_enabled": False, + "username": "git-support-test", + "password": "passw0rd@ %", + } source_dir = "mxnet" dependencies = ["foo/bar.py"] mx = MXNet( @@ -114,7 +136,6 @@ def test_git_support_with_mxnet(sagemaker_local_session): git_config=git_config, ) predictor = model.deploy(initial_instance_count=1, instance_type="local") - data = numpy.zeros(shape=(1, 1, 28, 28)) result = predictor.predict(data) assert result is not None @@ -128,9 +149,11 @@ def test_git_support_with_sklearn(sagemaker_local_session, sklearn_full_version) script_path = "mnist.py" data_path = os.path.join(DATA_DIR, "sklearn_mnist") git_config = { - "repo": "https://github.com/GaryTu1020/python-sdk-testing.git", - "branch": "branch1", - "commit": "aafa4e96237dd78a015d5df22bfcfef46845c3c5", + "repo": PRIVATE_GIT_REPO_2FA, + "branch": PRIVATE_BRANCH_2FA, + "commit": PRIVATE_COMMIT_2FA, + "2FA_enabled": True, + "token": PRIVATE_REPO_TOKEN, } source_dir = "sklearn" sklearn = SKLearn( @@ -171,3 +194,34 @@ def test_git_support_with_sklearn(sagemaker_local_session, sklearn_full_version) 
assert result is not None finally: predictor.delete_endpoint() + + +@pytest.mark.local_mode +def test_git_support_with_sklearn_ssh_passphrase_not_configured( + sagemaker_local_session, sklearn_full_version +): + script_path = "mnist.py" + data_path = os.path.join(DATA_DIR, "sklearn_mnist") + git_config = { + "repo": PRIVATE_GIT_REPO_2FA_SSH, + "branch": PRIVATE_BRANCH_2FA, + "commit": PRIVATE_COMMIT_2FA, + } + source_dir = "sklearn" + sklearn = SKLearn( + entry_point=script_path, + role="SageMakerRole", + source_dir=source_dir, + py_version=PYTHON_VERSION, + train_instance_count=1, + train_instance_type="local", + sagemaker_session=sagemaker_local_session, + framework_version=sklearn_full_version, + hyperparameters={"epochs": 1}, + git_config=git_config, + ) + train_input = "file://" + os.path.join(data_path, "train") + test_input = "file://" + os.path.join(data_path, "test") + with pytest.raises(subprocess.CalledProcessError) as error: + sklearn.fit({"train": train_input, "test": test_input}) + assert "returned non-zero exit status" in str(error) diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index e7bc6d2edf..addcdc5025 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -51,16 +51,11 @@ GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git" BRANCH = "test-branch-git-config" COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" - -DESCRIBE_TRAINING_JOB_RESULT = {"ModelArtifacts": {"S3ModelArtifacts": MODEL_DATA}} -INSTANCE_TYPE = "c4.4xlarge" -ACCELERATOR_TYPE = "ml.eia.medium" -ROLE = "DummyRole" -IMAGE_NAME = "fakeimage" -REGION = "us-west-2" -JOB_NAME = "{}-{}".format(IMAGE_NAME, TIMESTAMP) -TAGS = [{"Name": "some-tag", "Value": "value-for-tag"}] -OUTPUT_PATH = "s3://bucket/prefix" +PRIVATE_GIT_REPO_SSH = "git@github.com:testAccount/private-repo.git" +PRIVATE_GIT_REPO = "https://github.com/testAccount/private-repo.git" +PRIVATE_BRANCH = "test-branch" +PRIVATE_COMMIT = "329bfcf884482002c05ff7f44f62599ebc9f445a" +REPO_DIR = "/tmp/repo_dir" DESCRIBE_TRAINING_JOB_RESULT = {"ModelArtifacts": {"S3ModelArtifacts": MODEL_DATA}} @@ -892,9 +887,9 @@ def test_git_support_bad_repo_url_format(sagemaker_session): train_instance_type=INSTANCE_TYPE, enable_cloudwatch_metrics=True, ) - with pytest.raises(subprocess.CalledProcessError) as error: + with pytest.raises(ValueError) as error: fw.fit() - assert "returned non-zero exit status" in str(error) + assert "Invalid Git url provided." in str(error) @patch( @@ -1026,6 +1021,116 @@ def test_git_support_dependencies_not_exist(sagemaker_session): assert "Dependency", "does not exist in the repo." 
in str(error) +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +def test_git_support_with_username_password_no_2fa(git_clone_repo, sagemaker_session): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "username": "username", + "password": "passw0rd!", + } + entry_point = "entry_point" + fw = DummyFramework( + entry_point=entry_point, + git_config=git_config, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + enable_cloudwatch_metrics=True, + ) + fw.fit() + git_clone_repo.assert_called_once_with(git_config, entry_point, None, []) + assert fw.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +def test_git_support_with_token_2fa(git_clone_repo, sagemaker_session): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "token": "my-token", + "2FA_enabled": True, + } + entry_point = "entry_point" + fw = DummyFramework( + entry_point=entry_point, + git_config=git_config, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + enable_cloudwatch_metrics=True, + ) + fw.fit() + git_clone_repo.assert_called_once_with(git_config, entry_point, None, []) + assert fw.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +def test_git_support_ssh_no_passphrase_needed(git_clone_repo, sagemaker_session): + git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + entry_point = "entry_point" + fw = DummyFramework( + entry_point=entry_point, + git_config=git_config, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + enable_cloudwatch_metrics=True, + ) + fw.fit() + git_clone_repo.assert_called_once_with(git_config, entry_point, None, []) + assert fw.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=subprocess.CalledProcessError( + returncode=1, cmd="git clone {} {}".format(PRIVATE_GIT_REPO_SSH, REPO_DIR) + ), +) +def test_git_support_ssh_passphrase_required(git_clone_repo, sagemaker_session): + git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + entry_point = "entry_point" + fw = DummyFramework( + entry_point=entry_point, + git_config=git_config, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + enable_cloudwatch_metrics=True, + ) + with pytest.raises(subprocess.CalledProcessError) as error: + fw.fit() + assert "returned non-zero exit status" in str(error) + + @patch("time.strftime", return_value=TIMESTAMP) def test_init_with_source_dir_s3(strftime, sagemaker_session): fw = DummyFramework( diff --git a/tests/unit/test_git_utils.py 
b/tests/unit/test_git_utils.py index a862e76704..c97b34207c 100644 --- a/tests/unit/test_git_utils.py +++ b/tests/unit/test_git_utils.py @@ -13,15 +13,20 @@ from __future__ import absolute_import import pytest +import os import subprocess from mock import patch from sagemaker import git_utils REPO_DIR = "/tmp/repo_dir" -GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git" -BRANCH = "test-branch-git-config" -COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" +PUBLIC_GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git" +PUBLIC_BRANCH = "test-branch-git-config" +PUBLIC_COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" +PRIVATE_GIT_REPO_SSH = "git@github.com:testAccount/private-repo.git" +PRIVATE_GIT_REPO = "https://github.com/testAccount/private-repo.git" +PRIVATE_BRANCH = "test-branch" +PRIVATE_COMMIT = "329bfcf884482002c05ff7f44f62599ebc9f445a" @patch("subprocess.check_call") @@ -30,55 +35,58 @@ @patch("os.path.isdir", return_value=True) @patch("os.path.exists", return_value=True) def test_git_clone_repo_succeed(exists, isdir, isfile, mkdtemp, check_call): - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} + git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT} entry_point = "entry_point" source_dir = "source_dir" dependencies = ["foo", "bar"] + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" ret = git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies) - check_call.assert_any_call(["git", "clone", git_config["repo"], REPO_DIR]) - check_call.assert_any_call(args=["git", "checkout", BRANCH], cwd=REPO_DIR) - check_call.assert_any_call(args=["git", "checkout", COMMIT], cwd=REPO_DIR) + check_call.assert_any_call(["git", "clone", git_config["repo"], REPO_DIR], env=env) + check_call.assert_any_call(args=["git", "checkout", PUBLIC_BRANCH], cwd=REPO_DIR) + check_call.assert_any_call(args=["git", "checkout", PUBLIC_COMMIT], cwd=REPO_DIR) mkdtemp.assert_called_once() assert ret["entry_point"] == "entry_point" assert ret["source_dir"] == "/tmp/repo_dir/source_dir" assert ret["dependencies"] == ["/tmp/repo_dir/foo", "/tmp/repo_dir/bar"] -def test_git_clone_repo_entry_point_not_provided(): - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} +def test_git_clone_repo_repo_not_provided(): + git_config = {"branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT} + entry_point = "entry_point_that_does_not_exist" source_dir = "source_dir" + dependencies = ["foo", "bar"] with pytest.raises(ValueError) as error: - git_utils.git_clone_repo(git_config=git_config, entry_point=None, source_dir=source_dir) - assert "Please provide an entry point." in str(error) + git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies) + assert "Please provide a repo for git_config." 
in str(error) -@patch("subprocess.check_call") -@patch("tempfile.mkdtemp", return_value=REPO_DIR) -@patch("os.path.isfile", return_value=True) -@patch("os.path.isdir", return_value=True) -@patch("os.path.exists", return_value=True) -def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, mkdtemp, check_call): - git_config = {"branch": BRANCH, "commit": COMMIT} - entry_point = "entry_point_that_does_not_exist" +def test_git_clone_repo_git_argument_wrong_format(): + git_config = { + "repo": PUBLIC_GIT_REPO, + "branch": PUBLIC_BRANCH, + "commit": PUBLIC_COMMIT, + "token": 42, + } + entry_point = "entry_point" source_dir = "source_dir" dependencies = ["foo", "bar"] + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" with pytest.raises(ValueError) as error: git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies) - assert "Please provide a repo for git_config." in str(error) + assert "'token' must be a string." in str(error) @patch( "subprocess.check_call", side_effect=subprocess.CalledProcessError( - returncode=1, cmd="git clone {} {}".format(GIT_REPO, REPO_DIR) + returncode=1, cmd="git clone {} {}".format(PUBLIC_GIT_REPO, REPO_DIR) ), ) @patch("tempfile.mkdtemp", return_value=REPO_DIR) -@patch("os.path.isfile", return_value=True) -@patch("os.path.isdir", return_value=True) -@patch("os.path.exists", return_value=True) -def test_git_clone_repo_clone_fail(exists, isdir, isfile, mkdtemp, check_call): - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} +def test_git_clone_repo_clone_fail(mkdtemp, check_call): + git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT} entry_point = "entry_point" source_dir = "source_dir" dependencies = ["foo", "bar"] @@ -92,11 +100,8 @@ def test_git_clone_repo_clone_fail(exists, isdir, isfile, mkdtemp, check_call): side_effect=[True, subprocess.CalledProcessError(returncode=1, cmd="git checkout banana")], ) @patch("tempfile.mkdtemp", return_value=REPO_DIR) -@patch("os.path.isfile", return_value=True) -@patch("os.path.isdir", return_value=True) -@patch("os.path.exists", return_value=True) -def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, mkdtemp, check_call): - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} +def test_git_clone_repo_branch_not_exist(mkdtemp, check_call): + git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT} entry_point = "entry_point" source_dir = "source_dir" dependencies = ["foo", "bar"] @@ -110,15 +115,12 @@ def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, mkdtemp, check_c side_effect=[ True, True, - subprocess.CalledProcessError(returncode=1, cmd="git checkout {}".format(COMMIT)), + subprocess.CalledProcessError(returncode=1, cmd="git checkout {}".format(PUBLIC_COMMIT)), ], ) @patch("tempfile.mkdtemp", return_value=REPO_DIR) -@patch("os.path.isfile", return_value=True) -@patch("os.path.isdir", return_value=True) -@patch("os.path.exists", return_value=True) -def test_git_clone_repo_commit_not_exist(exists, isdir, isfile, mkdtemp, check_call): - git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT} +def test_git_clone_repo_commit_not_exist(mkdtemp, check_call): + git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT} entry_point = "entry_point" source_dir = "source_dir" dependencies = ["foo", "bar"] @@ -132,8 +134,8 @@ def test_git_clone_repo_commit_not_exist(exists, isdir, isfile, mkdtemp, check_c @patch("os.path.isfile", 
return_value=False)
 @patch("os.path.isdir", return_value=True)
 @patch("os.path.exists", return_value=True)
 def test_git_clone_repo_entry_point_not_exist(exists, isdir, isfile, mkdtemp, check_call):
-    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
+    git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT}
     entry_point = "entry_point_that_does_not_exist"
     source_dir = "source_dir"
     dependencies = ["foo", "bar"]
@@ -148,7 +150,7 @@ def test_git_clone_repo_entry_point_not_exist(exists, isdir, isfile, mkdtemp, ch
 @patch("os.path.isdir", return_value=False)
 @patch("os.path.exists", return_value=True)
 def test_git_clone_repo_source_dir_not_exist(exists, isdir, isfile, mkdtemp, check_call):
-    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
+    git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT}
     entry_point = "entry_point"
     source_dir = "source_dir_that_does_not_exist"
     dependencies = ["foo", "bar"]
@@ -163,10 +165,260 @@ def test_git_clone_repo_source_dir_not_exist(exists, isdir, isfile, mkdtemp, che
 @patch("os.path.isdir", return_value=True)
 @patch("os.path.exists", side_effect=[True, False])
 def test_git_clone_repo_dependencies_not_exist(exists, isdir, isfile, mkdtemp, check_call):
-    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
+    git_config = {"repo": PUBLIC_GIT_REPO, "branch": PUBLIC_BRANCH, "commit": PUBLIC_COMMIT}
     entry_point = "entry_point"
     source_dir = "source_dir"
     dependencies = ["foo", "dep_that_does_not_exist"]
     with pytest.raises(ValueError) as error:
         git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
     assert "does not exist in the repo."
in str(error) + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_with_username_password_no_2fa(sfile, mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "username": "username", + "password": "passw0rd!", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + check_call.assert_any_call( + [ + "git", + "clone", + "https://username:passw0rd%21@github.com/testAccount/private-repo.git", + REPO_DIR, + ], + env=env, + ) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR) + assert ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_with_token_no_2fa(isfile, mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + "2FA_enabled": False, + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + check_call.assert_any_call( + [ + "git", + "clone", + "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", + REPO_DIR, + ], + env=env, + ) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR) + assert ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_with_token_2fa(isfile, mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "2FA_enabled": True, + "username": "username", + "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + check_call.assert_any_call( + [ + "git", + "clone", + "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", + REPO_DIR, + ], + env=env, + ) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR) + assert ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_ssh(isfile, mkdtemp, check_call): + git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + entry_point = "entry_point" + ret = git_utils.git_clone_repo(git_config, entry_point) + assert 
ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_with_token_no_2fa_unnecessary_creds_provided(isfile, mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "username": "username", + "password": "passw0rd!", + "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + with pytest.warns(UserWarning) as warn: + ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + assert ( + "Using token for authentication, other credentials will be ignored." + in warn[0].message.args[0] + ) + check_call.assert_any_call( + [ + "git", + "clone", + "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", + REPO_DIR, + ], + env=env, + ) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR) + assert ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_with_token_2fa_unnecessary_creds_provided(isfile, mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "2FA_enabled": True, + "username": "username", + "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + with pytest.warns(UserWarning) as warn: + ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + assert ( + "Using token for authentication, other credentials will be ignored." 
+ in warn[0].message.args[0] + ) + check_call.assert_any_call( + [ + "git", + "clone", + "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", + REPO_DIR, + ], + env=env, + ) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) + check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR) + assert ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch( + "subprocess.check_call", + side_effect=subprocess.CalledProcessError( + returncode=1, cmd="git clone {} {}".format(PRIVATE_GIT_REPO, REPO_DIR) + ), +) +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +def test_git_clone_repo_with_username_and_password_wrong_creds(mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "2FA_enabled": False, + "username": "username", + "password": "wrong-password", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + with pytest.raises(subprocess.CalledProcessError) as error: + git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + assert "returned non-zero exit status" in str(error) + + +@patch( + "subprocess.check_call", + side_effect=subprocess.CalledProcessError( + returncode=1, cmd="git clone {} {}".format(PRIVATE_GIT_REPO, REPO_DIR) + ), +) +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +def test_git_clone_repo_with_token_wrong_creds(mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "2FA_enabled": False, + "token": "wrong-token", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + with pytest.raises(subprocess.CalledProcessError) as error: + git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + assert "returned non-zero exit status" in str(error) + + +@patch( + "subprocess.check_call", + side_effect=subprocess.CalledProcessError( + returncode=1, cmd="git clone {} {}".format(PRIVATE_GIT_REPO, REPO_DIR) + ), +) +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +def test_git_clone_repo_with_and_token_2fa_wrong_creds(mkdtemp, check_call): + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "2FA_enabled": False, + "token": "wrong-token", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + with pytest.raises(subprocess.CalledProcessError) as error: + git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + assert "returned non-zero exit status" in str(error) diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index fa6253f925..0090137a91 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -43,6 +43,11 @@ GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git" BRANCH = "test-branch-git-config" COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" +PRIVATE_GIT_REPO_SSH = "git@github.com:testAccount/private-repo.git" +PRIVATE_GIT_REPO = "https://github.com/testAccount/private-repo.git" +PRIVATE_BRANCH = "test-branch" +PRIVATE_COMMIT = "329bfcf884482002c05ff7f44f62599ebc9f445a" +REPO_DIR = "/tmp/repo_dir" DESCRIBE_MODEL_PACKAGE_RESPONSE = { @@ -666,3 +671,97 @@ def test_git_support_dependencies_not_exist(sagemaker_session): ) model.prepare_container_def(instance_type=INSTANCE_TYPE) 
assert "Dependency", "does not exist in the repo." in str(error) + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def test_git_support_with_username_password_no_2fa( + tar_and_upload_dir, git_clone_repo, sagemaker_session +): + entry_point = "entry_point" + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "username": "username", + "password": "passw0rd!", + } + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + git_clone_repo.assert_called_with(git_config, entry_point, None, []) + assert model.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def test_git_support_with_token_2fa(tar_and_upload_dir, git_clone_repo, sagemaker_session): + entry_point = "entry_point" + git_config = { + "repo": PRIVATE_GIT_REPO, + "branch": PRIVATE_BRANCH, + "commit": PRIVATE_COMMIT, + "token": "my-token", + "2FA_enabled": True, + } + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + git_clone_repo.assert_called_with(git_config, entry_point, None, []) + assert model.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def test_git_support_ssh_no_passphrase_needed( + tar_and_upload_dir, git_clone_repo, sagemaker_session +): + entry_point = "entry_point" + git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + git_clone_repo.assert_called_with(git_config, entry_point, None, []) + assert model.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=subprocess.CalledProcessError( + returncode=1, cmd="git clone {} {}".format(PRIVATE_GIT_REPO_SSH, REPO_DIR) + ), +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def test_git_support_ssh_passphrase_required(tar_and_upload_dir, git_clone_repo, sagemaker_session): + entry_point = "entry_point" + git_config = {"repo": PRIVATE_GIT_REPO_SSH, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT} + with pytest.raises(subprocess.CalledProcessError) as error: + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + assert "returned non-zero exit status" in str(error) From 2cf69b2afdba9d1b2722b6972ded438fe0222dbe Mon Sep 17 00:00:00 2001 From: GaryTu1020 
<45720913+GaryTu1020@users.noreply.github.com> Date: Fri, 12 Jul 2019 15:50:12 -0700 Subject: [PATCH 09/35] feature: Git integration for CodeCommit (#927) * add functions, tests and doc for CodeCommit --- doc/overview.rst | 34 ++++++++++--- src/sagemaker/estimator.py | 37 ++++++++------ src/sagemaker/git_utils.py | 77 +++++++++++++++++++++++----- src/sagemaker/model.py | 37 ++++++++------ tests/integ/test_git.py | 68 ++++++++++++++++++++++++- tests/unit/test_estimator.py | 60 ++++++++++++++++++++++ tests/unit/test_git_utils.py | 98 +++++++++++++++++++++++++----------- tests/unit/test_model.py | 72 ++++++++++++++++++++++++++ 8 files changed, 405 insertions(+), 78 deletions(-) diff --git a/doc/overview.rst b/doc/overview.rst index 7d39f746fb..69992d9f1f 100644 --- a/doc/overview.rst +++ b/doc/overview.rst @@ -185,7 +185,7 @@ Here is an example: Use Scripts Stored in a Git Repository -------------------------------------- -When you create an estimator, you can specify a training script that is stored in a GitHub or other Git repository as the entry point for the estimator, so that you don't have to download the scripts locally. +When you create an estimator, you can specify a training script that is stored in a GitHub (or other Git) or CodeCommit repository as the entry point for the estimator, so that you don't have to download the scripts locally. If you do so, source directory and dependencies should be in the same repo if they are needed. Git support can be enabled simply by providing ``git_config`` parameter when creating an ``Estimator`` object. If Git support is enabled, then ``entry_point``, ``source_dir`` and ``dependencies`` should be relative paths in the Git repo if provided. @@ -195,19 +195,26 @@ The ``git_config`` parameter includes fields ``repo``, ``branch``, ``commit``, repository where your training script is stored. If you don't provide ``branch``, the default value 'master' is used. If you don't provide ``commit``, the latest commit in the specified branch is used. -``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. Set ``2FA_enabled`` to 'True' if -two-factor authentication is enabled for the GitHub (or other Git) account, otherwise set it to 'False'. -If you do not provide a value for ``2FA_enabled``, a default value of 'False' is used. +``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. For GitHub +(or other Git) accounts, set ``2FA_enabled`` to 'True' if two-factor authentication is enabled for the +account, otherwise set it to 'False'. If you do not provide a value for ``2FA_enabled``, a default +value of 'False' is used. CodeCommit does not support two-factor authentication, so do not provide +"2FA_enabled" with CodeCommit repositories. +For GitHub or other Git repositories, If ``repo`` is an SSH URL, you should either have no passphrase for the SSH key pairs, or have the ``ssh-agent`` configured so that you are not prompted for the SSH passphrase when you run a ``git clone`` command with SSH URLs. For SSH URLs, it -does not matter whether two-factor authentication is enabled. - -If ``repo`` is an https URL, 2FA matters. When 2FA is disabled, either ``token`` or ``username``+``password`` will be +does not matter whether two-factor authentication is enabled. If ``repo`` is an HTTPS URL, 2FA matters. When 2FA is disabled, either ``token`` or ``username``+``password`` will be used for authentication if provided (``token`` prioritized). 
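For instance, with 2FA disabled, either of the following configurations authenticates (placeholder repo URL and credentials; ``token`` wins if both are present):

.. code:: python

    git_config = {'repo': 'https://github.com/your-account/your-repo.git',
                  '2FA_enabled': False,
                  'token': 'your-token'}

    git_config = {'repo': 'https://github.com/your-account/your-repo.git',
                  '2FA_enabled': False,
                  'username': 'your-username',
                  'password': 'your-password'}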
When 2FA is enabled, only token will be used for authentication if provided. If required authentication info is not provided, python SDK will try to use local credentials storage to authenticate. If that fails either, an error message will be thrown. +For CodeCommit repos, please make sure you have completed the authentication setup: https://docs.aws.amazon.com/codecommit/latest/userguide/setting-up.html. +2FA is not supported by CodeCommit, so ``2FA_enabled`` should not be provided. There is no token in CodeCommit, so +``token`` should not be provided either. If ``repo`` is an SSH URL, the requirements are the same as GitHub repos. +If ``repo`` is an HTTPS URL, ``username``+``password`` will be used for authentication if they are provided; otherwise, +Python SDK will try to use either CodeCommit credential helper or local credential storage for authentication. + Here are some examples of creating estimators with Git support: .. code:: python @@ -276,6 +283,19 @@ Here are some examples of creating estimators with Git support: train_instance_count=1, train_instance_type='local') +.. code:: python + + # This example specifies a CodeCommit repository, and try to authenticate with provided username+password + git_config = {'repo': 'https://git-codecommit.us-west-2.amazonaws.com/v1/repos/your_repo_name', + 'username': 'username', + 'password': 'passw0rd!'} + + mx_estimator = MXNet(entry_point='mxnet/mnist.py', + role='SageMakerRole', + git_config=git_config, + train_instance_count=1, + train_instance_type='ml.c4.xlarge') + Git support can be used not only for training jobs, but also for hosting models. The usage is the same as the above, and ``git_config`` should be provided when creating model objects, e.g. ``TensorFlowModel``, ``MXNetModel``, ``PyTorchModel``. diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 4a3aeb9649..b68e0ededa 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -976,11 +976,10 @@ def __init__( You can assign entry_point='src/train.py'. git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``, - ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token`` (default: None). The fields are - optional except ``repo``. If ``branch`` is not specified, master branch will be used. If ``commit`` - is not specified, the latest commit in the required branch will be used. 'branch' and 'commit' are - optional. If 'branch' is not specified, 'master' branch will be used. If 'commit' is not specified, - the latest commit in the required branch will be used. + ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The ``repo`` field is required. + All other fields are optional. ``repo`` specifies the Git repository where your training script is + stored. If you don't provide ``branch``, the default value 'master' is used. If you don't provide + ``commit``, the latest commit in the specified branch is used. Example: The following config: @@ -991,15 +990,25 @@ def __init__( results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout the specified commit. - ``2FA_enabled``, ``username``, ``password`` and ``token`` are for authentication purpose. - ``2FA_enabled`` must be ``True`` or ``False`` if it is provided. If ``2FA_enabled`` is not provided, - we consider 2FA as disabled. 
For GitHub and other Git repos, when ssh urls are provided, it does not - make a difference whether 2FA is enabled or disabled; an ssh passphrase should be in local storage. - When https urls are provided: if 2FA is disabled, then either token or username+password will - be used for authentication if provided (token prioritized); if 2FA is enabled, only token will - be used for authentication if provided. If required authentication info is not provided, python SDK - will try to use local credentials storage to authenticate. If that fails either, an error message will - be thrown. + ``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. For GitHub + (or other Git) accounts, set ``2FA_enabled`` to 'True' if two-factor authentication is enabled for the + account, otherwise set it to 'False'. If you do not provide a value for ``2FA_enabled``, a default + value of 'False' is used. CodeCommit does not support two-factor authentication, so do not provide + "2FA_enabled" with CodeCommit repositories. + + For GitHub and other Git repos, when SSH URLs are provided, it doesn't matter whether 2FA is + enabled or disabled; you should either have no passphrase for the SSH key pairs, or have the ssh-agent + configured so that you will not be prompted for SSH passphrase when you do 'git clone' command with SSH + URLs. When HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will be + used for authentication if provided (token prioritized); if 2FA is enabled, only token will be used for + authentication if provided. If required authentication info is not provided, python SDK will try to use + local credentials storage to authenticate. If that fails either, an error message will be thrown. + + For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided. There is no token + in CodeCommit, so 'token' should not be provided too. When 'repo' is an SSH URL, the requirements are + the same as GitHub-like repos. When 'repo' is an HTTPS URL, username+password will be used for + authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential + helper or local credential storage for authentication. source_dir (str): Path (absolute or relative) to a directory with any other training source code dependencies aside from the entry point file (default: None). Structure within this directory are preserved when training on Amazon SageMaker. If 'git_config' is provided, diff --git a/src/sagemaker/git_utils.py b/src/sagemaker/git_utils.py index fa4e104cfc..8028243b1f 100644 --- a/src/sagemaker/git_utils.py +++ b/src/sagemaker/git_utils.py @@ -25,18 +25,26 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None): and set ``entry_point``, ``source_dir`` and ``dependencies`` to the right file or directory in the repo cloned. Args: - git_config (dict[str, object]): Git configurations used for cloning files, including ``repo``, ``branch``, - ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The fields are optional except - ``repo``. If ``branch`` is not specified, master branch will be used. If ``commit`` is not specified, - the latest commit in the required branch will be used. ``2FA_enabled``, ``username``, ``password`` and - ``token`` are for authentication purpose. - ``2FA_enabled`` must be ``True`` or ``False`` if it is provided. If ``2FA_enabled`` is not provided, we - consider 2FA as disabled. 
For GitHub and other Git repos, when ssh urls are provided, it does not make a - difference whether 2FA is enabled or disabled; an ssh passphrase should be in local storage. When - https urls are provided: if 2FA is disabled, then either token or username+password will be used for - authentication if provided (token prioritized); if 2FA is enabled, only token will be used for + git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``, + ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The ``repo`` field is required. + All other fields are optional. ``repo`` specifies the Git repository where your training script is stored. + If you don't provide ``branch``, the default value 'master' is used. If you don't provide ``commit``, + the latest commit in the specified branch is used. ``2FA_enabled``, ``username``, ``password`` and + ``token`` are for authentication purpose. If ``2FA_enabled`` is not provided, we consider 2FA as disabled. + + For GitHub and GitHub-like repos, when SSH URLs are provided, it doesn't matter whether 2FA is + enabled or disabled; you should either have no passphrase for the SSH key pairs, or have the ssh-agent + configured so that you will not be prompted for SSH passphrase when you do 'git clone' command with SSH + URLs. When https URLs are provided: if 2FA is disabled, then either token or username+password will be + used for authentication if provided (token prioritized); if 2FA is enabled, only token will be used for authentication if provided. If required authentication info is not provided, python SDK will try to use local credentials storage to authenticate. If that fails either, an error message will be thrown. + + For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided. There is no token in + CodeCommit, so 'token' should not be provided too. When 'repo' is an SSH URL, the requirements are the + same as GitHub-like repos. When 'repo' is an https URL, username+password will be used for + authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential + helper or local credential storage for authentication. entry_point (str): A relative location to the Python source file which should be executed as the entry point to training or model hosting in the Git repo. source_dir (str): A relative location to a directory with other training or model hosting source code @@ -115,7 +123,12 @@ def _generate_and_run_clone_command(git_config, dest_dir): Raises: CalledProcessError: If failed to clone git repo. 
""" - _clone_command_for_github_like(git_config, dest_dir) + if git_config["repo"].startswith("https://git-codecommit") or git_config["repo"].startswith( + "ssh://git-codecommit" + ): + _clone_command_for_codecommit(git_config, dest_dir) + else: + _clone_command_for_github_like(git_config, dest_dir) def _clone_command_for_github_like(git_config, dest_dir): @@ -136,14 +149,14 @@ def _clone_command_for_github_like(git_config, dest_dir): if not is_https and not is_ssh: raise ValueError("Invalid Git url provided.") if is_ssh: - _clone_command_for_github_like_ssh(git_config, dest_dir) + _clone_command_for_ssh(git_config, dest_dir) elif "2FA_enabled" in git_config and git_config["2FA_enabled"] is True: _clone_command_for_github_like_https_2fa_enabled(git_config, dest_dir) else: _clone_command_for_github_like_https_2fa_disabled(git_config, dest_dir) -def _clone_command_for_github_like_ssh(git_config, dest_dir): +def _clone_command_for_ssh(git_config, dest_dir): if "username" in git_config or "password" in git_config or "token" in git_config: warnings.warn("SSH cloning, authentication information in git config will be ignored.") _run_clone_command(git_config["repo"], dest_dir) @@ -173,6 +186,44 @@ def _clone_command_for_github_like_https_2fa_enabled(git_config, dest_dir): _run_clone_command(updated_url, dest_dir) +def _clone_command_for_codecommit(git_config, dest_dir): + """check if a git_config param representing a CodeCommit repo is valid, if it is, create the command to + git clone the repo, and run it. + + Args: + git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch`` + and ``commit``. + dest_dir (str): The local directory to clone the Git repo into. + + Raises: + ValueError: If git_config['repo'] is in the wrong format. + CalledProcessError: If failed to clone git repo. + """ + is_https = git_config["repo"].startswith("https://git-codecommit") + is_ssh = git_config["repo"].startswith("ssh://git-codecommit") + if not is_https and not is_ssh: + raise ValueError("Invalid Git url provided.") + if "2FA_enabled" in git_config: + warnings.warn("CodeCommit does not support 2FA, '2FA_enabled' will be ignored.") + if "token" in git_config: + warnings.warn("There are no tokens in CodeCommit, the token provided will be ignored.") + if is_ssh: + _clone_command_for_ssh(git_config, dest_dir) + else: + _clone_command_for_codecommit_https(git_config, dest_dir) + + +def _clone_command_for_codecommit_https(git_config, dest_dir): + updated_url = git_config["repo"] + if "username" in git_config and "password" in git_config: + updated_url = _insert_username_and_password_to_repo_url( + url=git_config["repo"], username=git_config["username"], password=git_config["password"] + ) + elif "username" in git_config or "password" in git_config: + warnings.warn("Credentials provided in git config will be ignored.") + _run_clone_command(updated_url, dest_dir) + + def _run_clone_command(repo_url, dest_dir): """Run the 'git clone' command with the repo url and the directory to clone the repo into. diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index b49ab5e7f5..6a975d1387 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -517,11 +517,10 @@ def __init__( You can assign entry_point='src/inference.py'. git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``, - ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token`` (default: None). The fields are - optional except ``repo``. 
If ``branch`` is not specified, master branch will be used. If ``commit`` - is not specified, the latest commit in the required branch will be used. 'branch' and 'commit' are - optional. If 'branch' is not specified, 'master' branch will be used. If 'commit' is not specified, - the latest commit in the required branch will be used. + ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The ``repo`` field is required. + All other fields are optional. ``repo`` specifies the Git repository where your training script is + stored. If you don't provide ``branch``, the default value 'master' is used. If you don't provide + ``commit``, the latest commit in the specified branch is used. Example: The following config: @@ -532,15 +531,25 @@ def __init__( results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout the specified commit. - ``2FA_enabled``, ``username``, ``password`` and ``token`` are for authentication purpose. - ``2FA_enabled`` must be ``True`` or ``False`` if it is provided. If ``2FA_enabled`` is not provided, - we consider 2FA as disabled. For GitHub and other Git repos, when ssh urls are provided, it does not - make a difference whether 2FA is enabled or disabled; an ssh passphrase should be in local storage. - When https urls are provided: if 2FA is disabled, then either token or username+password will - be used for authentication if provided (token prioritized); if 2FA is enabled, only token will - be used for authentication if provided. If required authentication info is not provided, python SDK - will try to use local credentials storage to authenticate. If that fails either, an error message will - be thrown. + ``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. For GitHub + (or other Git) accounts, set ``2FA_enabled`` to 'True' if two-factor authentication is enabled for the + account, otherwise set it to 'False'. If you do not provide a value for ``2FA_enabled``, a default + value of 'False' is used. CodeCommit does not support two-factor authentication, so do not provide + "2FA_enabled" with CodeCommit repositories. + + For GitHub and other Git repos, when SSH URLs are provided, it doesn't matter whether 2FA is + enabled or disabled; you should either have no passphrase for the SSH key pairs, or have the ssh-agent + configured so that you will not be prompted for SSH passphrase when you do 'git clone' command with SSH + URLs. When HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will be + used for authentication if provided (token prioritized); if 2FA is enabled, only token will be used for + authentication if provided. If required authentication info is not provided, python SDK will try to use + local credentials storage to authenticate. If that fails either, an error message will be thrown. + + For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided. There is no token + in CodeCommit, so 'token' should not be provided too. When 'repo' is an SSH URL, the requirements are + the same as GitHub-like repos. When 'repo' is an HTTPS URL, username+password will be used for + authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential + helper or local credential storage for authentication. source_dir (str): Path (absolute or relative) to a directory with any other training source code dependencies aside from the entry point file (default: None). 
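To ground the CodeCommit rules described above, two example configurations (placeholder repo name and credentials; note there is no 'token' and no '2FA_enabled'):

.. code:: python

    # HTTPS: authenticate with CodeCommit Git credentials
    git_config = {'repo': 'https://git-codecommit.us-west-2.amazonaws.com/v1/repos/your_repo_name',
                  'username': 'your-codecommit-username',
                  'password': 'your-codecommit-password'}

    # SSH: same requirements as GitHub-like repos (no passphrase prompt allowed)
    git_config = {'repo': 'ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/your_repo_name'}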
Structure within this directory will be preserved when training on SageMaker. If 'git_config' is provided, diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py index da5579f9b9..cd0edd6ad0 100644 --- a/tests/integ/test_git.py +++ b/tests/integ/test_git.py @@ -30,15 +30,17 @@ GIT_REPO = "https://github.com/aws/sagemaker-python-sdk.git" BRANCH = "test-branch-git-config" COMMIT = "ae15c9d7d5b97ea95ea451e4662ee43da3401d73" - PRIVATE_GIT_REPO = "https://github.com/git-support-test/test-git.git" PRIVATE_BRANCH = "master" PRIVATE_COMMIT = "a46d6f9add3532ca3e4e231e4108b6bad15b7373" - PRIVATE_GIT_REPO_2FA = "https://github.com/git-support-test-2fa/test-git.git" PRIVATE_GIT_REPO_2FA_SSH = "git@github.com:git-support-test-2fa/test-git.git" PRIVATE_BRANCH_2FA = "master" PRIVATE_COMMIT_2FA = "52381dee030eb332a7e42d9992878d7261eb21d4" +CODECOMMIT_REPO = ( + "https://git-codecommit.us-west-2.amazonaws.com/v1/repos/sagemaker-python-sdk-git-testing-repo/" +) +CODECOMMIT_BRANCH = "master" # Since personal access tokens will delete themselves if they are committed to GitHub repos, # we cannot hard code them here, but have to encrypt instead @@ -225,3 +227,65 @@ def test_git_support_with_sklearn_ssh_passphrase_not_configured( with pytest.raises(subprocess.CalledProcessError) as error: sklearn.fit({"train": train_input, "test": test_input}) assert "returned non-zero exit status" in str(error) + + +@pytest.mark.local_mode +def test_git_support_codecommit_with_mxnet(sagemaker_local_session): + script_path = "mnist.py" + data_path = os.path.join(DATA_DIR, "mxnet_mnist") + git_config = { + "repo": CODECOMMIT_REPO, + "branch": CODECOMMIT_BRANCH, + "username": "GitTest-at-142577830533", + "password": "22LcZpWMtjpDG3fbOuHPooIoKoRxF36rQj7zdUvXooA=", + } + source_dir = "mxnet" + dependencies = ["foo/bar.py"] + mx = MXNet( + entry_point=script_path, + role="SageMakerRole", + source_dir=source_dir, + dependencies=dependencies, + framework_version=MXNet.LATEST_VERSION, + py_version=PYTHON_VERSION, + train_instance_count=1, + train_instance_type="local", + sagemaker_session=sagemaker_local_session, + git_config=git_config, + ) + + mx.fit( + { + "train": "file://" + os.path.join(data_path, "train"), + "test": "file://" + os.path.join(data_path, "test"), + } + ) + + files = [file for file in os.listdir(mx.source_dir)] + assert "some_file" in files + assert "mnist.py" in files + assert os.path.exists(mx.dependencies[0]) + + with lock.lock(LOCK_PATH): + try: + client = sagemaker_local_session.sagemaker_client + desc = client.describe_training_job(TrainingJobName=mx.latest_training_job.name) + model_data = desc["ModelArtifacts"]["S3ModelArtifacts"] + model = MXNetModel( + model_data, + "SageMakerRole", + entry_point=script_path, + source_dir=source_dir, + dependencies=dependencies, + py_version=PYTHON_VERSION, + sagemaker_session=sagemaker_local_session, + framework_version=MXNet.LATEST_VERSION, + git_config=git_config, + ) + predictor = model.deploy(1, "local") + + data = numpy.zeros(shape=(1, 1, 28, 28)) + result = predictor.predict(data) + assert result is not None + finally: + predictor.delete_endpoint() diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index addcdc5025..d8f95240b2 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -55,6 +55,9 @@ PRIVATE_GIT_REPO = "https://github.com/testAccount/private-repo.git" PRIVATE_BRANCH = "test-branch" PRIVATE_COMMIT = "329bfcf884482002c05ff7f44f62599ebc9f445a" 
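The unit tests that follow all stub out ``sagemaker.git_utils.git_clone_repo`` in the same way; reduced to its core, the pattern is (a sketch, not code from the patch):

.. code:: python

    from mock import patch

    def clone_stub(git_config, entry_point, source_dir=None, dependencies=None):
        # Return the same shape git_clone_repo returns after a successful
        # clone, without touching the network or the filesystem.
        return {
            "entry_point": "/tmp/repo_dir/entry_point",
            "source_dir": None,
            "dependencies": None,
        }

    @patch("sagemaker.git_utils.git_clone_repo", side_effect=clone_stub)
    def test_example(git_clone_repo_mock):
        pass  # build the estimator, call fit(), then assert on the resolved entry_point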
+CODECOMMIT_REPO = "https://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" +CODECOMMIT_REPO_SSH = "ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" +CODECOMMIT_BRANCH = "master" REPO_DIR = "/tmp/repo_dir" DESCRIBE_TRAINING_JOB_RESULT = {"ModelArtifacts": {"S3ModelArtifacts": MODEL_DATA}} @@ -1131,6 +1134,63 @@ def test_git_support_ssh_passphrase_required(git_clone_repo, sagemaker_session): assert "returned non-zero exit status" in str(error) +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +def test_git_support_codecommit_with_username_and_password_succeed( + git_clone_repo, sagemaker_session +): + git_config = { + "repo": CODECOMMIT_REPO, + "branch": CODECOMMIT_BRANCH, + "username": "username", + "password": "passw0rd!", + } + entry_point = "entry_point" + fw = DummyFramework( + entry_point=entry_point, + git_config=git_config, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + enable_cloudwatch_metrics=True, + ) + fw.fit() + git_clone_repo.assert_called_once_with(git_config, entry_point, None, []) + assert fw.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +def test_git_support_codecommit_with_ssh_no_passphrase_needed(git_clone_repo, sagemaker_session): + git_config = {"repo": CODECOMMIT_REPO_SSH, "branch": CODECOMMIT_BRANCH} + entry_point = "entry_point" + fw = DummyFramework( + entry_point=entry_point, + git_config=git_config, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + # enable_cloudwatch_metrics=True, + ) + fw.fit() + git_clone_repo.assert_called_once_with(git_config, entry_point, None, []) + assert fw.entry_point == "/tmp/repo_dir/entry_point" + + @patch("time.strftime", return_value=TIMESTAMP) def test_init_with_source_dir_s3(strftime, sagemaker_session): fw = DummyFramework( diff --git a/tests/unit/test_git_utils.py b/tests/unit/test_git_utils.py index c97b34207c..2d76fb0554 100644 --- a/tests/unit/test_git_utils.py +++ b/tests/unit/test_git_utils.py @@ -27,6 +27,9 @@ PRIVATE_GIT_REPO = "https://github.com/testAccount/private-repo.git" PRIVATE_BRANCH = "test-branch" PRIVATE_COMMIT = "329bfcf884482002c05ff7f44f62599ebc9f445a" +CODECOMMIT_REPO = "https://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" +CODECOMMIT_REPO_SSH = "ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" +CODECOMMIT_BRANCH = "master" @patch("subprocess.check_call") @@ -213,7 +216,7 @@ def test_git_clone_repo_with_token_no_2fa(isfile, mkdtemp, check_call): "repo": PRIVATE_GIT_REPO, "branch": PRIVATE_BRANCH, "commit": PRIVATE_COMMIT, - "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + "token": "my-token", "2FA_enabled": False, } entry_point = "entry_point" @@ -221,12 +224,7 @@ def test_git_clone_repo_with_token_no_2fa(isfile, mkdtemp, check_call): env["GIT_TERMINAL_PROMPT"] = "0" ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) check_call.assert_any_call( - [ - "git", - "clone", - 
"https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", - REPO_DIR, - ], + ["git", "clone", "https://my-token@github.com/testAccount/private-repo.git", REPO_DIR], env=env, ) check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) @@ -246,19 +244,14 @@ def test_git_clone_repo_with_token_2fa(isfile, mkdtemp, check_call): "commit": PRIVATE_COMMIT, "2FA_enabled": True, "username": "username", - "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + "token": "my-token", } entry_point = "entry_point" env = os.environ.copy() env["GIT_TERMINAL_PROMPT"] = "0" ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) check_call.assert_any_call( - [ - "git", - "clone", - "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", - REPO_DIR, - ], + ["git", "clone", "https://my-token@github.com/testAccount/private-repo.git", REPO_DIR], env=env, ) check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) @@ -290,7 +283,7 @@ def test_git_clone_repo_with_token_no_2fa_unnecessary_creds_provided(isfile, mkd "commit": PRIVATE_COMMIT, "username": "username", "password": "passw0rd!", - "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + "token": "my-token", } entry_point = "entry_point" env = os.environ.copy() @@ -302,12 +295,7 @@ def test_git_clone_repo_with_token_no_2fa_unnecessary_creds_provided(isfile, mkd in warn[0].message.args[0] ) check_call.assert_any_call( - [ - "git", - "clone", - "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", - REPO_DIR, - ], + ["git", "clone", "https://my-token@github.com/testAccount/private-repo.git", REPO_DIR], env=env, ) check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) @@ -327,7 +315,7 @@ def test_git_clone_repo_with_token_2fa_unnecessary_creds_provided(isfile, mkdtem "commit": PRIVATE_COMMIT, "2FA_enabled": True, "username": "username", - "token": "08c13d80a861f37150cb5c64520bfe14a85ca191", + "token": "my-token", } entry_point = "entry_point" env = os.environ.copy() @@ -339,12 +327,7 @@ def test_git_clone_repo_with_token_2fa_unnecessary_creds_provided(isfile, mkdtem in warn[0].message.args[0] ) check_call.assert_any_call( - [ - "git", - "clone", - "https://08c13d80a861f37150cb5c64520bfe14a85ca191@github.com/testAccount/private-repo.git", - REPO_DIR, - ], + ["git", "clone", "https://my-token@github.com/testAccount/private-repo.git", REPO_DIR], env=env, ) check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR) @@ -422,3 +405,62 @@ def test_git_clone_repo_with_and_token_2fa_wrong_creds(mkdtemp, check_call): with pytest.raises(subprocess.CalledProcessError) as error: git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) assert "returned non-zero exit status" in str(error) + + +@patch("subprocess.check_call") +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +@patch("os.path.isfile", return_value=True) +def test_git_clone_repo_codecommit_https_with_username_and_password(isfile, mkdtemp, check_call): + git_config = { + "repo": CODECOMMIT_REPO, + "branch": CODECOMMIT_BRANCH, + "username": "username", + "password": "my-codecommit-password", + } + entry_point = "entry_point" + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point) + 
check_call.assert_any_call( + [ + "git", + "clone", + "https://username:my-codecommit-password@git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/", + REPO_DIR, + ], + env=env, + ) + check_call.assert_any_call(args=["git", "checkout", CODECOMMIT_BRANCH], cwd=REPO_DIR) + assert ret["entry_point"] == "/tmp/repo_dir/entry_point" + assert ret["source_dir"] is None + assert ret["dependencies"] is None + + +@patch( + "subprocess.check_call", + side_effect=subprocess.CalledProcessError( + returncode=128, cmd="git clone {} {}".format(CODECOMMIT_REPO_SSH, REPO_DIR) + ), +) +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +def test_git_clone_repo_codecommit_ssh_passphrase_required(mkdtemp, check_call): + git_config = {"repo": CODECOMMIT_REPO_SSH, "branch": CODECOMMIT_BRANCH} + entry_point = "entry_point" + with pytest.raises(subprocess.CalledProcessError) as error: + git_utils.git_clone_repo(git_config, entry_point) + assert "returned non-zero exit status" in str(error) + + +@patch( + "subprocess.check_call", + side_effect=subprocess.CalledProcessError( + returncode=128, cmd="git clone {} {}".format(CODECOMMIT_REPO, REPO_DIR) + ), +) +@patch("tempfile.mkdtemp", return_value=REPO_DIR) +def test_git_clone_repo_codecommit_https_creds_not_stored_locally(mkdtemp, check_call): + git_config = {"repo": CODECOMMIT_REPO, "branch": CODECOMMIT_BRANCH} + entry_point = "entry_point" + with pytest.raises(subprocess.CalledProcessError) as error: + git_utils.git_clone_repo(git_config, entry_point) + assert "returned non-zero exit status" in str(error) diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index 0090137a91..a96a880f2f 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -47,6 +47,9 @@ PRIVATE_GIT_REPO = "https://github.com/testAccount/private-repo.git" PRIVATE_BRANCH = "test-branch" PRIVATE_COMMIT = "329bfcf884482002c05ff7f44f62599ebc9f445a" +CODECOMMIT_REPO = "https://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" +CODECOMMIT_REPO_SSH = "ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/" +CODECOMMIT_BRANCH = "master" REPO_DIR = "/tmp/repo_dir" @@ -765,3 +768,72 @@ def test_git_support_ssh_passphrase_required(tar_and_upload_dir, git_clone_repo, ) model.prepare_container_def(instance_type=INSTANCE_TYPE) assert "returned non-zero exit status" in str(error) + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def test_git_support_codecommit_with_username_and_password_succeed( + tar_and_upload_dir, git_clone_repo, sagemaker_session +): + entry_point = "entry_point" + git_config = { + "repo": CODECOMMIT_REPO, + "branch": CODECOMMIT_BRANCH, + "username": "username", + "password": "passw0rd!", + } + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + git_clone_repo.assert_called_with(git_config, entry_point, None, []) + assert model.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=lambda gitconfig, entrypoint, source_dir=None, dependencies=None: { + "entry_point": "/tmp/repo_dir/entry_point", + "source_dir": None, + "dependencies": None, + }, +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def 
test_git_support_codecommit_ssh_no_passphrase_needed( + tar_and_upload_dir, git_clone_repo, sagemaker_session +): + entry_point = "entry_point" + git_config = {"repo": CODECOMMIT_REPO_SSH, "branch": CODECOMMIT_BRANCH} + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + git_clone_repo.assert_called_with(git_config, entry_point, None, []) + assert model.entry_point == "/tmp/repo_dir/entry_point" + + +@patch( + "sagemaker.git_utils.git_clone_repo", + side_effect=subprocess.CalledProcessError( + returncode=1, cmd="git clone {} {}".format(PRIVATE_GIT_REPO_SSH, REPO_DIR) + ), +) +@patch("sagemaker.model.fw_utils.tar_and_upload_dir") +def test_git_support_codecommit_ssh_passphrase_required( + tar_and_upload_dir, git_clone_repo, sagemaker_session +): + entry_point = "entry_point" + git_config = {"repo": CODECOMMIT_REPO_SSH, "branch": CODECOMMIT_BRANCH} + with pytest.raises(subprocess.CalledProcessError) as error: + model = DummyFrameworkModelForGit( + sagemaker_session=sagemaker_session, entry_point=entry_point, git_config=git_config + ) + model.prepare_container_def(instance_type=INSTANCE_TYPE) + assert "returned non-zero exit status" in str(error) From 01ed740f412df752d0821f4b0afefb2380197e9f Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Mon, 15 Jul 2019 10:21:45 -0700 Subject: [PATCH 10/35] change: enable inconsistent-return-statements Pylint check (#930) Note that this commit also raises ValueErrors in situations that would previously have returned None. Per PEP8: Be consistent in return statements. Either all return statements in a function should return an expression, or none of them should. If any return statement returns an expression, any return statements where no value is returned should explicitly state this as return None, and an explicit return statement should be present at the end of the function (if reachable). 
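
As a minimal sketch of the two remediations applied below (a hypothetical function, not taken from this patch):

    # Flagged: the success paths return values, but the fallthrough path
    # returns None implicitly.
    def scheme_label(scheme):
        if scheme == "file":
            return "local file"
        if scheme == "s3":
            return "S3 object"

    # Option 1: make the "no result" path explicit (used in job.py below).
    def scheme_label_explicit(scheme):
        if scheme == "file":
            return "local file"
        if scheme == "s3":
            return "S3 object"
        return None

    # Option 2: treat the fallthrough as invalid input (used in
    # local/data.py and rl/estimator.py below).
    def scheme_label_strict(scheme):
        if scheme == "file":
            return "local file"
        if scheme == "s3":
            return "S3 object"
        raise ValueError("scheme must be either file or s3. scheme: {}".format(scheme))
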
--- .pylintrc | 1 - src/sagemaker/job.py | 2 +- src/sagemaker/local/data.py | 8 +++++++- src/sagemaker/model.py | 1 + src/sagemaker/pipeline.py | 1 + src/sagemaker/predictor.py | 5 +++++ src/sagemaker/rl/estimator.py | 10 +++++++++- 7 files changed, 24 insertions(+), 4 deletions(-) diff --git a/.pylintrc b/.pylintrc index 9a3ee55af3..7b0ebdea68 100644 --- a/.pylintrc +++ b/.pylintrc @@ -91,7 +91,6 @@ disable= useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate - inconsistent-return-statements, # TODO: Make returns consistent consider-merging-isinstance, # TODO: Merge isinstance where appropriate consider-using-in, # TODO: Consider merging comparisons with "in" too-many-public-methods, # TODO: Resolve diff --git a/src/sagemaker/job.py b/src/sagemaker/job.py index ffe4ba31b0..e590e474a7 100644 --- a/src/sagemaker/job.py +++ b/src/sagemaker/job.py @@ -169,7 +169,7 @@ def _prepare_channel( input_mode=None, ): if not channel_uri: - return + return None if not channel_name: raise ValueError( "Expected a channel name if a channel URI {} is specified".format(channel_uri) diff --git a/src/sagemaker/local/data.py b/src/sagemaker/local/data.py index 62f41bf4e0..cf3cd88af5 100644 --- a/src/sagemaker/local/data.py +++ b/src/sagemaker/local/data.py @@ -38,15 +38,21 @@ def get_data_source_instance(data_source, sagemaker_session): sagemaker_session (:class:`sagemaker.session.Session`): a SageMaker Session to interact with S3 if required. - Returns + Returns: :class:`sagemaker.local.data.DataSource`: an Instance of a Data Source + Raises: + ValueError: If parsed_uri scheme is neither `file` nor `s3`, raise an error. + """ parsed_uri = urlparse(data_source) if parsed_uri.scheme == "file": return LocalFileDataSource(parsed_uri.netloc + parsed_uri.path) if parsed_uri.scheme == "s3": return S3DataSource(parsed_uri.netloc, parsed_uri.path, sagemaker_session) + raise ValueError( + "data_source must be either file or s3. parsed_uri.scheme: {}".format(parsed_uri.scheme) + ) def get_splitter_instance(split_type): diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index 6a975d1387..cb17454683 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -389,6 +389,7 @@ def deploy( if self.predictor_cls: return self.predictor_cls(self.endpoint_name, self.sagemaker_session) + return None def transformer( self, diff --git a/src/sagemaker/pipeline.py b/src/sagemaker/pipeline.py index 027f7a9c16..a91631dde7 100644 --- a/src/sagemaker/pipeline.py +++ b/src/sagemaker/pipeline.py @@ -115,6 +115,7 @@ def deploy( ) if self.predictor_cls: return self.predictor_cls(self.endpoint_name, self.sagemaker_session) + return None def _create_sagemaker_pipeline_model(self, instance_type): """Create a SageMaker Model Entity diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index 4bdf0ed665..68efe14d20 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -392,6 +392,11 @@ def __call__(self, stream, content_type=CONTENT_TYPE_NPY): return np.load(BytesIO(stream.read())) finally: stream.close() + raise ValueError( + "content_type must be one of the following: CSV, JSON, NPY. 
content_type: {}".format( + content_type + ) + ) numpy_deserializer = _NumpyDeserializer() diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index 5836948c10..aa2565d02a 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -184,7 +184,8 @@ def create_model( MXNet was used as RL backend; * sagemaker.tensorflow.serving.Model - if image_name wasn't specified and TensorFlow was used as RL backend. - + Raises: + ValueError: If image_name was not specified and framework enum is not valid. """ base_args = dict( model_data=self.model_data, @@ -230,6 +231,9 @@ def create_model( return MXNetModel( framework_version=self.framework_version, py_version=PYTHON_VERSION, **extended_args ) + raise ValueError( + "An unknown RLFramework enum was passed in. framework: {}".format(self.framework) + ) def train_image(self): """Return the Docker image to use for training. @@ -399,6 +403,9 @@ def default_metric_definitions(cls, toolkit): Returns: list: metric definitions + + Raises: + ValueError: If toolkit enum is not valid. """ if toolkit is RLToolkit.COACH: return [ @@ -412,3 +419,4 @@ def default_metric_definitions(cls, toolkit): {"Name": "episode_reward_mean", "Regex": "episode_reward_mean: (%s)" % float_regex}, {"Name": "episode_reward_max", "Regex": "episode_reward_max: (%s)" % float_regex}, ] + raise ValueError("An unknown RLToolkit enum was passed in. toolkit: {}".format(toolkit)) From 1b4c2b17412eb4e85e518c6794d31036c411a079 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Mon, 15 Jul 2019 11:14:23 -0700 Subject: [PATCH 11/35] change: enable consider-merging-isinstance Pylint check (#932) Note that this commit will also enable simplifiable-if-statement, as there are no code changes needed for it. --- .pylintrc | 2 -- src/sagemaker/tuner.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index 7b0ebdea68..ed98a3b05a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -91,13 +91,11 @@ disable= useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate - consider-merging-isinstance, # TODO: Merge isinstance where appropriate consider-using-in, # TODO: Consider merging comparisons with "in" too-many-public-methods, # TODO: Resolve ungrouped-imports, # TODO: Group imports consider-using-ternary, # TODO: Consider ternary expressions chained-comparison, # TODO: Simplify chained comparison between operands - simplifiable-if-statement, # TODO: Simplify ifs too-many-branches, # TODO: Simplify or ignore as appropriate missing-docstring, # TODO: Fix missing docstring diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 08c578a99d..0c423e1c3c 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -307,7 +307,7 @@ def fit(self, inputs=None, job_name=None, include_cls_metadata=False, **kwargs): **kwargs: Other arguments needed for training. Please refer to the ``fit()`` method of the associated estimator to see what other arguments are needed. 
""" - if isinstance(inputs, list) or isinstance(inputs, RecordSet): + if isinstance(inputs, (list, RecordSet)): self.estimator._prepare_for_training(inputs, **kwargs) else: self.estimator._prepare_for_training(job_name) From 798299add47e54a7dc7c0f035e8acd06e7ee6e31 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Mon, 15 Jul 2019 11:55:46 -0700 Subject: [PATCH 12/35] change: enable attribute-defined-outside-init Pylint check (#933) The logic behind this rule is to improve readability by defining all the attributes of a class inside the init function, even if it simply sets them to None. --- .pylintrc | 1 - src/sagemaker/amazon/amazon_estimator.py | 2 +- src/sagemaker/analytics.py | 8 ++++++++ src/sagemaker/estimator.py | 3 +++ src/sagemaker/local/entities.py | 5 ++++- src/sagemaker/model.py | 1 + src/sagemaker/pipeline.py | 1 + src/sagemaker/tuner.py | 1 + 8 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index ed98a3b05a..fecdb49e56 100644 --- a/.pylintrc +++ b/.pylintrc @@ -84,7 +84,6 @@ disable= line-too-long, # We let Flake8 take care of this # TODO: Fix these and stop relying on flake8 len-as-condition, # TODO: Enable this check once pylint 2.4.0 is released and consumed due to the fix in https://github.com/PyCQA/pylint/issues/2684 import-error, # Since we run Pylint before any of our builds in tox, this will always fail - attribute-defined-outside-init, # TODO: Fix scope protected-access, # TODO: Fix access abstract-method, # TODO: Fix abstract methods wrong-import-order, # TODO: Fix import order diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 770358c5a8..7f4c49c879 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -55,7 +55,7 @@ def __init__( data_location = data_location or "s3://{}/sagemaker-record-sets/".format( self.sagemaker_session.default_bucket() ) - self.data_location = data_location + self._data_location = data_location def train_image(self): return get_image_uri( diff --git a/src/sagemaker/analytics.py b/src/sagemaker/analytics.py index 019c660ae1..1b808aead6 100644 --- a/src/sagemaker/analytics.py +++ b/src/sagemaker/analytics.py @@ -37,6 +37,9 @@ class AnalyticsMetricsBase(with_metaclass(ABCMeta, object)): Understands common functionality like persistence and caching. """ + def __init__(self): + self._dataframe = None + def export_csv(self, filename): """Persists the analytics dataframe to a file. 
@@ -88,6 +91,9 @@ def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None): sagemaker_session = sagemaker_session or Session() self._sage_client = sagemaker_session.sagemaker_client self._tuning_job_name = hyperparameter_tuning_job_name + self._tuning_job_describe_result = None + self._training_job_summaries = None + super(HyperparameterTuningJobAnalytics, self).__init__() self.clear_cache() @property @@ -240,6 +246,8 @@ def __init__( self._metric_names = metric_names else: self._metric_names = self._metric_names_for_training_job() + + super(TrainingJobAnalytics, self).__init__() self.clear_cache() @property diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index b68e0ededa..653c0942ae 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -166,6 +166,7 @@ def __init__( self.output_path = output_path self.output_kms_key = output_kms_key self.latest_training_job = None + self.deploy_instance_type = None self._compiled_models = {} @@ -1086,6 +1087,8 @@ def __init__( self.image_name = image_name self._enable_network_isolation = enable_network_isolation + self.uploaded_code = None + self._hyperparameters = hyperparameters or {} def enable_network_isolation(self): diff --git a/src/sagemaker/local/entities.py b/src/sagemaker/local/entities.py index 53bc794a9f..b92ece29eb 100644 --- a/src/sagemaker/local/entities.py +++ b/src/sagemaker/local/entities.py @@ -75,7 +75,7 @@ def start(self, input_data_config, output_data_config, hyperparameters, job_name self.model_artifacts = self.container.train( input_data_config, output_data_config, hyperparameters, job_name ) - self.end = datetime.datetime.now() + self.end_time = datetime.datetime.now() self.state = self._COMPLETED def describe(self): @@ -110,6 +110,9 @@ def __init__(self, transform_job_name, model_name, local_session=None): self.start_time = None self.end_time = None self.batch_strategy = None + self.transform_resources = None + self.input_data = None + self.output_data = None self.environment = {} self.state = _LocalTransformJob._CREATING diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index cb17454683..be840d6885 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -100,6 +100,7 @@ def __init__( self.vpc_config = vpc_config self.sagemaker_session = sagemaker_session self._model_name = None + self.endpoint_name = None self._is_compiled_model = False self._enable_network_isolation = enable_network_isolation diff --git a/src/sagemaker/pipeline.py b/src/sagemaker/pipeline.py index a91631dde7..c70b2b17ca 100644 --- a/src/sagemaker/pipeline.py +++ b/src/sagemaker/pipeline.py @@ -51,6 +51,7 @@ def __init__( self.vpc_config = vpc_config self.sagemaker_session = sagemaker_session self._model_name = None + self.endpoint_name = None def pipeline_container_def(self, instance_type): """Return a dict created by ``sagemaker.pipeline_container_def()`` for deploying this model to a specified diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 0c423e1c3c..8f74f0cd77 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -247,6 +247,7 @@ def __init__( self.latest_tuning_job = None self.warm_start_config = warm_start_config self.early_stopping_type = early_stopping_type + self.static_hyperparameters = None def _prepare_for_training(self, job_name=None, include_cls_metadata=False): if job_name is not None: From b6b3cf6cf0e7d84e7e2de61f3f223199aa5c388d Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Mon, 15 Jul 2019 15:43:22 -0700 Subject: [PATCH 
13/35] change: enable wrong-import-order Pylint check (#935) Per PEP8: Imports should be grouped in the following order: 1- Standard library imports. 2- Related third party imports. 3- Local application/library specific imports. --- .pylintrc | 1 - src/sagemaker/fw_utils.py | 2 +- src/sagemaker/git_utils.py | 2 +- src/sagemaker/local/image.py | 2 +- src/sagemaker/predictor.py | 2 +- src/sagemaker/session.py | 2 +- src/sagemaker/user_agent.py | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.pylintrc b/.pylintrc index fecdb49e56..130124da70 100644 --- a/.pylintrc +++ b/.pylintrc @@ -86,7 +86,6 @@ disable= import-error, # Since we run Pylint before any of our builds in tox, this will always fail protected-access, # TODO: Fix access abstract-method, # TODO: Fix abstract methods - wrong-import-order, # TODO: Fix import order useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 3b90f9c006..8deb803fa3 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -16,11 +16,11 @@ import os import re -import sagemaker.utils import shutil import tempfile from six.moves.urllib.parse import urlparse +import sagemaker.utils from sagemaker.utils import get_ecr_image_uri_prefix, ECR_URI_PATTERN _TAR_SOURCE_FILENAME = "source.tar.gz" diff --git a/src/sagemaker/git_utils.py b/src/sagemaker/git_utils.py index 8028243b1f..8490ec5788 100644 --- a/src/sagemaker/git_utils.py +++ b/src/sagemaker/git_utils.py @@ -13,10 +13,10 @@ from __future__ import absolute_import import os -import six import subprocess import tempfile import warnings +import six from six.moves import urllib diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 95a6277f87..bcc2bfab36 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -28,8 +28,8 @@ import tarfile import tempfile -from six.moves.urllib.parse import urlparse from threading import Thread +from six.moves.urllib.parse import urlparse import yaml diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index 68efe14d20..d2c4822289 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -15,9 +15,9 @@ import codecs import csv import json -import numpy as np import six from six import StringIO, BytesIO +import numpy as np from sagemaker.content_types import CONTENT_TYPE_JSON, CONTENT_TYPE_CSV, CONTENT_TYPE_NPY from sagemaker.session import Session diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 019f9e4ef9..e0e30e66fc 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -20,9 +20,9 @@ import time import warnings +import six import boto3 import botocore.config -import six import yaml from botocore.exceptions import ClientError diff --git a/src/sagemaker/user_agent.py b/src/sagemaker/user_agent.py index a63d2bb6bb..2420266572 100644 --- a/src/sagemaker/user_agent.py +++ b/src/sagemaker/user_agent.py @@ -12,9 +12,9 @@ # language governing permissions and limitations under the License. 
from __future__ import absolute_import -import pkg_resources import platform import sys +import pkg_resources import boto3 import botocore From 6adb29b0c92baf143784f6071fabf547af4694c9 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Mon, 15 Jul 2019 17:02:43 -0700 Subject: [PATCH 14/35] change: enable ungrouped-imports Pylint check (#936) * change: enable wrong-import-order Pylint check Per PEP8: Imports should be grouped in the following order: 1- Standard library imports. 2- Related third party imports. 3- Local application/library specific imports. --- .pylintrc | 1 - src/sagemaker/session.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.pylintrc b/.pylintrc index 130124da70..a8308f4400 100644 --- a/.pylintrc +++ b/.pylintrc @@ -91,7 +91,6 @@ disable= no-self-use, # TODO: Convert methods to functions where appropriate consider-using-in, # TODO: Consider merging comparisons with "in" too-many-public-methods, # TODO: Resolve - ungrouped-imports, # TODO: Group imports consider-using-ternary, # TODO: Consider ternary expressions chained-comparison, # TODO: Simplify chained comparison between operands too-many-branches, # TODO: Simplify or ignore as appropriate diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index e0e30e66fc..b39e0be055 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -23,8 +23,8 @@ import six import boto3 import botocore.config -import yaml from botocore.exceptions import ClientError +import yaml import sagemaker.logs from sagemaker import vpc_utils From 3734314a8e27e923208f248775f572eea933002a Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 15 Jul 2019 18:16:24 -0700 Subject: [PATCH 15/35] change: fix attach for 1P algorithm estimators (#931) --- src/sagemaker/tuner.py | 34 +++++++++++++++++++++++++++++----- tests/integ/test_tuner.py | 11 +++++++---- tests/unit/test_tuner.py | 4 ++-- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 8f74f0cd77..16f6bf07d3 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -19,7 +19,7 @@ from enum import Enum import sagemaker -from sagemaker.amazon.amazon_estimator import RecordSet +from sagemaker.amazon.amazon_estimator import RecordSet, AmazonAlgorithmEstimatorBase from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.analytics import HyperparameterTuningJobAnalytics from sagemaker.estimator import Framework @@ -358,7 +358,7 @@ def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estim estimator_cls, job_details["TrainingJobDefinition"] ) estimator = cls._prepare_estimator_from_job_description( - estimator_cls, job_details["TrainingJobDefinition"], sagemaker_session + estimator_cls, job_details, sagemaker_session ) init_params = cls._prepare_init_params_from_job_description(job_details) @@ -497,9 +497,9 @@ def _prepare_estimator_cls(cls, estimator_cls, training_details): ) @classmethod - def _prepare_estimator_from_job_description( - cls, estimator_cls, training_details, sagemaker_session - ): + def _prepare_estimator_from_job_description(cls, estimator_cls, job_details, sagemaker_session): + training_details = job_details["TrainingJobDefinition"] + # Swap name for static hyperparameters to what an estimator would expect training_details["HyperParameters"] = training_details["StaticHyperParameters"] del training_details["StaticHyperParameters"] @@ -507,6 +507,15 @@ def _prepare_estimator_from_job_description( # Remove hyperparameter 
reserved by SageMaker for tuning jobs del training_details["HyperParameters"]["_tuning_objective_metric"] + # Add missing hyperparameters defined in the hyperparameter ranges, + # as potentially required in the Amazon algorithm estimator's constructor + if issubclass(estimator_cls, AmazonAlgorithmEstimatorBase): + parameter_ranges = job_details["HyperParameterTuningJobConfig"]["ParameterRanges"] + additional_hyperparameters = cls._extract_hyperparameters_from_parameter_ranges( + parameter_ranges + ) + training_details["HyperParameters"].update(additional_hyperparameters) + # Add items expected by the estimator (but aren't needed otherwise) training_details["TrainingJobName"] = "" if "KmsKeyId" not in training_details["OutputDataConfig"]: @@ -559,6 +568,21 @@ def _prepare_parameter_ranges(cls, parameter_ranges): return ranges + @classmethod + def _extract_hyperparameters_from_parameter_ranges(cls, parameter_ranges): + hyperparameters = {} + + for parameter in parameter_ranges["CategoricalParameterRanges"]: + hyperparameters[parameter["Name"]] = parameter["Values"][0] + + for parameter in parameter_ranges["ContinuousParameterRanges"]: + hyperparameters[parameter["Name"]] = float(parameter["MinValue"]) + + for parameter in parameter_ranges["IntegerParameterRanges"]: + hyperparameters[parameter["Name"]] = int(parameter["MinValue"]) + + return hyperparameters + def hyperparameter_ranges(self): """Return the hyperparameter ranges in a dictionary to be used as part of a request for creating a hyperparameter tuning job. diff --git a/tests/integ/test_tuner.py b/tests/integ/test_tuner.py index 1d74acbe06..b823484efd 100644 --- a/tests/integ/test_tuner.py +++ b/tests/integ/test_tuner.py @@ -460,12 +460,15 @@ def test_tuning_lda(sagemaker_session): time.sleep(15) tuner.wait() - desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job( - HyperParameterTuningJobName=latest_tuning_job_name + attached_tuner = HyperparameterTuner.attach( + tuning_job_name, sagemaker_session=sagemaker_session ) - assert desc["HyperParameterTuningJobConfig"]["TrainingJobEarlyStoppingType"] == "Auto" + assert attached_tuner.early_stopping_type == "Auto" + assert attached_tuner.estimator.alpha0 == 1.0 + assert attached_tuner.estimator.num_topics == 1 + + best_training_job = attached_tuner.best_training_job() - best_training_job = tuner.best_training_job() with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session): predictor = tuner.deploy(1, "ml.c4.xlarge") predict_input = np.random.rand(1, feature_num) diff --git a/tests/unit/test_tuner.py b/tests/unit/test_tuner.py index 73010c3afb..6accb3ba95 100644 --- a/tests/unit/test_tuner.py +++ b/tests/unit/test_tuner.py @@ -78,7 +78,7 @@ "IntegerParameterRanges": [ { "MaxValue": "100", - "Name": "mini_batch_size", + "Name": "num_components", "MinValue": "10", "ScalingType": "Auto", } @@ -416,7 +416,7 @@ def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session assert tuner.estimator.output_kms_key == "" assert "_tuning_objective_metric" not in tuner.estimator.hyperparameters() - assert tuner.estimator.hyperparameters()["num_components"] == "1" + assert tuner.estimator.hyperparameters()["num_components"] == "10" def test_attach_tuning_job_with_estimator_from_hyperparameters_with_early_stopping( From 1badf7606f1d80fcb91db35a61aa711672e3bd53 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 15 Jul 2019 19:54:39 -0700 Subject: [PATCH 16/35] change: set num_processes_per_host only if provided by user 
(#928) --- src/sagemaker/tensorflow/estimator.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index a24bf75dec..cdac66661f 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -545,9 +545,12 @@ def hyperparameters(self): mpi_dict = self.distributions["mpi"] mpi_enabled = mpi_dict.get("enabled", False) additional_hyperparameters[self.LAUNCH_MPI_ENV_NAME] = mpi_enabled - additional_hyperparameters[self.MPI_NUM_PROCESSES_PER_HOST] = mpi_dict.get( - "processes_per_host", 1 - ) + + if mpi_dict.get("processes_per_host"): + additional_hyperparameters[self.MPI_NUM_PROCESSES_PER_HOST] = mpi_dict.get( + "processes_per_host" + ) + additional_hyperparameters[self.MPI_CUSTOM_MPI_OPTIONS] = mpi_dict.get( "custom_mpi_options", "" ) From aab167596a4565af603535f12f18aa834ccb1537 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Tue, 16 Jul 2019 09:07:49 -0700 Subject: [PATCH 17/35] change: enable consider-using-in Pylint check (#938) * change: enable consider-using-in Pylint check --- .pylintrc | 1 - src/sagemaker/session.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.pylintrc b/.pylintrc index a8308f4400..056ee6d841 100644 --- a/.pylintrc +++ b/.pylintrc @@ -89,7 +89,6 @@ disable= useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate - consider-using-in, # TODO: Consider merging comparisons with "in" too-many-public-methods, # TODO: Resolve consider-using-ternary, # TODO: Consider ternary expressions chained-comparison, # TODO: Simplify chained comparison between operands diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index b39e0be055..1e904d58d6 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -1022,7 +1022,7 @@ def _check_job_status(self, job, desc, status_key_name): # If the status is capital case, then convert it to Camel case status = _STATUS_CODE_TABLE.get(status, status) - if status != "Completed" and status != "Stopped": + if status not in ("Completed", "Stopped"): reason = desc.get("FailureReason", "(No reason provided)") job_type = status_key_name.replace("JobStatus", " job") raise ValueError("Error for {} {}: {} Reason: {}".format(job_type, job, status, reason)) @@ -1292,7 +1292,7 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method client = self.boto_session.client("logs", config=config) log_group = "/aws/sagemaker/TrainingJobs" - job_already_completed = status == "Completed" or status == "Failed" or status == "Stopped" + job_already_completed = status in ("Completed", "Failed", "Stopped") state = LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE dot = False @@ -1385,7 +1385,7 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method status = description["TrainingJobStatus"] - if status == "Completed" or status == "Failed" or status == "Stopped": + if status in ("Completed", "Failed", "Stopped"): print() state = LogState.JOB_COMPLETE From 39adbbc7cf5b0f4b533fca209524cdd048ce3127 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Tue, 16 Jul 2019 10:04:11 -0700 Subject: [PATCH 18/35] change: enable too-many-public-methods Pylint check (#939) * change: enable too-many-public-methods Pylint check This is a useful check to have, but is a lot of work to retroactively enforce. 
Enabling it while ignoring the single violation allows the validation to run for future code. --- .pylintrc | 1 - src/sagemaker/session.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.pylintrc b/.pylintrc index 056ee6d841..64f6303541 100644 --- a/.pylintrc +++ b/.pylintrc @@ -89,7 +89,6 @@ disable= useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate - too-many-public-methods, # TODO: Resolve consider-using-ternary, # TODO: Consider ternary expressions chained-comparison, # TODO: Simplify chained comparison between operands too-many-branches, # TODO: Simplify or ignore as appropriate diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 1e904d58d6..26b2e53adb 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -55,7 +55,7 @@ class LogState(object): COMPLETE = 5 -class Session(object): +class Session(object): # pylint: disable=too-many-public-methods """Manage interactions with the Amazon SageMaker APIs and any other AWS services needed. This class provides convenient methods for manipulating entities and resources that Amazon SageMaker uses, From 36bbf8ab7857e83488888d5176b5565b7e78c7e5 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Tue, 16 Jul 2019 15:11:55 -0700 Subject: [PATCH 19/35] change: enable chained-comparison Pylint check (#940) --- .pylintrc | 1 - src/sagemaker/parameter.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.pylintrc b/.pylintrc index 64f6303541..5e319a7564 100644 --- a/.pylintrc +++ b/.pylintrc @@ -90,7 +90,6 @@ disable= cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate consider-using-ternary, # TODO: Consider ternary expressions - chained-comparison, # TODO: Simplify chained comparison between operands too-many-branches, # TODO: Simplify or ignore as appropriate missing-docstring, # TODO: Fix missing docstring diff --git a/src/sagemaker/parameter.py b/src/sagemaker/parameter.py index 16372d80d4..152455c107 100644 --- a/src/sagemaker/parameter.py +++ b/src/sagemaker/parameter.py @@ -47,7 +47,7 @@ def is_valid(self, value): Returns: bool: True if valid, False otherwise. """ - return value >= self.min_value and value <= self.max_value + return self.min_value <= value <= self.max_value @classmethod def cast_to_type(cls, value): From d039e37f572c3dd9e3ae1a318e57089977cfb606 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 17 Jul 2019 15:56:08 -0700 Subject: [PATCH 20/35] change: enable consider-using-ternary Pylint check (#942) This commit will add an exclusion for all auto-generated files. I chose to ignore the single violation, because the alternative is confusingly convoluted: `(hasattr(obj, '__getitem__') if hasattr(obj, '__iter__') else isinstance(obj, str))` --- .pylintrc | 7 ++++--- src/sagemaker/predictor.py | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.pylintrc b/.pylintrc index 5e319a7564..0148db5544 100644 --- a/.pylintrc +++ b/.pylintrc @@ -19,9 +19,11 @@ profile=no # paths. ignore=CVS,tensorflow_serving -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. +# Add files or directories matching the regex patterns to the blacklist. +# The regex matches against base names, not paths. 
+# Regex patterns can be comma(and newline)-separated ignore-patterns= + .*_pb2.py, # Ignore all files generated by the protocol buffer compiler # Pickle collected data for later comparisons. persistent=yes @@ -89,7 +91,6 @@ disable= useless-object-inheritance, # TODO: Remove unnecessary imports cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate - consider-using-ternary, # TODO: Consider ternary expressions too-many-branches, # TODO: Simplify or ignore as appropriate missing-docstring, # TODO: Fix missing docstring diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index d2c4822289..d1ddba38a6 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -231,7 +231,9 @@ def _is_mutable_sequence_like(obj): def _is_sequence_like(obj): # Need to explicitly check on str since str lacks the iterable magic methods in Python 2 - return (hasattr(obj, "__iter__") and hasattr(obj, "__getitem__")) or isinstance(obj, str) + return ( # pylint: disable=consider-using-ternary + hasattr(obj, "__iter__") and hasattr(obj, "__getitem__") + ) or isinstance(obj, str) def _row_to_csv(obj): From 91c55292257de407cde2e60f6943c692c2daeb9e Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Wed, 17 Jul 2019 16:07:28 -0700 Subject: [PATCH 21/35] change: modify TODO on disabled Pylint check (#943) The check recommendations are only valid for packages that exclusively support Python 3. The changes cannot be made in Python 2. The TODO was updated to clarify this. --- .pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index 0148db5544..692a6e3db5 100644 --- a/.pylintrc +++ b/.pylintrc @@ -88,7 +88,7 @@ disable= import-error, # Since we run Pylint before any of our builds in tox, this will always fail protected-access, # TODO: Fix access abstract-method, # TODO: Fix abstract methods - useless-object-inheritance, # TODO: Remove unnecessary imports + useless-object-inheritance, # TODO: Enable this check and fix code once Python 2 is no longer supported. 
cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate too-many-branches, # TODO: Simplify or ignore as appropriate From 8871a1424c7b5ae2a3296d8e7353bff16ba452ee Mon Sep 17 00:00:00 2001 From: ci Date: Thu, 18 Jul 2019 17:17:05 +0000 Subject: [PATCH 22/35] prepare release v1.34.0 --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ VERSION | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd3ccc77bb..4efaf4f302 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changelog +## v1.34.0 (2019-07-18) + +### Features + + * Git integration for CodeCommit + * deal with credentials for Git support for GitHub + +### Bug fixes and other changes + + * modify TODO on disabled Pylint check + * enable consider-using-ternary Pylint check + * enable chained-comparison Pylint check + * enable too-many-public-methods Pylint check + * enable consider-using-in Pylint check + * set num_processes_per_host only if provided by user + * fix attach for 1P algorithm estimators + * enable ungrouped-imports Pylint check + * enable wrong-import-order Pylint check + * enable attribute-defined-outside-init Pylint check + * enable consider-merging-isinstance Pylint check + * enable inconsistent-return-statements Pylint check + * enable simplifiable-if-expression pylint checks + * fix list serialization for 1P algos + * enable no-else-return and no-else-raise pylint checks + * enable unidiomatic-typecheck pylint check + ## v1.33.0 (2019-07-10) ### Features diff --git a/VERSION b/VERSION index 21f10e8e95..2b17ffd504 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.33.1.dev0 +1.34.0 From 87d455aa8930c4aa0b6933b7c15c82723e7ac31a Mon Sep 17 00:00:00 2001 From: ci Date: Thu, 18 Jul 2019 17:39:00 +0000 Subject: [PATCH 23/35] update development version to v1.34.1.dev0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 2b17ffd504..1a572d8c8e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.34.0 +1.34.1.dev0 From 40f1f987e792e456dae8ebe6f7770500a684a64c Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Thu, 18 Jul 2019 12:44:58 -0700 Subject: [PATCH 24/35] change: add MXNet 1.4.1 support (#886) --- README.rst | 4 ++-- doc/using_mxnet.rst | 32 ++++++++++++++++---------------- src/sagemaker/mxnet/README.rst | 32 ++++++++++++++++---------------- src/sagemaker/mxnet/estimator.py | 2 +- tests/conftest.py | 1 + 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/README.rst b/README.rst index f66c1533ff..bacf953a20 100644 --- a/README.rst +++ b/README.rst @@ -173,9 +173,9 @@ MXNet SageMaker Estimators By using MXNet SageMaker Estimators, you can train and host MXNet models on Amazon SageMaker. -Supported versions of MXNet: ``0.12.1``, ``1.0.0``, ``1.1.0``, ``1.2.1``, ``1.3.0``, ``1.4.0``. +Supported versions of MXNet: ``0.12.1``, ``1.0.0``, ``1.1.0``, ``1.2.1``, ``1.3.0``, ``1.4.0``, ``1.4.1``. -Supported versions of MXNet for Elastic Inference: ``1.3.0``, ``1.4.0``. +Supported versions of MXNet for Elastic Inference: ``1.3.0``, ``1.4.0``, ``1.4.1``. We recommend that you use the latest supported version, because that's where we focus most of our development efforts. 
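
As context for the new version listings (a sketch only, not part of this patch: the model artifact, role, and entry point are placeholders), hosting on an Elastic Inference-supported version looks roughly like this:

    from sagemaker.mxnet import MXNetModel

    model = MXNetModel(
        model_data="s3://my-bucket/model.tar.gz",  # placeholder artifact location
        role="SageMakerRole",                      # placeholder IAM role
        entry_point="inference.py",                # placeholder serving script
        framework_version="1.4.1",
    )

    # Attaching an accelerator at deploy time is what requires one of the
    # Elastic Inference-supported versions listed above.
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.m4.xlarge",
        accelerator_type="ml.eia1.medium",
    )
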
diff --git a/doc/using_mxnet.rst b/doc/using_mxnet.rst index c4fb3c5ec0..a951b5ce43 100644 --- a/doc/using_mxnet.rst +++ b/doc/using_mxnet.rst @@ -6,9 +6,9 @@ Using MXNet with the SageMaker Python SDK With the SageMaker Python SDK, you can train and host MXNet models on Amazon SageMaker. -Supported versions of MXNet: ``1.4.0``, ``1.3.0``, ``1.2.1``, ``1.1.0``, ``1.0.0``, ``0.12.1``. +Supported versions of MXNet: ``0.12.1``, ``1.0.0``, ``1.1.0``, ``1.2.1``, ``1.3.0``, ``1.4.0``, ``1.4.1``. -Supported versions of MXNet for Elastic Inference: ``1.4.0``, ``1.3.0``. +Supported versions of MXNet for Elastic Inference: ``1.3.0``, ``1.4.0``, ``1.4.1``. Training with MXNet ------------------- @@ -806,23 +806,23 @@ Your MXNet training script will be run on version 1.2.1 by default. (See below f The Docker images have the following dependencies installed: -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| Dependencies | MXNet 0.12.1 | MXNet 1.0.0 | MXNet 1.1.0 | MXNet 1.2.1 | MXNet 1.3.0 | MXNet 1.4.0 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| Python | 2.7 or 3.5 | 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.6| -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | 9.0 | 9.0 | 9.0 | 9.2 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| numpy | 1.13.3 | 1.13.3 | 1.13.3 | 1.14.5 | 1.14.6 | 1.16.3 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| onnx | N/A | N/A | N/A | 1.2.1 | 1.2.1 | 1.4.1 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| keras-mxnet | N/A | N/A | N/A | N/A | 2.2.2 | 2.2.4.1 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| Dependencies | MXNet 0.12.1 | MXNet 1.0.0 | MXNet 1.1.0 | MXNet 1.2.1 | MXNet 1.3.0 | MXNet 1.4.0 | MXNet 1.4.1 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| Python | 2.7 or 3.5 | 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.6| 2.7 or 3.6| ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| CUDA (GPU image only) | 9.0 | 9.0 | 9.0 | 9.0 | 9.0 | 9.2 | 10.0 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| numpy | 1.13.3 | 1.13.3 | 1.13.3 | 1.14.5 | 1.14.6 | 1.16.3 | 1.14.5 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| onnx | N/A | N/A | N/A | 1.2.1 | 1.2.1 | 1.4.1 | 1.4.1 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| keras-mxnet | N/A | N/A | N/A | N/A | 2.2.2 | 2.2.4.1 | 2.2.4.1 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ The Docker images extend Ubuntu 16.04. 
-You can select version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and minor version, e.g ``1.2``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.2.1. +You can select version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and minor version, e.g ``1.4``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.4.1. Alternatively, you can build your own image by following the instructions in the SageMaker MXNet containers repository, and passing ``image_name`` to the MXNet Estimator constructor. You can visit the SageMaker MXNet container repositories here: diff --git a/src/sagemaker/mxnet/README.rst b/src/sagemaker/mxnet/README.rst index f0965cd38a..6582257d08 100644 --- a/src/sagemaker/mxnet/README.rst +++ b/src/sagemaker/mxnet/README.rst @@ -4,9 +4,9 @@ Using MXNet with the SageMaker Python SDK With the SageMaker Python SDK, you can train and host MXNet models on Amazon SageMaker. -Supported versions of MXNet: ``1.4.0``, ``1.3.0``, ``1.2.1``, ``1.1.0``, ``1.0.0``, ``0.12.1``. +Supported versions of MXNet: ``0.12.1``, ``1.0.0``, ``1.1.0``, ``1.2.1``, ``1.3.0``, ``1.4.0``, ``1.4.1``. -Supported versions of MXNet for Elastic Inference: ``1.4.0``, ``1.3.0``. +Supported versions of MXNet for Elastic Inference: ``1.3.0``, ``1.4.0``, ``1.4.1``. For information about using MXNet with the SageMaker Python SDK, see https://sagemaker.readthedocs.io/en/stable/using_mxnet.html. @@ -21,23 +21,23 @@ Your MXNet training script will be run on version 1.2.1 by default. 
(See below f The Docker images have the following dependencies installed: -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| Dependencies | MXNet 0.12.1 | MXNet 1.0.0 | MXNet 1.1.0 | MXNet 1.2.1 | MXNet 1.3.0 | MXNet 1.4.0 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| Python | 2.7 or 3.5 | 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.6| -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | 9.0 | 9.0 | 9.0 | 9.2 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| numpy | 1.13.3 | 1.13.3 | 1.13.3 | 1.14.5 | 1.14.6 | 1.16.3 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| onnx | N/A | N/A | N/A | 1.2.1 | 1.2.1 | 1.4.1 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ -| keras-mxnet | N/A | N/A | N/A | N/A | 2.2.2 | 2.2.4.1 | -+-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+ ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| Dependencies | MXNet 0.12.1 | MXNet 1.0.0 | MXNet 1.1.0 | MXNet 1.2.1 | MXNet 1.3.0 | MXNet 1.4.0 | MXNet 1.4.1 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| Python | 2.7 or 3.5 | 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.5| 2.7 or 3.6| 2.7 or 3.6| ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| CUDA (GPU image only) | 9.0 | 9.0 | 9.0 | 9.0 | 9.0 | 9.2 | 10.0 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| numpy | 1.13.3 | 1.13.3 | 1.13.3 | 1.14.5 | 1.14.6 | 1.16.3 | 1.14.5 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| onnx | N/A | N/A | N/A | 1.2.1 | 1.2.1 | 1.4.1 | 1.4.1 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ +| keras-mxnet | N/A | N/A | N/A | N/A | 2.2.2 | 2.2.4.1 | 2.2.4.1 | ++-------------------------+--------------+-------------+-------------+-------------+-------------+-------------+-------------+ The Docker images extend Ubuntu 16.04. -You can select version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and minor version, e.g ``1.2``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.2.1. +You can select version of MXNet by passing a ``framework_version`` keyword arg to the MXNet Estimator constructor. Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and minor version, e.g ``1.4``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.4.1. 
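
A short sketch of the resolution rule just described (the estimator arguments are placeholders, not taken from this patch):

    from sagemaker.mxnet import MXNet

    # framework_version="1.4" runs the script on the latest supported 1.4.x
    # patch release, which after this change is 1.4.1; pass "1.4.1" to pin it.
    estimator = MXNet(
        entry_point="train.py",             # placeholder training script
        role="SageMakerRole",               # placeholder IAM role
        train_instance_count=1,
        train_instance_type="ml.c4.xlarge",
        framework_version="1.4",
    )
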
Alternatively, you can build your own image by following the instructions in the SageMaker MXNet containers repository, and passing ``image_name`` to the MXNet Estimator constructor. You can visit the SageMaker MXNet container repositories here: diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 81a81a768b..6cb552ff4b 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -34,7 +34,7 @@ class MXNet(Framework): __framework_name__ = "mxnet" _LOWEST_SCRIPT_MODE_VERSION = ["1", "3"] - LATEST_VERSION = "1.4" + LATEST_VERSION = "1.4.1" """The latest version of MXNet included in the SageMaker pre-built Docker images.""" def __init__( diff --git a/tests/conftest.py b/tests/conftest.py index 00033e6fd2..588aaf9b97 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -135,6 +135,7 @@ def chainer_version(request): "1.3.0", "1.4", "1.4.0", + "1.4.1", ], ) def mxnet_version(request): From 356283e8c599d9b5acaa1bfff4c32fe20d4ad9c1 Mon Sep 17 00:00:00 2001 From: Karim Nakad Date: Thu, 18 Jul 2019 16:16:06 -0700 Subject: [PATCH 25/35] change: format and add missing docstring placeholders (#945) This commit will format all existing docstring to follow Google style: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html This commit will also add docstring placeholders to any class or method previously missing it. An ideal approach would be to take the time to include meaningful docstrings in every file. However, since that is not a task that will be prioritized, I've declared docstring bankruptcy on this package, in order to enforce docstring on all future code changes to this package. --- .pylintrc | 1 - doc/conf.py | 5 + examples/cli/host/script.py | 26 +- examples/cli/train/script.py | 32 + setup.py | 5 + src/sagemaker/__init__.py | 1 + src/sagemaker/algorithm.py | 298 ++++-- src/sagemaker/amazon/amazon_estimator.py | 182 ++-- src/sagemaker/amazon/common.py | 83 +- .../amazon/factorization_machines.py | 187 ++-- src/sagemaker/amazon/hyperparameter.py | 56 +- src/sagemaker/amazon/ipinsights.py | 113 ++- src/sagemaker/amazon/kmeans.py | 156 ++- src/sagemaker/amazon/knn.py | 129 ++- src/sagemaker/amazon/lda.py | 137 ++- src/sagemaker/amazon/linear_learner.py | 269 +++-- src/sagemaker/amazon/ntm.py | 140 ++- src/sagemaker/amazon/object2vec.py | 169 ++-- src/sagemaker/amazon/pca.py | 134 ++- src/sagemaker/amazon/randomcutforest.py | 122 ++- src/sagemaker/amazon/validation.py | 31 + src/sagemaker/analytics.py | 116 ++- src/sagemaker/chainer/__init__.py | 1 + src/sagemaker/chainer/defaults.py | 1 + src/sagemaker/chainer/estimator.py | 132 ++- src/sagemaker/chainer/model.py | 74 +- src/sagemaker/cli/common.py | 30 + src/sagemaker/cli/main.py | 10 + src/sagemaker/cli/mxnet.py | 18 + src/sagemaker/cli/tensorflow.py | 21 + src/sagemaker/content_types.py | 1 + src/sagemaker/estimator.py | 936 +++++++++++------- src/sagemaker/fw_registry.py | 14 +- src/sagemaker/fw_utils.py | 115 ++- src/sagemaker/git_utils.py | 155 +-- src/sagemaker/job.py | 83 +- src/sagemaker/local/__init__.py | 1 + src/sagemaker/local/data.py | 123 ++- src/sagemaker/local/entities.py | 99 +- src/sagemaker/local/image.py | 167 +++- src/sagemaker/local/local_session.py | 189 +++- src/sagemaker/local/utils.py | 24 +- src/sagemaker/logs.py | 45 +- src/sagemaker/model.py | 472 +++++---- src/sagemaker/mxnet/__init__.py | 1 + src/sagemaker/mxnet/defaults.py | 1 + src/sagemaker/mxnet/estimator.py | 115 ++- src/sagemaker/mxnet/model.py | 70 +- 
src/sagemaker/parameter.py | 63 +- src/sagemaker/pipeline.py | 149 +-- src/sagemaker/predictor.py | 218 +++- src/sagemaker/pytorch/__init__.py | 1 + src/sagemaker/pytorch/defaults.py | 1 + src/sagemaker/pytorch/estimator.py | 107 +- src/sagemaker/pytorch/model.py | 74 +- src/sagemaker/rl/__init__.py | 1 + src/sagemaker/rl/estimator.py | 193 ++-- src/sagemaker/session.py | 16 + src/sagemaker/sklearn/__init__.py | 1 + src/sagemaker/sklearn/defaults.py | 1 + src/sagemaker/sklearn/estimator.py | 106 +- src/sagemaker/sklearn/model.py | 77 +- src/sagemaker/sparkml/__init__.py | 1 + src/sagemaker/sparkml/model.py | 62 +- src/sagemaker/tensorflow/__init__.py | 1 + src/sagemaker/tensorflow/defaults.py | 1 + src/sagemaker/tensorflow/estimator.py | 14 +- src/sagemaker/tensorflow/model.py | 72 +- src/sagemaker/tensorflow/predictor.py | 34 + src/sagemaker/tensorflow/serving.py | 105 +- src/sagemaker/transformer.py | 172 +++- src/sagemaker/tuner.py | 488 +++++---- src/sagemaker/user_agent.py | 6 + src/sagemaker/utils.py | 187 ++-- src/sagemaker/vpc_utils.py | 48 +- src/sagemaker/workflow/airflow.py | 399 ++++---- 76 files changed, 5262 insertions(+), 2626 deletions(-) diff --git a/.pylintrc b/.pylintrc index 692a6e3db5..4989c1a22c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -92,7 +92,6 @@ disable= cyclic-import, # TODO: Resolve cyclic imports no-self-use, # TODO: Convert methods to functions where appropriate too-many-branches, # TODO: Simplify or ignore as appropriate - missing-docstring, # TODO: Fix missing docstring [REPORTS] # Set the output format. Available formats are text, parseable, colorized, msvs diff --git a/doc/conf.py b/doc/conf.py index f9705e0089..df38101665 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import os @@ -22,6 +23,10 @@ class Mock(MagicMock): @classmethod def __getattr__(cls, name): + """ + Args: + name: + """ if name == "__version__": return "1.4.0" else: diff --git a/examples/cli/host/script.py b/examples/cli/host/script.py index 506cc86c4e..a5a549a04b 100644 --- a/examples/cli/host/script.py +++ b/examples/cli/host/script.py @@ -6,11 +6,13 @@ def model_fn(model_dir): - """ - Load the gluon model. Called once when hosting service starts. + """Load the gluon model. Called once when hosting service starts. + + Args: + model_dir: The directory where model files are stored. - :param: model_dir The directory where model files are stored. - :return: a model (in this case a Gluon network) + Returns: + a model (in this case a Gluon network) """ symbol = mx.sym.load("%s/model.json" % model_dir) outputs = mx.symbol.softmax(data=symbol, name="softmax_label") @@ -22,14 +24,16 @@ def model_fn(model_dir): def transform_fn(net, data, input_content_type, output_content_type): - """ - Transform a request using the Gluon model. Called once per request. + """Transform a request using the Gluon model. Called once per request. + + Args: + net: The Gluon model. + data: The request payload. + input_content_type: The request content type. + output_content_type: The (desired) response content type. - :param net: The Gluon model. - :param data: The request payload. - :param input_content_type: The request content type. - :param output_content_type: The (desired) response content type. 
- :return: response payload and content type. + Returns: + response payload and content type. """ # we can use content types to vary input/output handling, but # here we just assume json for both diff --git a/examples/cli/train/script.py b/examples/cli/train/script.py index 01d7ff3dbd..d97a364f85 100644 --- a/examples/cli/train/script.py +++ b/examples/cli/train/script.py @@ -12,6 +12,12 @@ def train(channel_input_dirs, hyperparameters, **kwargs): # SageMaker passes num_cpus, num_gpus and other args we can use to tailor training to # the current container environment, but here we just use simple cpu context. + """ + Args: + channel_input_dirs: + hyperparameters: + **kwargs: + """ ctx = mx.cpu() # retrieve the hyperparameters we set in notebook (with some defaults) @@ -80,6 +86,11 @@ def train(channel_input_dirs, hyperparameters, **kwargs): def save(net, model_dir): # save the model + """ + Args: + net: + model_dir: + """ y = net(mx.sym.var("data")) y.save("%s/model.json" % model_dir) net.collect_params().save("%s/model.params" % model_dir) @@ -95,11 +106,21 @@ def define_network(): def input_transformer(data, label): + """ + Args: + data: + label: + """ data = data.reshape((-1,)).astype(np.float32) / 255 return data, label def get_train_data(data_dir, batch_size): + """ + Args: + data_dir: + batch_size: + """ return gluon.data.DataLoader( gluon.data.vision.MNIST(data_dir, train=True, transform=input_transformer), batch_size=batch_size, @@ -109,6 +130,11 @@ def get_train_data(data_dir, batch_size): def get_val_data(data_dir, batch_size): + """ + Args: + data_dir: + batch_size: + """ return gluon.data.DataLoader( gluon.data.vision.MNIST(data_dir, train=False, transform=input_transformer), batch_size=batch_size, @@ -117,6 +143,12 @@ def get_val_data(data_dir, batch_size): def test(ctx, net, val_data): + """ + Args: + ctx: + net: + val_data: + """ metric = mx.metric.Accuracy() for data, label in val_data: data = data.as_in_context(ctx) diff --git a/setup.py b/setup.py index ea0d1d8fa4..bb1909e6e0 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import os @@ -20,6 +21,10 @@ def read(fname): + """ + Args: + fname: + """ return open(os.path.join(os.path.dirname(__file__), fname)).read() diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 7993c2fcda..a63d0f629e 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import pkg_resources diff --git a/src/sagemaker/algorithm.py b/src/sagemaker/algorithm.py index 68a29053af..649d144c9d 100644 --- a/src/sagemaker/algorithm.py +++ b/src/sagemaker/algorithm.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
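The example-script conversions above replace reStructuredText field lists (``:param:`` / ``:return:``) with Google-style sections. For reference, the target shape looks like the following sketch; the function and its arguments are hypothetical, for illustration only:

    def load_data(data_dir, batch_size):
        """Load a dataset from a local directory.

        Args:
            data_dir (str): The directory where the data files are stored.
            batch_size (int): Number of records per batch.

        Returns:
            An iterator over the loaded batches.
        """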
+"""Test docstring""" from __future__ import absolute_import import sagemaker @@ -21,9 +22,10 @@ class AlgorithmEstimator(EstimatorBase): - """A generic Estimator to train using any algorithm object (with an ``algorithm_arn``). - The Algorithm can be your own, or any Algorithm from AWS Marketplace that you have a valid - subscription for. This class will perform client-side validation on all the inputs. + """A generic Estimator to train using any algorithm object (with an + ``algorithm_arn``). The Algorithm can be your own, or any Algorithm from AWS + Marketplace that you have a valid subscription for. This class will perform + client-side validation on all the inputs. """ # These Hyperparameter Types have a range definition. @@ -55,54 +57,111 @@ def __init__( """Initialize an ``AlgorithmEstimator`` instance. Args: - algorithm_arn (str): algorithm arn used for training. Can be just the name if your - account owns the algorithm. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - train_volume_size (int): Size in GB of the EBS volume to use for storing input data - during training (default: 30). Must be large enough to store training data if File Mode is used - (which is the default). - train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached to the - training instance (default: None). - train_max_run (int): Timeout in seconds for training (default: 24 * 60 * 60). - After this amount of time Amazon SageMaker terminates the job regardless of its current status. - input_mode (str): The input mode that the algorithm supports (default: 'File'). Valid modes: - - * 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. - * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. - - This argument can be overriden on a per-channel basis using ``sagemaker.session.s3_input.input_mode``. - output_path (str): S3 location for saving the training result (model artifacts and output files). - If not specified, results are stored to a default bucket. If the bucket with the specific name - does not exist, the estimator creates the bucket during the - :meth:`~sagemaker.estimator.EstimatorBase.fit` method execution. - output_kms_key (str): Optional. KMS key ID for encrypting the training output (default: None). - base_job_name (str): Prefix for training job name when the :meth:`~sagemaker.estimator.EstimatorBase.fit` - method launches. If not specified, the estimator generates a default job name, based on - the training image name and current timestamp. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one - using the default AWS configuration chain. - tags (list[dict]): List of tags for labeling a training job. For more, see - https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - subnets (list[str]): List of subnet ids. If not specified training job will be created without VPC config. 
-            security_group_ids (list[str]): List of security group ids. If not specified training job will be created
-                without VPC config.
-            model_uri (str): URI where a pre-trained model is stored, either locally or in S3 (default: None). If
-                specified, the estimator will create a channel pointing to the model so the training job can download
-                it. This model can be a 'model.tar.gz' from a previous training job, or other artifacts coming from a
-                different source.
-                More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
-            model_channel_name (str): Name of the channel where 'model_uri' will be downloaded (default: 'model').
-            metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
-                training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for
-                the regular expression used to extract the metric from the logs.
-            encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is encrypted
-                for the training job (default: ``False``).
+            algorithm_arn (str): algorithm arn used for training. Can be just the name if your
+                account owns the algorithm.
+            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker
+                training jobs and APIs that create Amazon SageMaker endpoints use this role to
+                access training data and model artifacts. After the endpoint is created, the
+                inference code might use the IAM role, if it needs to access an AWS resource.
+            train_instance_count (int): Number of Amazon EC2 instances to use for training.
+            train_instance_type (str): Type of EC2 instance to use for training, for example,
+                'ml.c4.xlarge'.
+            train_volume_size (int): Size in GB of the EBS volume to use for storing input
+                data during training (default: 30). Must be large enough to store training
+                data if File Mode is used (which is the default).
+            train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached
+                to the training instance (default: None).
+            train_max_run (int): Timeout in seconds for training (default: 24 * 60 * 60).
+                After this amount of time Amazon SageMaker terminates the job regardless of
+                its current status.
+            input_mode (str): The input mode that the algorithm supports (default: 'File').
+                Valid modes:
+
+                * 'File' - Amazon SageMaker copies the training dataset from the S3 location
+                  to a local directory.
+                * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container
+                  via a Unix-named pipe.
+
+                This argument can be overridden on a per-channel basis using
+                ``sagemaker.session.s3_input.input_mode``.
+            output_path (str): S3 location for saving the training result (model artifacts and
+                output files). If not specified, results are stored to a default bucket. If
+                the bucket with the specific name does not exist, the estimator creates the
+                bucket during the :meth:`~sagemaker.estimator.EstimatorBase.fit` method
+                execution.
+            output_kms_key (str): Optional. KMS key ID for encrypting the training output
+                (default: None).
+            base_job_name (str): Prefix for training job name when the
+                :meth:`~sagemaker.estimator.EstimatorBase.fit` method launches. If not
+                specified, the estimator generates a default job name, based on the training
+                image name and current timestamp.
+            sagemaker_session (sagemaker.session.Session): Session object which manages
+                interactions with Amazon SageMaker APIs and any other AWS services needed. If
+                not specified, the estimator creates one using the default AWS configuration
+                chain.
+            hyperparameters:
+            tags (list[dict]): List of tags for labeling a training job. For more, see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
+            subnets (list[str]): List of subnet ids. If not specified training job will be
+                created without VPC config.
+            security_group_ids (list[str]): List of security group ids. If not specified
+                training job will be created without VPC config.
+            model_uri (str): URI where a pre-trained model is stored, either locally or in S3
+                (default: None). If specified, the estimator will create a channel pointing to
+                the model so the training job can download it. This model can be a
+                'model.tar.gz' from a previous training job, or other artifacts coming from a
+                different source. More information:
+                https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
+            model_channel_name (str): Name of the channel where 'model_uri' will be downloaded
+                (default: 'model').
+            metric_definitions (list[dict]): A list of dictionaries that defines the metric(s)
+                used to evaluate the training jobs. Each dictionary contains two keys: 'Name'
+                for the name of the metric, and 'Regex' for the regular expression used to
+                extract the metric from the logs.
+            encrypt_inter_container_traffic (bool): Specifies whether traffic between training
+                containers is encrypted for the training job (default: ``False``).
         """
         self.algorithm_arn = algorithm_arn
         super(AlgorithmEstimator, self).__init__(
@@ -137,6 +196,7 @@ def __init__(
         self.set_hyperparameters(**hyperparameters)

     def validate_train_spec(self):
+        """Placeholder docstring"""
         train_spec = self.algorithm_spec["TrainingSpecification"]
         algorithm_name = self.algorithm_spec["AlgorithmName"]
@@ -169,6 +229,10 @@ def validate_train_spec(self):
         )

     def set_hyperparameters(self, **kwargs):
+        """
+        Args:
+            **kwargs:
+        """
         for k, v in kwargs.items():
             value = self._validate_and_cast_hyperparameter(k, v)
             self.hyperparam_dict[k] = value
@@ -178,23 +242,25 @@ def set_hyperparameters(self, **kwargs):
     def hyperparameters(self):
         """Returns the hyperparameters as a dictionary to use for training.

-        The fit() method, that does the model training, calls this method to find the hyperparameters you specified.
+        The fit() method, which does the model training, calls this method to
+        find the hyperparameters you specified.
         """
         return self.hyperparam_dict

     def train_image(self):
         """Returns the docker image to use for training.

-        The fit() method, that does the model training, calls this method to find the image to use for model training.
+        The fit() method, which does the model training, calls this method to
+        find the image to use for model training.
         """
         raise RuntimeError("train_image is never meant to be called on Algorithm Estimators")

     def enable_network_isolation(self):
         """Return True if this Estimator will need network isolation to run.

-        On Algorithm Estimators this depends on the algorithm being used. If this is algorithm
-        owned by your account it will be False. If this is an an algorithm consumed from Marketplace
-        it will be True.
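Taken together, the constructor, ``set_hyperparameters``, and ``fit`` shown above support a flow like the following minimal sketch; the ARN, role, hyperparameter name, and S3 path are illustrative placeholders, not values from this change:

    from sagemaker.algorithm import AlgorithmEstimator

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-west-2:012345678901:algorithm/my-algorithm",
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type="ml.c4.xlarge",
    )
    # Values are validated and cast against the algorithm's hyperparameter definitions.
    estimator.set_hyperparameters(max_depth=5)
    # Input channels are validated against the algorithm's TrainingSpecification.
    estimator.fit({"training": "s3://my-bucket/my-training-data"})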
+        On Algorithm Estimators this depends on the algorithm being used. If
+        this is an algorithm owned by your account it will be False. If this is
+        an algorithm consumed from Marketplace it will be True.

         Returns:
             bool: Whether this Estimator needs network isolation or not.
@@ -214,25 +280,33 @@ def create_model(
     ):
         """Create a model to deploy.

-        The serializer, deserializer, content_type, and accept arguments are only used to define a default
-        RealTimePredictor. They are ignored if an explicit predictor class is passed in. Other arguments
-        are passed through to the Model class.
+        The serializer, deserializer, content_type, and accept arguments are
+        only used to define a default RealTimePredictor. They are ignored if an
+        explicit predictor class is passed in. Other arguments are passed
+        through to the Model class.

         Args:
-            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
-                transform jobs. If not specified, the role from the Estimator will be used.
-            predictor_cls (RealTimePredictor): The predictor class to use when deploying the model.
-            serializer (callable): Should accept a single argument, the input data, and return a sequence
-                of bytes. May provide a content_type attribute that defines the endpoint request content type
-            deserializer (callable): Should accept two arguments, the result data and the response content type,
-                and return a sequence of bytes. May provide a content_type attribute that defines th endpoint
-                response Accept content type.
-            content_type (str): The invocation ContentType, overriding any content_type from the serializer
-            accept (str): The invocation Accept, overriding any accept from the deserializer.
-            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model.
-                Default: use subnets and security groups from this Estimator.
+            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
+                which is also used during transform jobs. If not specified, the
+                role from the Estimator will be used.
+            predictor_cls (RealTimePredictor): The predictor class to use when
+                deploying the model.
+            serializer (callable): Should accept a single argument, the input
+                data, and return a sequence of bytes. May provide a content_type
+                attribute that defines the endpoint request content type
+            deserializer (callable): Should accept two arguments, the result
+                data and the response content type, and return a sequence of
+                bytes. May provide a content_type attribute that defines the
+                endpoint response Accept content type.
+            content_type (str): The invocation ContentType, overriding any
+                content_type from the serializer
+            accept (str): The invocation Accept, overriding any accept from the
+                deserializer.
+            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on
+                the model. Default: use subnets and security groups from this Estimator.
                 * 'Subnets' (list[str]): List of subnet ids.
                 * 'SecurityGroupIds' (list[str]): List of security group ids.
+            **kwargs:

         Returns:
             a Model ready for deployment.
@@ -274,29 +348,39 @@ def transformer(
         role=None,
         volume_kms_key=None,
     ):
-        """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the
-        SageMaker Session and base job name used by the Estimator.
+        """Return a ``Transformer`` that uses a SageMaker Model based on the
+        training job. It reuses the SageMaker Session and base job name used by
+        the Estimator.

         Args:
             instance_count (int): Number of EC2 instances to use.
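The ``serializer``/``deserializer`` contract described in ``create_model`` above mirrors the SDK's built-in serializers (compare ``record_deserializer`` in ``common.py`` later in this patch). A minimal JSON pair might look like this sketch; the class names are hypothetical:

    import json

    class json_serializer(object):
        # The content_type attribute sets the endpoint request ContentType.
        content_type = "application/json"

        def __call__(self, data):
            # Turn the input into a byte sequence for the InvokeEndpoint request.
            return json.dumps(data)

    class json_deserializer(object):
        # The accept attribute sets the endpoint response Accept content type.
        accept = "application/json"

        def __call__(self, stream, content_type):
            # Parse the response body, closing the stream when done.
            try:
                return json.load(stream)
            finally:
                stream.close()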
- instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - env (dict): Environment variables to be set for use during the transform job (default: None). - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. + strategy (str): The strategy used to decide how to batch records in + a single request (default: None). Valid values: 'MULTI_RECORD' + and 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). + Valid values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If + not specified, results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the + transform output (default: None). + accept (str): The content type accepted by the endpoint deployed + during the transform job. + env (dict): Environment variables to be set for use during the + transform job (default: None). + max_concurrent_transforms (int): The maximum number of HTTP requests + to be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP + request to the container in MB. + tags (list[dict]): List of tags for labeling a transform job. If + none specified, then the tags used for the training job are used + for the transform job. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). """ role = role or self.role @@ -334,23 +418,39 @@ def transformer( ) def _is_marketplace(self): + """Placeholder docstring""" return "ProductId" in self.algorithm_spec def _prepare_for_training(self, job_name=None): # Validate hyperparameters # an explicit call to set_hyperparameters() will also validate the hyperparameters # but it is possible that the user never called it. 
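A batch-transform sketch matching the ``transformer`` signature above; the bucket names are placeholders, and ``estimator`` is assumed to have been fit already:

    transformer = estimator.transformer(
        instance_count=1,
        instance_type="ml.c4.xlarge",
        strategy="MULTI_RECORD",
        assemble_with="Line",
        output_path="s3://my-bucket/transform-output",
    )
    # Run the batch job over an S3 prefix and block until it finishes.
    transformer.transform("s3://my-bucket/transform-input")
    transformer.wait()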
+ """ + Args: + job_name: + """ self._validate_and_set_default_hyperparameters() super(AlgorithmEstimator, self)._prepare_for_training(job_name) def fit(self, inputs=None, wait=True, logs=True, job_name=None): + """ + Args: + inputs: + wait: + logs: + job_name: + """ if inputs: self._validate_input_channels(inputs) super(AlgorithmEstimator, self).fit(inputs, wait, logs, job_name) def _validate_input_channels(self, channels): + """ + Args: + channels: + """ train_spec = self.algorithm_spec["TrainingSpecification"] algorithm_name = self.algorithm_spec["AlgorithmName"] training_channels = {c["Name"]: c for c in train_spec["TrainingChannels"]} @@ -368,6 +468,11 @@ def _validate_input_channels(self, channels): raise ValueError("Required input channel: %s Was not provided." % (name)) def _validate_and_cast_hyperparameter(self, name, v): + """ + Args: + name: + v: + """ algorithm_name = self.algorithm_spec["AlgorithmName"] if name not in self.hyperparameter_definitions: @@ -387,6 +492,7 @@ def _validate_and_cast_hyperparameter(self, name, v): return value def _validate_and_set_default_hyperparameters(self): + """Placeholder docstring""" # Check if all the required hyperparameters are set. If there is a default value # for one, set it. for name, definition in self.hyperparameter_definitions.items(): @@ -398,6 +504,7 @@ def _validate_and_set_default_hyperparameters(self): raise ValueError("Required hyperparameter: %s is not set" % name) def _parse_hyperparameters(self): + """Placeholder docstring""" definitions = {} training_spec = self.algorithm_spec["TrainingSpecification"] @@ -419,6 +526,11 @@ def _parse_hyperparameters(self): return definitions def _hyperparameter_range_and_class(self, parameter_type, hyperparameter): + """ + Args: + parameter_type: + hyperparameter: + """ if parameter_type in self._hyperpameters_with_range: range_name = parameter_type + "ParameterRangeSpecification" @@ -459,6 +571,10 @@ def _hyperparameter_range_and_class(self, parameter_type, hyperparameter): return parameter_class, parameter_range def _algorithm_training_input_modes(self, training_channels): + """ + Args: + training_channels: + """ current_input_modes = {"File", "Pipe"} for channel in training_channels: supported_input_modes = set(channel["SupportedInputModes"]) diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py index 7f4c49c879..e8ee5ce22e 100644 --- a/src/sagemaker/amazon/amazon_estimator.py +++ b/src/sagemaker/amazon/amazon_estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import json @@ -30,8 +31,9 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase): - """Base class for Amazon first-party Estimator implementations. This class isn't intended - to be instantiated directly.""" + """Base class for Amazon first-party Estimator implementations. This class + isn't intended to be instantiated directly. + """ feature_dim = hp("feature_dim", validation.gt(0), data_type=int) mini_batch_size = hp("mini_batch_size", validation.gt(0), data_type=int) @@ -44,10 +46,16 @@ def __init__( """Initialize an AmazonAlgorithmEstimatorBase. Args: - data_location (str or None): The s3 prefix to upload RecordSet objects to, expressed as an - S3 url. For example "s3://example-bucket/some-key-prefix/". 
Objects will be - saved in a unique sub-directory of the specified location. If None, a default - data location will be used.""" + role: + train_instance_count: + train_instance_type: + data_location (str or None): The s3 prefix to upload RecordSet + objects to, expressed as an S3 url. For example + "s3://example-bucket/some-key-prefix/". Objects will be saved in + a unique sub-directory of the specified location. If None, a + default data location will be used. + **kwargs: + """ super(AmazonAlgorithmEstimatorBase, self).__init__( role, train_instance_count, train_instance_type, **kwargs ) @@ -58,19 +66,26 @@ def __init__( self._data_location = data_location def train_image(self): + """Placeholder docstring""" return get_image_uri( self.sagemaker_session.boto_region_name, type(self).repo_name, type(self).repo_version ) def hyperparameters(self): + """Placeholder docstring""" return hp.serialize_all(self) @property def data_location(self): + """Placeholder docstring""" return self._data_location @data_location.setter def data_location(self, data_location): + """ + Args: + data_location: + """ if not data_location.startswith("s3://"): raise ValueError( 'Expecting an S3 URL beginning with "s3://". Got "{}"'.format(data_location) @@ -81,15 +96,17 @@ def data_location(self, data_location): @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded. Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = super( AmazonAlgorithmEstimatorBase, cls @@ -111,11 +128,12 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): """Set hyperparameters needed for training. Args: - * records (:class:`~RecordSet`): The records to train this ``Estimator`` on. - * mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a - default value will be used. - * job_name (str): Name of the training job to be created. If not specified, one is generated, - using the base name given to the constructor if applicable. + records (:class:`~RecordSet`): The records to train this ``Estimator`` on. + mini_batch_size (int or None): The size of each mini-batch to use when + training. If ``None``, a default value will be used. + job_name (str): Name of the training job to be created. If not + specified, one is generated, using the base name given to the + constructor if applicable. """ super(AmazonAlgorithmEstimatorBase, self)._prepare_for_training(job_name=job_name) @@ -137,28 +155,31 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None): """Fit this Estimator on serialized Record objects, stored in S3. - ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of - S3 data files to train this ``Estimator`` on. + ``records`` should be an instance of :class:`~RecordSet`. 
This
+        defines a collection of S3 data files to train this ``Estimator`` on.

-        Training data is expected to be encoded as dense or sparse vectors in the "values" feature
-        on each Record. If the data is labeled, the label is expected to be encoded as a list of
-        scalas in the "values" feature of the Record label.
+        Training data is expected to be encoded as dense or sparse vectors in
+        the "values" feature on each Record. If the data is labeled, the label
+        is expected to be encoded as a list of scalars in the "values" feature of
+        the Record label.

         More information on the Amazon Record format is available at:
         https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

-        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a ``RecordSet`` object
-        from :class:`~numpy.ndarray` arrays.
+        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a
+        ``RecordSet`` object from :class:`~numpy.ndarray` arrays.

         Args:
             records (:class:`~RecordSet`): The records to train this ``Estimator`` on
-            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
-                default value will be used.
-            wait (bool): Whether the call should wait until the job completes (default: True).
-            logs (bool): Whether to show the logs produced by the job.
-                Only meaningful when wait is True (default: True).
-            job_name (str): Training job name. If not specified, the estimator generates a default job name,
-                based on the training image name and current timestamp.
+            mini_batch_size (int or None): The size of each mini-batch to use
+                when training. If ``None``, a default value will be used.
+            wait (bool): Whether the call should wait until the job completes
+                (default: True).
+            logs (bool): Whether to show the logs produced by the job. Only
+                meaningful when wait is True (default: True).
+            job_name (str): Training job name. If not specified, the estimator
+                generates a default job name, based on the training image name
+                and current timestamp.
         """
         self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)
@@ -167,29 +188,35 @@ def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None
         self.latest_training_job.wait(logs=logs)

     def record_set(self, train, labels=None, channel="train", encrypt=False):
-        """Build a :class:`~RecordSet` from a numpy :class:`~ndarray` matrix and label vector.
+        """Build a :class:`~RecordSet` from a numpy :class:`~ndarray` matrix and
+        label vector.

-        For the 2D ``ndarray`` ``train``, each row is converted to a :class:`~Record` object.
-        The vector is stored in the "values" entry of the ``features`` property of each Record.
-        If ``labels`` is not None, each corresponding label is assigned to the "values" entry
-        of the ``labels`` property of each Record.
+        For the 2D ``ndarray`` ``train``, each row is converted to a
+        :class:`~Record` object. The vector is stored in the "values" entry of
+        the ``features`` property of each Record. If ``labels`` is not None,
+        each corresponding label is assigned to the "values" entry of the
+        ``labels`` property of each Record.

-        The collection of ``Record`` objects are protobuf serialized and uploaded to new
-        S3 locations. A manifest file is generated containing the list of objects created and
-        also stored in S3.
+        The collection of ``Record`` objects are protobuf serialized and
+        uploaded to new S3 locations. A manifest file is generated containing
+        the list of objects created and also stored in S3.

-        The number of S3 objects created is controlled by the ``train_instance_count`` property
-        on this Estimator. One S3 object is created per training instance.
+        The number of S3 objects created is controlled by the
+        ``train_instance_count`` property on this Estimator. One S3 object is
+        created per training instance.

         Args:
             train (numpy.ndarray): A 2D numpy array of training data.
-            labels (numpy.ndarray): A 1D numpy array of labels. Its length must be equal to the
-                number of rows in ``train``.
-            channel (str): The SageMaker TrainingJob channel this RecordSet should be assigned to.
-            encrypt (bool): Specifies whether the objects uploaded to S3 are encrypted on the
-                server side using AES-256 (default: ``False``).
+            labels (numpy.ndarray): A 1D numpy array of labels. Its length must
+                be equal to the number of rows in ``train``.
+            channel (str): The SageMaker TrainingJob channel this RecordSet
+                should be assigned to.
+            encrypt (bool): Specifies whether the objects uploaded to S3 are
+                encrypted on the server side using AES-256 (default: ``False``).
+
         Returns:
-            RecordSet: A RecordSet referencing the encoded, uploading training and label data.
+            RecordSet: A RecordSet referencing the encoded, uploaded training
+            and label data.
         """
         s3 = self.sagemaker_session.boto_session.resource("s3")
         parsed_s3_url = urlparse(self.data_location)
@@ -210,21 +237,26 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):

 class RecordSet(object):
+    """Placeholder docstring"""
+
     def __init__(
         self, s3_data, num_records, feature_dim, s3_data_type="ManifestFile", channel="train"
     ):
-        """A collection of Amazon :class:~`Record` objects serialized and stored in S3.
+        """A collection of Amazon :class:~`Record` objects serialized and stored
+        in S3.

         Args:
             s3_data (str): The S3 location of the training data
             num_records (int): The number of records in the set.
-            feature_dim (int): The dimensionality of "values" arrays in the Record features,
-                and label (if each Record is labeled).
-            s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile'. If 'S3Prefix', ``s3_data`` defines
-                a prefix of s3 objects to train on. All objects with s3 keys beginning with ``s3_data`` will
-                be used to train. If 'ManifestFile', then ``s3_data`` defines a single s3 manifest file, listing
-                each s3 object to train on.
+            feature_dim (int): The dimensionality of "values" arrays in the
+                Record features, and label (if each Record is labeled).
+            s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile'. If
+                'S3Prefix', ``s3_data`` defines a prefix of s3 objects to train
+                on. All objects with s3 keys beginning with ``s3_data`` will be
+                used to train. If 'ManifestFile', then ``s3_data`` defines a
+                single s3 manifest file, listing each s3 object to train on.
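In practice ``record_set`` and ``fit`` combine as in this sketch, which applies to any of the first-party estimators later in this patch; the shapes and the ``estimator`` variable are illustrative:

    import numpy as np

    train = np.random.rand(1000, 50).astype("float32")        # 1000 records, feature_dim=50
    labels = np.random.randint(0, 2, 1000).astype("float32")

    # Shards the arrays into one S3 object per training instance and
    # writes a manifest file referencing them.
    records = estimator.record_set(train, labels=labels, channel="train")
    estimator.fit(records, mini_batch_size=100)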
+ channel (str): The SageMaker Training Job channel this RecordSet + should be bound to """ self.s3_data = s3_data self.feature_dim = feature_dim @@ -237,7 +269,9 @@ def __repr__(self): return str((RecordSet, self.__dict__)) def data_channel(self): - """Return a dictionary to represent the training data in a channel for use with ``fit()``""" + """Return a dictionary to represent the training data in a channel for + use with ``fit()`` + """ return {self.channel: self.records_s3_input()} def records_s3_input(self): @@ -246,6 +280,11 @@ def records_s3_input(self): def _build_shards(num_shards, array): + """ + Args: + num_shards: + array: + """ if num_shards < 1: raise ValueError("num_shards must be >= 1") shard_size = int(array.shape[0] / num_shards) @@ -259,9 +298,19 @@ def _build_shards(num_shards, array): def upload_numpy_to_s3_shards( num_shards, s3, bucket, key_prefix, array, labels=None, encrypt=False ): - """Upload the training ``array`` and ``labels`` arrays to ``num_shards`` S3 objects, - stored in "s3://``bucket``/``key_prefix``/". Optionally ``encrypt`` the S3 objects using - AES-256.""" + """Upload the training ``array`` and ``labels`` arrays to ``num_shards`` S3 + objects, stored in "s3:// ``bucket`` / ``key_prefix`` /". Optionally + ``encrypt`` the S3 objects using AES-256. + + Args: + num_shards: + s3: + bucket: + key_prefix: + array: + labels: + encrypt: + """ shards = _build_shards(num_shards, array) if labels is not None: label_shards = _build_shards(num_shards, labels) @@ -300,10 +349,14 @@ def upload_numpy_to_s3_shards( def registry(region_name, algorithm=None): """Return docker registry for the given AWS region - Note: Not all the algorithms listed below have an Amazon Estimator implemented. For full list of - pre-implemented Estimators, look at: + Note: Not all the algorithms listed below have an Amazon Estimator + implemented. For full list of pre-implemented Estimators, look at: https://github.com/aws/sagemaker-python-sdk/tree/master/src/sagemaker/amazon + + Args: + region_name: + algorithm: """ if algorithm in [ None, @@ -406,6 +459,13 @@ def registry(region_name, algorithm=None): def get_image_uri(region_name, repo_name, repo_version=1): - """Return algorithm image URI for the given AWS region, repository name, and repository version""" + """Return algorithm image URI for the given AWS region, repository name, and + repository version + + Args: + region_name: + repo_name: + repo_version: + """ repo = "{}:{}".format(repo_name, repo_version) return "{}/{}".format(registry(region_name, repo_name), repo) diff --git a/src/sagemaker/amazon/common.py b/src/sagemaker/amazon/common.py index 6bd5047a98..b8ee2cc821 100644 --- a/src/sagemaker/amazon/common.py +++ b/src/sagemaker/amazon/common.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
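For example, ``get_image_uri`` composes the region's registry with a ``repo:version`` tag; a call like the following resolves to ``<account>.dkr.ecr.us-west-2.amazonaws.com/kmeans:1``, where the account id is chosen by ``registry`` based on the region:

    from sagemaker.amazon.amazon_estimator import get_image_uri

    image = get_image_uri("us-west-2", "kmeans")  # repo_version defaults to 1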
+"""Placeholder docstring""" from __future__ import absolute_import import io @@ -23,10 +24,20 @@ class numpy_to_record_serializer(object): + """Placeholder docstring""" + def __init__(self, content_type="application/x-recordio-protobuf"): + """ + Args: + content_type: + """ self.content_type = content_type def __call__(self, array): + """ + Args: + array: + """ if len(array.shape) == 1: array = array.reshape(1, array.shape[0]) assert len(array.shape) == 2, "Expecting a 1 or 2 dimensional array" @@ -37,10 +48,21 @@ def __call__(self, array): class record_deserializer(object): + """Placeholder docstring""" + def __init__(self, accept="application/x-recordio-protobuf"): + """ + Args: + accept: + """ self.accept = accept def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ try: return read_records(stream) finally: @@ -48,6 +70,12 @@ def __call__(self, stream, content_type): def _write_feature_tensor(resolved_type, record, vector): + """ + Args: + resolved_type: + record: + vector: + """ if resolved_type == "Int32": record.features["values"].int32_tensor.values.extend(vector) elif resolved_type == "Float64": @@ -57,6 +85,12 @@ def _write_feature_tensor(resolved_type, record, vector): def _write_label_tensor(resolved_type, record, scalar): + """ + Args: + resolved_type: + record: + scalar: + """ if resolved_type == "Int32": record.label["values"].int32_tensor.values.extend([scalar]) elif resolved_type == "Float64": @@ -66,6 +100,12 @@ def _write_label_tensor(resolved_type, record, scalar): def _write_keys_tensor(resolved_type, record, vector): + """ + Args: + resolved_type: + record: + vector: + """ if resolved_type == "Int32": record.features["values"].int32_tensor.keys.extend(vector) elif resolved_type == "Float64": @@ -75,6 +115,12 @@ def _write_keys_tensor(resolved_type, record, vector): def _write_shape(resolved_type, record, scalar): + """ + Args: + resolved_type: + record: + scalar: + """ if resolved_type == "Int32": record.features["values"].int32_tensor.shape.extend([scalar]) elif resolved_type == "Float64": @@ -84,7 +130,13 @@ def _write_shape(resolved_type, record, scalar): def write_numpy_to_dense_tensor(file, array, labels=None): - """Writes a numpy array to a dense tensor""" + """Writes a numpy array to a dense tensor + + Args: + file: + array: + labels: + """ # Validate shape of array and labels, resolve array and label types if not len(array.shape) == 2: @@ -112,7 +164,13 @@ def write_numpy_to_dense_tensor(file, array, labels=None): def write_spmatrix_to_sparse_tensor(file, array, labels=None): - """Writes a scipy sparse matrix to a sparse tensor""" + """Writes a scipy sparse matrix to a sparse tensor + + Args: + file: + array: + labels: + """ if not issparse(array): raise TypeError("Array must be sparse") @@ -155,7 +213,11 @@ def write_spmatrix_to_sparse_tensor(file, array, labels=None): def read_records(file): - """Eagerly read a collection of amazon Record protobuf objects from file.""" + """Eagerly read a collection of amazon Record protobuf objects from file. + + Args: + file: + """ records = [] for record_data in read_recordio(file): record = Record() @@ -178,7 +240,12 @@ def read_records(file): def _write_recordio(f, data): - """Writes a single data point as a RecordIO record to the given file.""" + """Writes a single data point as a RecordIO record to the given file. 
+
+    Args:
+        f:
+        data:
+    """
     length = len(data)
     f.write(struct.pack("I", _kmagic))
     f.write(struct.pack("I", length))
@@ -188,6 +255,10 @@


 def read_recordio(f):
+    """
+    Args:
+        f:
+    """
     while True:
         try:
             read_kmagic, = struct.unpack("I", f.read(4))
@@ -202,6 +273,10 @@


 def _resolve_type(dtype):
+    """
+    Args:
+        dtype:
+    """
     if dtype == np.dtype(int):
         return "Int32"
     if dtype == np.dtype(float):
diff --git a/src/sagemaker/amazon/factorization_machines.py b/src/sagemaker/amazon/factorization_machines.py
index ad377b5f2b..1f0956d5d6 100644
--- a/src/sagemaker/amazon/factorization_machines.py
+++ b/src/sagemaker/amazon/factorization_machines.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
@@ -23,6 +24,7 @@


 class FactorizationMachines(AmazonAlgorithmEstimatorBase):
+    """Placeholder docstring"""

     repo_name = "factorization-machines"
     repo_version = 1
@@ -103,76 +105,105 @@ def __init__(
         factors_init_value=None,
         **kwargs
     ):
-        """Factorization Machines is :class:`Estimator` for general-purpose supervised learning.
+        """Factorization Machines is an :class:`Estimator` for general-purpose
+        supervised learning.

-        Amazon SageMaker Factorization Machines is a general-purpose supervised learning algorithm that you can use
-        for both classification and regression tasks. It is an extension of a linear model that is designed
-        to parsimoniously capture interactions between features within high dimensional sparse datasets.
+        Amazon SageMaker Factorization Machines is a general-purpose
+        supervised learning algorithm that you can use for both classification
+        and regression tasks. It is an extension of a linear model that is
+        designed to parsimoniously capture interactions between features within
+        high dimensional sparse datasets.

         This Estimator may be fit via calls to
-        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon
-        :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3.
-        There is an utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that
-        can be used to upload data to S3 and creates :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed
-        to the `fit` call.
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`.
+        It requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` protobuf
+        serialized data to be stored in S3. There is a utility
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set`
+        that can be used to upload data to S3 and creates
+        :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed to
+        the `fit` call.

-        To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please
-        consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
+        To learn more about the Amazon protobuf Record class and how to
+        prepare bulk data in this format, please consult AWS technical
+        documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

-        After this Estimator is fit, model data is stored in S3.
The model may be deployed to an Amazon SageMaker - Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, - deploy returns a :class:`~sagemaker.amazon.pca.FactorizationMachinesPredictor` object that can be used - for inference calls using the trained model hosted in the SageMaker Endpoint. + After this Estimator is fit, model data is stored in S3. The model + may be deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as + deploying an Endpoint, deploy returns a + :class:`~sagemaker.amazon.pca.FactorizationMachinesPredictor` object + that can be used for inference calls using the trained model hosted in + the SageMaker Endpoint. - FactorizationMachines Estimators can be configured by setting hyperparameters. The available hyperparameters for - FactorizationMachines are documented below. + FactorizationMachines Estimators can be configured by setting + hyperparameters. The available hyperparameters for FactorizationMachines + are documented below. For further information on the AWS FactorizationMachines algorithm, - please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/fact-machines.html + please consult AWS technical documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/fact-machines.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. num_factors (int): Dimensionality of factorization. - predictor_type (str): Type of predictor 'binary_classifier' or 'regressor'. + predictor_type (str): Type of predictor 'binary_classifier' or + 'regressor'. epochs (int): Number of training epochs to run. - clip_gradient (float): Optimizer parameter. Clip the gradient by projecting onto - the box [-clip_gradient, +clip_gradient] - eps (float): Optimizer parameter. Small value to avoid division by 0. - rescale_grad (float): Optimizer parameter. If set, multiplies the gradient with rescale_grad - before updating. Often choose to be 1.0/batch_size. + clip_gradient (float): Optimizer parameter. Clip the gradient by + projecting onto the box [-clip_gradient, +clip_gradient] + eps (float): Optimizer parameter. Small value to avoid division by + 0. + rescale_grad (float): Optimizer parameter. If set, multiplies the + gradient with rescale_grad before updating. Often choose to be + 1.0/batch_size. bias_lr (float): Non-negative learning rate for the bias term. linear_lr (float): Non-negative learning rate for linear terms. - factors_lr (float): Noon-negative learning rate for factorization terms. 
+            factors_lr (float): Non-negative learning rate for factorization
+                terms.
             bias_wd (float): Non-negative weight decay for the bias term.
             linear_wd (float): Non-negative weight decay for linear terms.
-            factors_wd (float): Non-negative weight decay for factorization terms.
-            bias_init_method (string): Initialization method for the bias term: 'normal', 'uniform' or 'constant'.
-            bias_init_scale (float): Non-negative range for initialization of the bias term that takes
-                effect when bias_init_method parameter is 'uniform'
-            bias_init_sigma (float): Non-negative standard deviation for initialization of the bias term that takes
-                effect when bias_init_method parameter is 'normal'.
-            bias_init_value (float): Initial value of the bias term that takes effect
-                when bias_init_method parameter is 'constant'.
-            linear_init_method (string): Initialization method for linear term: 'normal', 'uniform' or 'constant'.
-            linear_init_scale (float): Non-negative range for initialization of linear terms that takes
-                effect when linear_init_method parameter is 'uniform'.
-            linear_init_sigma (float): Non-negative standard deviation for initialization of linear terms that takes
-                effect when linear_init_method parameter is 'normal'.
-            linear_init_value (float): Initial value of linear terms that takes effect
-                when linear_init_method parameter is 'constant'.
-            factors_init_method (string): Initialization method for factorization term: 'normal',
-                'uniform' or 'constant'.
-            factors_init_scale (float): Non-negative range for initialization of factorization terms that takes
-                effect when factors_init_method parameter is 'uniform'.
-            factors_init_sigma (float): Non-negative standard deviation for initialization of factorization terms that
-                takes effect when factors_init_method parameter is 'normal'.
-            factors_init_value (float): Initial value of factorization terms that takes
-                effect when factors_init_method parameter is 'constant'.
+            factors_wd (float): Non-negative weight decay for factorization
+                terms.
+            bias_init_method (string): Initialization method for the bias term:
+                'normal', 'uniform' or 'constant'.
+            bias_init_scale (float): Non-negative range for initialization of
+                the bias term that takes effect when bias_init_method parameter
+                is 'uniform'
+            bias_init_sigma (float): Non-negative standard deviation for
+                initialization of the bias term that takes effect when
+                bias_init_method parameter is 'normal'.
+            bias_init_value (float): Initial value of the bias term that takes
+                effect when bias_init_method parameter is 'constant'.
+            linear_init_method (string): Initialization method for linear term:
+                'normal', 'uniform' or 'constant'.
+            linear_init_scale (float): Non-negative range for initialization of
+                linear terms that takes effect when linear_init_method parameter
+                is 'uniform'.
+            linear_init_sigma (float): Non-negative standard deviation for
+                initialization of linear terms that takes effect when
+                linear_init_method parameter is 'normal'.
+            linear_init_value (float): Initial value of linear terms that takes
+                effect when linear_init_method parameter is 'constant'.
+            factors_init_method (string): Initialization method for
+                factorization term: 'normal', 'uniform' or 'constant'.
+            factors_init_scale (float): Non-negative range for initialization of
+                factorization terms that takes effect when factors_init_method
+                parameter is 'uniform'.
+ factors_init_sigma (float): Non-negative standard deviation for + initialization of factorization terms that takes effect when + factors_init_method parameter is 'normal'. + factors_init_value (float): Initial value of factorization terms + that takes effect when factors_init_method parameter is + 'constant'. **kwargs: base class keyword argument values. """ super(FactorizationMachines, self).__init__( @@ -205,15 +236,14 @@ def __init__( self.factors_init_value = factors_init_value def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): - """Return a :class:`~sagemaker.amazon.FactorizationMachinesModel` referencing the latest - s3 model data produced by this Estimator. + """Return a :class:`~sagemaker.amazon.FactorizationMachinesModel` + referencing the latest s3 model data produced by this Estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - """ return FactorizationMachinesModel( self.model_data, @@ -224,20 +254,28 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): class FactorizationMachinesPredictor(RealTimePredictor): - """Performs binary-classification or regression prediction from input vectors. + """Performs binary-classification or regression prediction from input + vectors. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the - same number of columns as the feature-dimension of the data used to fit the model this - Predictor performs inference on. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain the same number of columns as the feature-dimension of the data used + to fit the model this Predictor performs inference on. - :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one - for each row in the input ``ndarray``. The prediction is stored in the ``"score"`` - key of the ``Record.label`` field. - Please refer to the formats details described: https://docs.aws.amazon.com/sagemaker/latest/dg/fm-in-formats.html + :meth:`predict()` returns a list of + :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in + the input ``ndarray``. The prediction is stored in the ``"score"`` key of + the ``Record.label`` field. Please refer to the formats details described: + https://docs.aws.amazon.com/sagemaker/latest/dg/fm-in-formats.html """ def __init__(self, endpoint, sagemaker_session=None): + """ + Args: + endpoint: + sagemaker_session: + """ super(FactorizationMachinesPredictor, self).__init__( endpoint, sagemaker_session, @@ -247,10 +285,19 @@ def __init__(self, endpoint, sagemaker_session=None): class FactorizationMachinesModel(Model): - """Reference S3 model data created by FactorizationMachines estimator. Calling :meth:`~sagemaker.model.Model.deploy` - creates an Endpoint and returns :class:`FactorizationMachinesPredictor`.""" + """Reference S3 model data created by FactorizationMachines estimator. 
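Assuming ``fm`` is a fitted ``FactorizationMachines`` estimator and ``test`` a 2D numpy array, deployment and scoring might look like this sketch; the ``Record.label`` access follows the response format documented above:

    # Deploy the trained model behind an endpoint; sizing is a placeholder.
    predictor = fm.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge")

    records = predictor.predict(test)
    # Each Record carries its prediction in the "score" key of Record.label.
    scores = [record.label["score"].float32_tensor.values[0] for record in records]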
+    Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and
+    returns :class:`FactorizationMachinesPredictor`.
+    """

     def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
+        """
+        Args:
+            model_data:
+            role:
+            sagemaker_session:
+            **kwargs:
+        """
         sagemaker_session = sagemaker_session or Session()
         repo = "{}:{}".format(FactorizationMachines.repo_name, FactorizationMachines.repo_version)
         image = "{}/{}".format(registry(sagemaker_session.boto_session.region_name), repo)
diff --git a/src/sagemaker/amazon/hyperparameter.py b/src/sagemaker/amazon/hyperparameter.py
index 15ae928810..7b7c41f5c4 100644
--- a/src/sagemaker/amazon/hyperparameter.py
+++ b/src/sagemaker/amazon/hyperparameter.py
@@ -10,22 +10,33 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 import json


 class Hyperparameter(object):
-    """An algorithm hyperparameter with optional validation. Implemented as a python
-    descriptor object."""
+    """An algorithm hyperparameter with optional validation. Implemented as a
+    python descriptor object.
+    """

     def __init__(self, name, validate=lambda _: True, validation_message="", data_type=str):
-        """Args:
-            name (str): The name of this hyperparameter
-            validate (callable[object]->[bool]): A validation function or list of validation functions.
-                Each function validates an object and returns False if the object value is invalid for
-                this hyperparameter.
-            validation_message (str): A usage guide to display on validation failure.
+        """Initialize a ``Hyperparameter`` descriptor.
+
+        Args:
+            name (str): The name of this hyperparameter.
+            validate (callable[object]->[bool]): A validation function or list
+                of validation functions. Each function validates an object and
+                returns False if the object value is invalid for this
+                hyperparameter.
+            validation_message (str): A usage guide to display on validation
+                failure.
+            data_type: The python type of this hyperparameter. Assigned values
+                are cast to it before validation.
         """
         self.validation = validate
         self.validation_message = validation_message
@@ -37,6 +48,10 @@ def __init__(self, name, validate=lambda _: True, validation_message="", data_type=str):
         self.validation = [self.validation]

     def validate(self, value):
+        """
+        Args:
+            value:
+        """
         if value is None:  # We allow assignment from None, but Nones are not sent to training.
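The descriptor is used throughout the first-party estimators (for example, ``feature_dim`` on ``AmazonAlgorithmEstimatorBase`` earlier in this patch). A minimal sketch of the behavior documented above; the ``Example`` class is hypothetical:

    from sagemaker.amazon import validation
    from sagemaker.amazon.hyperparameter import Hyperparameter as hp

    class Example(object):
        # Stored per-instance in _hyperparameters; validated on assignment.
        feature_dim = hp("feature_dim", validation.gt(0), "An integer greater than 0", int)

    example = Example()
    example.feature_dim = "10"  # cast to int by data_type, then validated
    hyperparams = hp.serialize_all(example)  # {'feature_dim': '10'}
    # example.feature_dim = -1  # would raise ValueError with the usage guide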
return @@ -48,12 +63,22 @@ def validate(self, value): raise ValueError(error_message) def __get__(self, obj, objtype): + """ + Args: + obj: + objtype: + """ if "_hyperparameters" not in dir(obj) or self.name not in obj._hyperparameters: raise AttributeError() return obj._hyperparameters[self.name] def __set__(self, obj, value): - """Validate the supplied value and set this hyperparameter to value""" + """Validate the supplied value and set this hyperparameter to value + + Args: + obj: + value: + """ value = None if value is None else self.data_type(value) self.validate(value) if "_hyperparameters" not in dir(obj): @@ -61,12 +86,21 @@ def __set__(self, obj, value): obj._hyperparameters[self.name] = value def __delete__(self, obj): - """Delete this hyperparameter""" + """Delete this hyperparameter + + Args: + obj: + """ del obj._hyperparameters[self.name] @staticmethod def serialize_all(obj): - """Return all non-None ``hyperparameter`` values on ``obj`` as a ``dict[str,str].``""" + """Return all non-None ``hyperparameter`` values on ``obj`` as a + ``dict[str,str].`` + + Args: + obj: + """ if "_hyperparameters" not in dir(obj): return {} return { diff --git a/src/sagemaker/amazon/ipinsights.py b/src/sagemaker/amazon/ipinsights.py index 5844092196..24c50cb244 100644 --- a/src/sagemaker/amazon/ipinsights.py +++ b/src/sagemaker/amazon/ipinsights.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry @@ -22,6 +23,8 @@ class IPInsights(AmazonAlgorithmEstimatorBase): + """Placeholder docstring""" + repo_name = "ipinsights" repo_version = 1 MINI_BATCH_SIZE = 10000 @@ -63,42 +66,58 @@ def __init__( weight_decay=None, **kwargs ): - """This estimator is for IP Insights, an unsupervised algorithm that learns usage patterns of IP addresses. + """This estimator is for IP Insights, an unsupervised algorithm that + learns usage patterns of IP addresses. This Estimator may be fit via calls to - :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires - CSV data to be stored in S3. + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. + It requires CSV data to be stored in S3. - After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker - Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, - deploy returns a :class:`~sagemaker.amazon.IPInsightPredictor` object that can be used - for inference calls using the trained model hosted in the SageMaker Endpoint. + After this Estimator is fit, model data is stored in S3. The model + may be deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as + deploying an Endpoint, deploy returns a + :class:`~sagemaker.amazon.IPInsightPredictor` object that can be used + for inference calls using the trained model hosted in the SageMaker + Endpoint. IPInsights Estimators can be configured by setting hyperparamters. The available hyperparamters are documented below. 
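The Hyperparameter descriptor diffed above is easiest to follow with a toy consumer. A minimal sketch (ExampleEstimator and its epochs parameter are hypothetical illustrations, not part of this patch):

from sagemaker.amazon.hyperparameter import Hyperparameter


class ExampleEstimator(object):
    # Hypothetical consumer: each class attribute is a descriptor instance.
    epochs = Hyperparameter(
        name="epochs",
        validate=lambda x: x > 0,
        validation_message="epochs must be a positive integer",
        data_type=int,
    )


est = ExampleEstimator()
est.epochs = "10"   # __set__ casts "10" via data_type (int), then validates
print(est.epochs)   # __get__ reads from est._hyperparameters -> 10
print(Hyperparameter.serialize_all(est))  # -> {'epochs': '10'}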
- For further information on the AWS IPInsights algorithm, please consult AWS technical documentation: + For further information on the AWS IPInsights algorithm, please + consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/ip-insights-hyperparameters.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.m5.xlarge'. - num_entity_vectors (int): Required. The number of embeddings to train for entities accessing online - resources. We recommend 2x the total number of unique entity IDs. - vector_dim (int): Required. The size of the embedding vectors for both entity and IP addresses. - batch_metrics_publish_interval (int): Optional. The period at which to publish metrics (batches). - epochs (int): Optional. Maximum number of passes over the training data. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.m5.xlarge'. + num_entity_vectors (int): Required. The number of embeddings to + train for entities accessing online resources. We recommend 2x + the total number of unique entity IDs. + vector_dim (int): Required. The size of the embedding vectors for + both entity and IP addresses. + batch_metrics_publish_interval (int): Optional. The period at which + to publish metrics (batches). + epochs (int): Optional. Maximum number of passes over the training + data. learning_rate (float): Optional. Learning rate for the optimizer. - num_ip_encoder_layers (int): Optional. The number of fully-connected layers to encode IP address embedding. - random_negative_sampling_rate (int): Optional. The ratio of random negative samples to draw during training. - Random negative samples are randomly drawn IPv4 addresses. - shuffled_negative_sampling_rate (int): Optional. The ratio of shuffled negative samples to draw during - training. Shuffled negative samples are IP addresses picked from within a batch. - weight_decay (float): Optional. Weight decay coefficient. Adds L2 regularization. + num_ip_encoder_layers (int): Optional. The number of fully-connected + layers to encode IP address embedding. + random_negative_sampling_rate (int): Optional. The ratio of random + negative samples to draw during training. Random negative + samples are randomly drawn IPv4 addresses. + shuffled_negative_sampling_rate (int): Optional. The ratio of + shuffled negative samples to draw during training. Shuffled + negative samples are IP addresses picked from within a batch. + weight_decay (float): Optional. Weight decay coefficient. Adds L2 + regularization. **kwargs: base class keyword argument values. 
""" super(IPInsights, self).__init__(role, train_instance_count, train_instance_type, **kwargs) @@ -116,12 +135,14 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): """Create a model for the latest s3 model produced by this estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. Returns: - :class:`~sagemaker.amazon.IPInsightsModel`: references the latest s3 model data produced by this estimator. + :class:`~sagemaker.amazon.IPInsightsModel`: references the latest s3 model + data produced by this estimator. """ return IPInsightsModel( self.model_data, @@ -131,6 +152,12 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): ) def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): + """ + Args: + records: + mini_batch_size: + job_name: + """ if mini_batch_size is not None and (mini_batch_size < 1 or mini_batch_size > 500000): raise ValueError("mini_batch_size must be in [1, 500000]") super(IPInsights, self)._prepare_for_training( @@ -139,25 +166,41 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): class IPInsightsPredictor(RealTimePredictor): - """Returns dot product of entity and IP address embeddings as a score for compatibility. - - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain - two columns. The first column should contain the entity ID. The second column should - contain the IPv4 address in dot notation. + """Returns dot product of entity and IP address embeddings as a score for + compatibility. + + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain two columns. The first column should contain the entity ID. The + second column should contain the IPv4 address in dot notation. """ def __init__(self, endpoint, sagemaker_session=None): + """ + Args: + endpoint: + sagemaker_session: + """ super(IPInsightsPredictor, self).__init__( endpoint, sagemaker_session, serializer=csv_serializer, deserializer=json_deserializer ) class IPInsightsModel(Model): - """Reference IPInsights s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an - Endpoint and returns a Predictor that calculates anomaly scores for data points.""" + """Reference IPInsights s3 model data. Calling + :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a + Predictor that calculates anomaly scores for data points. + """ def __init__(self, model_data, role, sagemaker_session=None, **kwargs): + """ + Args: + model_data: + role: + sagemaker_session: + **kwargs: + """ sagemaker_session = sagemaker_session or Session() repo = "{}:{}".format(IPInsights.repo_name, IPInsights.repo_version) image = "{}/{}".format( diff --git a/src/sagemaker/amazon/kmeans.py b/src/sagemaker/amazon/kmeans.py index 633d8dc9a6..df213745b0 100644 --- a/src/sagemaker/amazon/kmeans.py +++ b/src/sagemaker/amazon/kmeans.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. 
See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
@@ -23,6 +24,7 @@


 class KMeans(AmazonAlgorithmEstimatorBase):
+    """Placeholder docstring"""

     repo_name = "kmeans"
     repo_version = 1
@@ -63,53 +65,75 @@ def __init__(
         eval_metrics=None,
         **kwargs
     ):
-        """
-        A k-means clustering :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Finds k clusters of data in an
-        unlabeled dataset.
+        """A k-means clustering
+        :class:`~sagemaker.amazon.AmazonAlgorithmEstimatorBase`. Finds k
+        clusters of data in an unlabeled dataset.

         This Estimator may be fit via calls to
         :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit_ndarray`
-        or :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. The former allows a KMeans model
-        to be fit on a 2-dimensional numpy array. The latter requires Amazon
-        :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3.
-
-        To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please
-        consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html.
-
-        After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker
-        Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint,
-        ``deploy`` returns a :class:`~sagemaker.amazon.kmeans.KMeansPredictor` object that can be used to k-means
-        cluster assignments, using the trained k-means model hosted in the SageMaker Endpoint.
-
-        KMeans Estimators can be configured by setting hyperparameters. The available hyperparameters for KMeans
-        are documented below. For further information on the AWS KMeans algorithm, please consult AWS technical
-        documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/k-means.html.
+        or
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`.
+        The former allows a KMeans model to be fit on a 2-dimensional numpy
+        array. The latter requires Amazon
+        :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to
+        be stored in S3.
+
+        To learn more about the Amazon protobuf Record class and how to
+        prepare bulk data in this format, please consult AWS technical
+        documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html.
+
+        After this Estimator is fit, model data is stored in S3. The model
+        may be deployed to an Amazon SageMaker Endpoint by invoking
+        :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as
+        deploying an Endpoint, ``deploy`` returns a
+        :class:`~sagemaker.amazon.kmeans.KMeansPredictor` object that can be
+        used to obtain k-means cluster assignments, using the trained k-means
+        model hosted in the SageMaker Endpoint.
+
+        KMeans Estimators can be configured by setting hyperparameters. The
+        available hyperparameters for KMeans are documented below. For further
+        information on the AWS KMeans algorithm, please consult AWS technical
+        documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/k-means.html.

         Args:
-            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and
-                APIs that create Amazon SageMaker endpoints use this role to access
-                training data and model artifacts.
After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. k (int): The number of clusters to produce. - init_method (str): How to initialize cluster locations. One of 'random' or 'kmeans++'. - max_iterations (int): Maximum iterations for Lloyds EM procedure in the local kmeans used in finalize stage. - tol (float): Tolerance for change in ssd for early stopping in local kmeans. - num_trials (int): Local version is run multiple times and the one with the best loss is chosen. This - determines how many times. - local_init_method (str): Initialization method for local version. One of 'random', 'kmeans++' - half_life_time_size (int): The points can have a decayed weight. When a point is observed its weight, - with regard to the computation of the cluster mean is 1. This weight will decay exponentially as we - observe more points. The exponent coefficient is chosen such that after observing - ``half_life_time_size`` points after the mentioned point, its weight will become 1/2. If set to 0, - there will be no decay. + init_method (str): How to initialize cluster locations. One of + 'random' or 'kmeans++'. + max_iterations (int): Maximum iterations for Lloyds EM procedure in + the local kmeans used in finalize stage. + tol (float): Tolerance for change in ssd for early stopping in local + kmeans. + num_trials (int): Local version is run multiple times and the one + with the best loss is chosen. This determines how many times. + local_init_method (str): Initialization method for local version. + One of 'random', 'kmeans++' + half_life_time_size (int): The points can have a decayed weight. + When a point is observed its weight, with regard to the + computation of the cluster mean is 1. This weight will decay + exponentially as we observe more points. The exponent + coefficient is chosen such that after observing + ``half_life_time_size`` points after the mentioned point, its + weight will become 1/2. If set to 0, there will be no decay. epochs (int): Number of passes done over the training data. - center_factor(int): The algorithm will create ``num_clusters * extra_center_factor`` as it runs and - reduce the number of centers to ``k`` when finalizing - eval_metrics(list): JSON list of metrics types to be used for reporting the score for the model. - Allowed values are "msd" Means Square Error, "ssd": Sum of square distance. If test data is provided, - the score shall be reported in terms of all requested metrics. + center_factor (int): The algorithm will create + ``num_clusters * extra_center_factor`` as it runs and reduce the + number of centers to ``k`` when finalizing + eval_metrics (list): JSON list of metrics types to be used for + reporting the score for the model. Allowed values are "msd" + Means Square Error, "ssd": Sum of square distance. 
If test data
+                is provided, the score shall be reported in terms of all
+                requested metrics.
             **kwargs: base class keyword argument values.
         """
         super(KMeans, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
@@ -125,11 +149,12 @@ def __init__(
         self.eval_metrics = eval_metrics

     def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT):
-        """Return a :class:`~sagemaker.amazon.kmeans.KMeansModel` referencing the latest
-        s3 model data produced by this Estimator.
+        """Return a :class:`~sagemaker.amazon.kmeans.KMeansModel` referencing
+        the latest s3 model data produced by this Estimator.

         Args:
-            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model.
+            vpc_config_override (dict[str, list[str]]): Optional override for
+                VpcConfig set on the model.
                 Default: use subnets and security groups from this Estimator.
                 * 'Subnets' (list[str]): List of subnet ids.
                 * 'SecurityGroupIds' (list[str]): List of security group ids.
@@ -142,12 +167,20 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT):
         )

     def _prepare_for_training(self, records, mini_batch_size=5000, job_name=None):
+        """
+        Args:
+            records:
+            mini_batch_size:
+            job_name:
+        """
         super(KMeans, self)._prepare_for_training(
             records, mini_batch_size=mini_batch_size, job_name=job_name
         )

     def hyperparameters(self):
-        """Return the SageMaker hyperparameters for training this KMeans Estimator"""
+        """Return the SageMaker hyperparameters for training this KMeans
+        Estimator.
+        """
         hp_dict = dict(force_dense="True")  # KMeans requires this hp to fit on Record objects
         hp_dict.update(super(KMeans, self).hyperparameters())
         return hp_dict
@@ -156,16 +189,24 @@ def hyperparameters(self):

 class KMeansPredictor(RealTimePredictor):
     """Assigns input vectors to their closest cluster in a KMeans model.

-    The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this
-    `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the
-    same number of columns as the feature-dimension of the data used to fit the model this
-    Predictor performs inference on.
+    The implementation of
+    :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this
+    `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should
+    contain the same number of columns as the feature-dimension of the data used
+    to fit the model this Predictor performs inference on.

-    ``predict()`` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one
-    for each row in the input ``ndarray``. The nearest cluster is stored in the ``closest_cluster``
-    key of the ``Record.label`` field."""
+    ``predict()`` returns a list of
+    :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in
+    the input ``ndarray``. The nearest cluster is stored in the
+    ``closest_cluster`` key of the ``Record.label`` field.
+    """

     def __init__(self, endpoint, sagemaker_session=None):
+        """
+        Args:
+            endpoint:
+            sagemaker_session:
+        """
         super(KMeansPredictor, self).__init__(
             endpoint,
             sagemaker_session,
@@ -175,10 +216,19 @@ def __init__(self, endpoint, sagemaker_session=None):

 class KMeansModel(Model):
-    """Reference KMeans s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return
-    a Predictor to performs k-means cluster assignment."""
+    """Reference KMeans s3 model data. Calling
+    :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a
+    Predictor that performs k-means cluster assignment.
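To make the KMeans workflow these docstrings describe concrete, a minimal sketch; the role name and random data are placeholder assumptions, not part of the patch:

import numpy as np
from sagemaker import KMeans

kmeans = KMeans(
    role="SageMakerRole",            # assumption: an existing IAM role
    train_instance_count=1,
    train_instance_type="ml.c4.xlarge",
    k=10,
)

train = np.random.rand(1000, 50).astype("float32")  # 2-D float32 ndarray
kmeans.fit(kmeans.record_set(train))  # record_set uploads protobuf Records to S3

predictor = kmeans.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
result = predictor.predict(train[:5])
# Per the docstring above: one Record per input row, with the assignment
# stored under the "closest_cluster" label key.
print(result[0].label["closest_cluster"].float32_tensor.values[0])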
+ """ def __init__(self, model_data, role, sagemaker_session=None, **kwargs): + """ + Args: + model_data: + role: + sagemaker_session: + **kwargs: + """ sagemaker_session = sagemaker_session or Session() repo = "{}:{}".format(KMeans.repo_name, KMeans.repo_version) image = "{}/{}".format(registry(sagemaker_session.boto_session.region_name), repo) diff --git a/src/sagemaker/amazon/knn.py b/src/sagemaker/amazon/knn.py index 9df4325e34..911dc5427f 100644 --- a/src/sagemaker/amazon/knn.py +++ b/src/sagemaker/amazon/knn.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry @@ -23,6 +24,8 @@ class KNN(AmazonAlgorithmEstimatorBase): + """Placeholder docstring""" + repo_name = "knn" repo_version = 1 @@ -73,46 +76,57 @@ def __init__( faiss_index_pq_m=None, **kwargs ): - """k-nearest neighbors (KNN) is :class:`Estimator` used for classification and regression. - This Estimator may be fit via calls to - :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon - :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3. - There is an utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that - can be used to upload data to S3 and creates :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed - to the `fit` call. - To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please - consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html - After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker - Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, - deploy returns a :class:`~sagemaker.amazon.knn.KNNPredictor` object that can be used - for inference calls using the trained model hosted in the SageMaker Endpoint. - KNN Estimators can be configured by setting hyperparameters. The available hyperparameters for - KNN are documented below. - For further information on the AWS KNN algorithm, - please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/knn.html + """k-nearest neighbors (KNN) is :class:`Estimator` used for + classification and regression. This Estimator may be fit via calls to + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. + It requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` protobuf + serialized data to be stored in S3. There is an utility + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` + that can be used to upload data to S3 and creates + :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed to + the `fit` call. To learn more about the Amazon protobuf Record class and + how to prepare bulk data in this format, please consult AWS technical + documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html After + this Estimator is fit, model data is stored in S3. The model may be + deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. 
As well as + deploying an Endpoint, deploy returns a + :class:`~sagemaker.amazon.knn.KNNPredictor` object that can be used for + inference calls using the trained model hosted in the SageMaker + Endpoint. KNN Estimators can be configured by setting hyperparameters. + The available hyperparameters for KNN are documented below. For further + information on the AWS KNN algorithm, please consult AWS technical + documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/knn.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count: + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. k (int): Required. Number of nearest neighbors. - sample_size(int): Required. Number of data points to be sampled from the training data set. - predictor_type (str): Required. Type of inference to use on the data's labels, - allowed values are 'classifier' and 'regressor'. - dimension_reduction_type (str): Optional. Type of dimension reduction technique to use. - Valid values: "sign", "fjlt" - dimension_reduction_target (int): Optional. Target dimension to reduce to. Required when - dimension_reduction_type is specified. + sample_size (int): Required. Number of data points to be sampled + from the training data set. + predictor_type (str): Required. Type of inference to use on the + data's labels, allowed values are 'classifier' and 'regressor'. + dimension_reduction_type (str): Optional. Type of dimension + reduction technique to use. Valid values: "sign", "fjlt" + dimension_reduction_target (int): Optional. Target dimension to + reduce to. Required when dimension_reduction_type is specified. index_type (str): Optional. Type of index to use. Valid values are "faiss.Flat", "faiss.IVFFlat", "faiss.IVFPQ". - index_metric(str): Optional. Distance metric to measure between points when finding nearest neighbors. - Valid values are "COSINE", "INNER_PRODUCT", "L2" - faiss_index_ivf_nlists(str): Optional. Number of centroids to construct in the index if - index_type is "faiss.IVFFlat" or "faiss.IVFPQ". - faiss_index_pq_m(int): Optional. Number of vector sub-components to construct in the index, - if index_type is "faiss.IVFPQ". + index_metric (str): Optional. Distance metric to measure between + points when finding nearest neighbors. Valid values are + "COSINE", "INNER_PRODUCT", "L2" + faiss_index_ivf_nlists (str): Optional. Number of centroids to + construct in the index if index_type is "faiss.IVFFlat" or + "faiss.IVFPQ". + faiss_index_pq_m (int): Optional. Number of vector sub-components to + construct in the index, if index_type is "faiss.IVFPQ". **kwargs: base class keyword argument values. """ @@ -136,8 +150,8 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): s3 model data produced by this Estimator. 
Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ @@ -149,6 +163,12 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): ) def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): + """ + Args: + records: + mini_batch_size: + job_name: + """ super(KNN, self)._prepare_for_training( records, mini_batch_size=mini_batch_size, job_name=job_name ) @@ -157,16 +177,24 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): class KNNPredictor(RealTimePredictor): """Performs classification or regression prediction from input vectors. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the - same number of columns as the feature-dimension of the data used to fit the model this - Predictor performs inference on. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain the same number of columns as the feature-dimension of the data used + to fit the model this Predictor performs inference on. - :func:`predict` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one - for each row in the input ``ndarray``. The prediction is stored in the ``"predicted_label"`` - key of the ``Record.label`` field.""" + :func:`predict` returns a list of + :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in + the input ``ndarray``. The prediction is stored in the ``"predicted_label"`` + key of the ``Record.label`` field. + """ def __init__(self, endpoint, sagemaker_session=None): + """ + Args: + endpoint: + sagemaker_session: + """ super(KNNPredictor, self).__init__( endpoint, sagemaker_session, @@ -176,10 +204,19 @@ def __init__(self, endpoint, sagemaker_session=None): class KNNModel(Model): - """Reference S3 model data created by KNN estimator. Calling :meth:`~sagemaker.model.Model.deploy` - creates an Endpoint and returns :class:`KNNPredictor`.""" + """Reference S3 model data created by KNN estimator. Calling + :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns + :class:`KNNPredictor`. + """ def __init__(self, model_data, role, sagemaker_session=None, **kwargs): + """ + Args: + model_data: + role: + sagemaker_session: + **kwargs: + """ sagemaker_session = sagemaker_session or Session() repo = "{}:{}".format(KNN.repo_name, KNN.repo_version) image = "{}/{}".format( diff --git a/src/sagemaker/amazon/lda.py b/src/sagemaker/amazon/lda.py index 8c7cf71e12..117e9f9086 100644 --- a/src/sagemaker/amazon/lda.py +++ b/src/sagemaker/amazon/lda.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
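The KNN classes above compose the same way as the other first-party algorithms. A minimal end-to-end sketch (the role name and synthetic data are placeholder assumptions):

import numpy as np
from sagemaker import KNN

knn = KNN(
    role="SageMakerRole",              # assumption: an existing IAM role
    train_instance_count=1,
    train_instance_type="ml.m5.xlarge",
    k=10,
    sample_size=5000,
    predictor_type="classifier",       # or "regressor"
)

features = np.random.rand(5000, 20).astype("float32")
labels = np.random.randint(0, 2, 5000).astype("float32")
knn.fit(knn.record_set(features, labels=labels))

predictor = knn.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
# Per KNNPredictor's docstring: predictions land in the "predicted_label" key.
print(predictor.predict(features[:3])[0].label["predicted_label"].float32_tensor.values[0])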
+"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry @@ -23,6 +24,7 @@ class LDA(AmazonAlgorithmEstimatorBase): + """Placeholder docstring""" repo_name = "lda" repo_version = 1 @@ -44,48 +46,65 @@ def __init__( tol=None, **kwargs ): - """Latent Dirichlet Allocation (LDA) is :class:`Estimator` used for unsupervised learning. + """Latent Dirichlet Allocation (LDA) is :class:`Estimator` used for + unsupervised learning. - Amazon SageMaker Latent Dirichlet Allocation is an unsupervised learning algorithm that attempts to describe - a set of observations as a mixture of distinct categories. LDA is most commonly used to discover - a user-specified number of topics shared by documents within a text corpus. - Here each observation is a document, the features are the presence (or occurrence count) of each word, and - the categories are the topics. + Amazon SageMaker Latent Dirichlet Allocation is an unsupervised + learning algorithm that attempts to describe a set of observations as a + mixture of distinct categories. LDA is most commonly used to discover a + user-specified number of topics shared by documents within a text + corpus. Here each observation is a document, the features are the + presence (or occurrence count) of each word, and the categories are the + topics. This Estimator may be fit via calls to - :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon - :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3. - There is an utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that - can be used to upload data to S3 and creates :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed - to the `fit` call. - - To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please - consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html - - After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker - Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, - deploy returns a :class:`~sagemaker.amazon.lda.LDAPredictor` object that can be used - for inference calls using the trained model hosted in the SageMaker Endpoint. - - LDA Estimators can be configured by setting hyperparameters. The available hyperparameters for - LDA are documented below. - - For further information on the AWS LDA algorithm, - please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/lda.html + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. + It requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` protobuf + serialized data to be stored in S3. There is an utility + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` + that can be used to upload data to S3 and creates + :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed to + the `fit` call. + + To learn more about the Amazon protobuf Record class and how to + prepare bulk data in this format, please consult AWS technical + documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html + + After this Estimator is fit, model data is stored in S3. 
The model + may be deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as + deploying an Endpoint, deploy returns a + :class:`~sagemaker.amazon.lda.LDAPredictor` object that can be used for + inference calls using the trained model hosted in the SageMaker + Endpoint. + + LDA Estimators can be configured by setting hyperparameters. The + available hyperparameters for LDA are documented below. + + For further information on the AWS LDA algorithm, please consult AWS + technical documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/lda.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - num_topics (int): The number of topics for LDA to find within the data. - alpha0 (float): Optional. Initial guess for the concentration parameter - max_restarts (int): Optional. The number of restarts to perform during the Alternating Least Squares (ALS) - spectral decomposition phase of the algorithm. - max_iterations (int): Optional. The maximum number of iterations to perform during the - ALS phase of the algorithm. - tol (float): Optional. Target error tolerance for the ALS phase of the algorithm. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. + num_topics (int): The number of topics for LDA to find within the + data. + alpha0 (float): Optional. Initial guess for the concentration + parameter + max_restarts (int): Optional. The number of restarts to perform + during the Alternating Least Squares (ALS) spectral + decomposition phase of the algorithm. + max_iterations (int): Optional. The maximum number of iterations to + perform during the ALS phase of the algorithm. + tol (float): Optional. Target error tolerance for the ALS phase of + the algorithm. **kwargs: base class keyword argument values. """ # this algorithm only supports single instance training @@ -108,7 +127,8 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): s3 model data produced by this Estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. + vpc_config_override (dict[str, list[str]]): Optional override for + VpcConfig set on the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. 
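A minimal sketch of the LDA calls this docstring walks through; the role and the synthetic corpus are placeholder assumptions. Note that LDA trains on a single instance, and the _prepare_for_training override in this file enforces an explicit mini_batch_size:

import numpy as np
from sagemaker import LDA

lda = LDA(
    role="SageMakerRole",                # assumption: an existing IAM role
    train_instance_type="ml.c4.xlarge",  # no instance count: single-instance only
    num_topics=20,
)

# Toy bag-of-words matrix: 100 documents x 500 vocabulary terms.
docs = np.random.randint(0, 5, (100, 500)).astype("float32")
lda.fit(lda.record_set(docs), mini_batch_size=50)  # mini_batch_size must be set

predictor = lda.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
# Per LDAPredictor's docstring: the topic mixture is under the "projection" key.
print(predictor.predict(docs[:1])[0].label["projection"].float32_tensor.values)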
@@ -124,6 +144,12 @@ def _prepare_for_training( # pylint: disable=signature-differs self, records, mini_batch_size, job_name=None ): # mini_batch_size is required, prevent explicit calls with None + """ + Args: + records: + mini_batch_size: + job_name: + """ if mini_batch_size is None: raise ValueError("mini_batch_size must be set") @@ -135,16 +161,24 @@ def _prepare_for_training( # pylint: disable=signature-differs class LDAPredictor(RealTimePredictor): """Transforms input vectors to lower-dimesional representations. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the - same number of columns as the feature-dimension of the data used to fit the model this - Predictor performs inference on. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain the same number of columns as the feature-dimension of the data used + to fit the model this Predictor performs inference on. - :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one - for each row in the input ``ndarray``. The lower dimension vector result is stored in the ``projection`` - key of the ``Record.label`` field.""" + :meth:`predict()` returns a list of + :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in + the input ``ndarray``. The lower dimension vector result is stored in the + ``projection`` key of the ``Record.label`` field. + """ def __init__(self, endpoint, sagemaker_session=None): + """ + Args: + endpoint: + sagemaker_session: + """ super(LDAPredictor, self).__init__( endpoint, sagemaker_session, @@ -154,10 +188,19 @@ def __init__(self, endpoint, sagemaker_session=None): class LDAModel(Model): - """Reference LDA s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return - a Predictor that transforms vectors to a lower-dimensional representation.""" + """Reference LDA s3 model data. Calling + :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return a + Predictor that transforms vectors to a lower-dimensional representation. + """ def __init__(self, model_data, role, sagemaker_session=None, **kwargs): + """ + Args: + model_data: + role: + sagemaker_session: + **kwargs: + """ sagemaker_session = sagemaker_session or Session() repo = "{}:{}".format(LDA.repo_name, LDA.repo_version) image = "{}/{}".format( diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py index 9efb32f6a7..e33caea804 100644 --- a/src/sagemaker/amazon/linear_learner.py +++ b/src/sagemaker/amazon/linear_learner.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry @@ -23,6 +24,8 @@ class LinearLearner(AmazonAlgorithmEstimatorBase): + """Placeholder docstring""" + repo_name = "linear-learner" repo_version = 1 @@ -165,111 +168,155 @@ def __init__( ): """An :class:`Estimator` for binary classification and regression. 
- Amazon SageMaker Linear Learner provides a solution for both classification and regression problems, allowing - for exploring different training objectives simultaneously and choosing the best solution from a validation set. - It allows the user to explore a large number of models and choose the best, which optimizes either continuous - objectives such as mean square error, cross entropy loss, absolute error, etc., or discrete objectives suited - for classification such as F1 measure, precision@recall, accuracy. The implementation provides a significant - speedup over naive hyperparameter optimization techniques and an added convenience, when compared with - solutions providing a solution only to continuous objectives. + Amazon SageMaker Linear Learner provides a solution for both + classification and regression problems, allowing for exploring different + training objectives simultaneously and choosing the best solution from a + validation set. It allows the user to explore a large number of models + and choose the best, which optimizes either continuous objectives such + as mean square error, cross entropy loss, absolute error, etc., or + discrete objectives suited for classification such as F1 measure, + precision@recall, accuracy. The implementation provides a significant + speedup over naive hyperparameter optimization techniques and an added + convenience, when compared with solutions providing a solution only to + continuous objectives. This Estimator may be fit via calls to :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit_ndarray` - or :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. The former allows a - LinearLearner model to be fit on a 2-dimensional numpy array. The latter requires Amazon - :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3. + or + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. + The former allows a LinearLearner model to be fit on a 2-dimensional + numpy array. The latter requires Amazon + :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to + be stored in S3. - To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please - consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html + To learn more about the Amazon protobuf Record class and how to + prepare bulk data in this format, please consult AWS technical + documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html - After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker - Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint, - ``deploy`` returns a :class:`~sagemaker.amazon.linear_learner.LinearLearnerPredictor` object that can be used - to make class or regression predictions, using the trained model. + After this Estimator is fit, model data is stored in S3. The model + may be deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as + deploying an Endpoint, ``deploy`` returns a + :class:`~sagemaker.amazon.linear_learner.LinearLearnerPredictor` object + that can be used to make class or regression predictions, using the + trained model. - LinearLearner Estimators can be configured by setting hyperparameters. The available hyperparameters for - LinearLearner are documented below. 
For further information on the AWS LinearLearner algorithm, please consult - AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/linear-learner.html + LinearLearner Estimators can be configured by setting + hyperparameters. The available hyperparameters for LinearLearner are + documented below. For further information on the AWS LinearLearner + algorithm, please consult AWS technical documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/linear-learner.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - predictor_type (str): The type of predictor to learn. Either "binary_classifier" or - "multiclass_classifier" or "regressor". - binary_classifier_model_selection_criteria (str): One of 'accuracy', 'f1', 'f_beta', - 'precision_at_target_recall', 'recall_at_target_precision', 'cross_entropy_loss', 'loss_function' - target_recall (float): Target recall. Only applicable if binary_classifier_model_selection_criteria is + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. + predictor_type (str): The type of predictor to learn. Either + "binary_classifier" or "multiclass_classifier" or "regressor". + binary_classifier_model_selection_criteria (str): One of 'accuracy', + 'f1', 'f_beta', 'precision_at_target_recall', 'recall_at_target_precision', + 'cross_entropy_loss', 'loss_function' + target_recall (float): Target recall. Only applicable if + binary_classifier_model_selection_criteria is precision_at_target_recall. - target_precision (float): Target precision. Only applicable if binary_classifier_model_selection_criteria - is recall_at_target_precision. - positive_example_weight_mult (float): The importance weight of positive examples is multiplied by this - constant. Useful for skewed datasets. Only applies for classification tasks. - epochs (int): The maximum number of passes to make over the training data. + target_precision (float): Target precision. Only applicable if + binary_classifier_model_selection_criteria is + recall_at_target_precision. + positive_example_weight_mult (float): The importance weight of + positive examples is multiplied by this constant. Useful for + skewed datasets. Only applies for classification tasks. + epochs (int): The maximum number of passes to make over the training + data. use_bias (bool): Whether to include a bias field - num_models (int): Number of models to train in parallel. If not set, the number of parallel models to - train will be decided by the algorithm itself. One model will be trained according to the given training - parameter (regularization, optimizer, loss) and the rest by close by parameters. 
- num_calibration_samples (int): Number of observations to use from validation dataset for doing model - calibration (finding the best threshold). - init_method (str): Function to use to set the initial model weights. One of "uniform" or "normal" + num_models (int): Number of models to train in parallel. If not set, + the number of parallel models to train will be decided by the + algorithm itself. One model will be trained according to the + given training parameter (regularization, optimizer, loss) and + the rest by close by parameters. + num_calibration_samples (int): Number of observations to use from + validation dataset for doing model calibration (finding the best threshold). + init_method (str): Function to use to set the initial model weights. + One of "uniform" or "normal" init_scale (float): For "uniform" init, the range of values. init_sigma (float): For "normal" init, the standard-deviation. - init_bias (float): Initial weight for bias term + init_bias (float): Initial weight for bias term optimizer (str): One of 'sgd', 'adam', 'rmsprop' or 'auto' - loss (str): One of 'logistic', 'squared_loss', 'absolute_loss', 'hinge_loss', - 'eps_insensitive_squared_loss', 'eps_insensitive_absolute_loss', 'quantile_loss', 'huber_loss' or + loss (str): One of 'logistic', 'squared_loss', 'absolute_loss', + 'hinge_loss', 'eps_insensitive_squared_loss', 'eps_insensitive_absolute_loss', + 'quantile_loss', 'huber_loss' or 'softmax_loss' or 'auto'. - wd (float): L2 regularization parameter i.e. the weight decay parameter. Use 0 for no L2 regularization. - l1 (float): L1 regularization parameter. Use 0 for no L1 regularization. + wd (float): L2 regularization parameter i.e. the weight decay + parameter. Use 0 for no L2 regularization. + l1 (float): L1 regularization parameter. Use 0 for no L1 + regularization. momentum (float): Momentum parameter of sgd optimizer. learning_rate (float): The SGD learning rate - beta_1 (float): Exponential decay rate for first moment estimates. Only applies for adam optimizer. - beta_2 (float): Exponential decay rate for second moment estimates. Only applies for adam optimizer. - bias_lr_mult (float): Allows different learning rate for the bias term. The actual learning rate for the - bias is learning rate times bias_lr_mult. - bias_wd_mult (float): Allows different regularization for the bias term. The actual L2 regularization weight - for the bias is wd times bias_wd_mult. By default there is no regularization on the bias term. - use_lr_scheduler (bool): If true, we use a scheduler for the learning rate. - lr_scheduler_step (int): The number of steps between decreases of the learning rate. Only applies to - learning rate scheduler. - lr_scheduler_factor (float): Every lr_scheduler_step the learning rate will decrease by this quantity. - Only applies for learning rate scheduler. - lr_scheduler_minimum_lr (float): The learning rate will never decrease to a value lower than this. - lr_scheduler_minimum_lr (float): Only applies for learning rate scheduler. - normalize_data (bool): Normalizes the features before training to have standard deviation of 1.0. - normalize_label (bool): Normalizes the regression label to have a standard deviation of 1.0. - If set for classification, it will be ignored. + beta_1 (float): Exponential decay rate for first moment estimates. + Only applies for adam optimizer. + beta_2 (float): Exponential decay rate for second moment estimates. + Only applies for adam optimizer. 
+ bias_lr_mult (float): Allows different learning rate for the bias + term. The actual learning rate for the bias is learning rate times bias_lr_mult. + bias_wd_mult (float): Allows different regularization for the bias + term. The actual L2 regularization weight for the bias is wd times bias_wd_mult. + By default there is no regularization on the bias term. + use_lr_scheduler (bool): If true, we use a scheduler for the + learning rate. + lr_scheduler_step (int): The number of steps between decreases of + the learning rate. Only applies to learning rate scheduler. + lr_scheduler_factor (float): Every lr_scheduler_step the learning + rate will decrease by this quantity. Only applies for learning + rate scheduler. + lr_scheduler_minimum_lr (float): The learning rate will never + decrease to a value lower than this. Only applies for learning rate scheduler. + normalize_data (bool): Normalizes the features before training to + have standard deviation of 1.0. + normalize_label (bool): Normalizes the regression label to have a + standard deviation of 1.0. If set for classification, it will be + ignored. unbias_data (bool): If true, features are modified to have mean 0.0. unbias_label (bool): If true, labels are modified to have mean 0.0. - num_point_for_scaler (int): The number of data points to use for calculating the normalizing and - unbiasing terms. - margin (float): the margin for hinge_loss. - quantile (float): Quantile for quantile loss. For quantile q, the model will attempt to produce - predictions such that true_label < prediction with probability q. - loss_insensitivity (float): Parameter for epsilon insensitive loss type. During training and metric - evaluation, any error smaller than this is considered to be zero. - huber_delta (float): Parameter for Huber loss. During training and metric evaluation, compute L2 loss for - errors smaller than delta and L1 loss for errors larger than delta. - early_stopping_patience (int): the number of epochs to wait before ending training if no improvement is - made. The improvement is training loss if validation data is not provided, or else it is the validation - loss or the binary classification model selection criteria like accuracy, f1-score etc. To disable early - stopping, set early_stopping_patience to a value larger than epochs. - early_stopping_tolerance (float): Relative tolerance to measure an improvement in loss. If the ratio of - the improvement in loss divided by the previous best loss is smaller than this value, early stopping will + num_point_for_scaler (int): The number of data points to use for + calculating the normalizing and unbiasing terms. + margin (float): the margin for hinge_loss. + quantile (float): Quantile for quantile loss. For quantile q, the + model will attempt to produce predictions such that true_label < prediction with + probability q. + loss_insensitivity (float): Parameter for epsilon insensitive loss + type. During training and metric evaluation, any error smaller than this is + considered to be zero. + huber_delta (float): Parameter for Huber loss. During training and + metric evaluation, compute L2 loss for errors smaller than delta and L1 loss for + errors larger than delta. + early_stopping_patience (int): the number of epochs to wait before ending training + if no improvement is made. The improvement is training loss if validation data is + not provided, or else it is the validation loss or the binary classification model + selection criteria like accuracy, f1-score etc. 
To disable early stopping, + set early_stopping_patience to a value larger than epochs. + early_stopping_tolerance (float): Relative tolerance to measure an + improvement in loss. If the ratio of the improvement in loss divided by the + previous best loss is smaller than this value, early stopping will consider the improvement to be zero. - num_classes (int): The number of classes for the response variable. Required when predictor_type is - multiclass_classifier and ignored otherwise. The classes are assumed to be labeled 0, ..., num_classes - 1. - accuracy_top_k (int): The value of k when computing the Top K Accuracy metric for multiclass - classification. An example is scored as correct if the model assigns one of the top k scores to the true + num_classes (int): The number of classes for the response variable. + Required when predictor_type is multiclass_classifier and ignored otherwise. The + classes are assumed to be labeled 0, ..., num_classes - 1. + accuracy_top_k (int): The value of k when computing the Top K + Accuracy metric for multiclass classification. An example is scored as correct + if the model assigns one of the top k scores to the true label. - f_beta (float): The value of beta to use when calculating F score metrics for binary or multiclass - classification. Also used if binary_classifier_model_selection_criteria is f_beta. - balance_multiclass_weights (bool): Whether to use class weights which give each class equal importance in - the loss function. Only used when predictor_type is multiclass_classifier. + f_beta (float): The value of beta to use when calculating F score + metrics for binary or multiclass classification. Also used if + binary_classifier_model_selection_criteria is f_beta. + balance_multiclass_weights (bool): Whether to use class weights + which give each class equal importance in the loss function. Only used when + predictor_type is multiclass_classifier. **kwargs: base class keyword argument values. """ super(LinearLearner, self).__init__( @@ -326,12 +373,12 @@ def __init__( ) def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): - """Return a :class:`~sagemaker.amazon.LinearLearnerModel` referencing the latest - s3 model data produced by this Estimator. + """Return a :class:`~sagemaker.amazon.LinearLearnerModel` referencing + the latest s3 model data produced by this Estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ @@ -343,6 +390,12 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): ) def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): + """ + Args: + records: + mini_batch_size: + job_name: + """ num_records = None if isinstance(records, list): for record in records: @@ -365,18 +418,27 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): class LinearLearnerPredictor(RealTimePredictor): - """Performs binary-classification or regression prediction from input vectors. + """Performs binary-classification or regression prediction from input + vectors. 
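Tying the hyperparameter surface above to actual calls, a minimal sketch (the role and random data are placeholder assumptions):

import numpy as np
from sagemaker import LinearLearner

ll = LinearLearner(
    role="SageMakerRole",            # assumption: an existing IAM role
    train_instance_count=1,
    train_instance_type="ml.c4.xlarge",
    predictor_type="binary_classifier",
)

features = np.random.rand(2000, 30).astype("float32")
labels = np.random.randint(0, 2, 2000).astype("float32")
ll.fit(ll.record_set(features, labels=labels))

predictor = ll.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
# Per the predictor docstring that follows: one Record per row, with the class
# under the "predicted_label" key (a raw "score" is returned alongside it).
print(predictor.predict(features[:3])[0].label["predicted_label"].float32_tensor.values[0])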

-    The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this
-    `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the
-    same number of columns as the feature-dimension of the data used to fit the model this
-    Predictor performs inference on.
+    The implementation of
+    :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this
+    `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should
+    contain the same number of columns as the feature-dimension of the data used
+    to fit the model this Predictor performs inference on.

-    :func:`predict` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one
-    for each row in the input ``ndarray``. The prediction is stored in the ``"predicted_label"``
-    key of the ``Record.label`` field."""
+    :func:`predict` returns a list of
+    :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in
+    the input ``ndarray``. The prediction is stored in the ``"predicted_label"``
+    key of the ``Record.label`` field.
+    """

     def __init__(self, endpoint, sagemaker_session=None):
+        """
+        Args:
+            endpoint:
+            sagemaker_session:
+        """
         super(LinearLearnerPredictor, self).__init__(
             endpoint,
             sagemaker_session,
@@ -386,10 +448,19 @@ def __init__(self, endpoint, sagemaker_session=None):


 class LinearLearnerModel(Model):
-    """Reference LinearLearner s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint
-    and returns a :class:`LinearLearnerPredictor`"""
+    """Reference LinearLearner s3 model data. Calling
+    :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a
+    :class:`LinearLearnerPredictor`
+    """

     def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
+        """
+        Args:
+            model_data:
+            role:
+            sagemaker_session:
+            **kwargs:
+        """
         sagemaker_session = sagemaker_session or Session()
         repo = "{}:{}".format(LinearLearner.repo_name, LinearLearner.repo_version)
         image = "{}/{}".format(registry(sagemaker_session.boto_session.region_name), repo)
diff --git a/src/sagemaker/amazon/ntm.py b/src/sagemaker/amazon/ntm.py
index 223e4f322e..e765984e40 100644
--- a/src/sagemaker/amazon/ntm.py
+++ b/src/sagemaker/amazon/ntm.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
@@ -23,6 +24,7 @@


 class NTM(AmazonAlgorithmEstimatorBase):
+    """Placeholder docstring"""

     repo_name = "ntm"
     repo_version = 1
@@ -73,49 +75,68 @@ def __init__(
         learning_rate=None,
         **kwargs
     ):
-        """Neural Topic Model (NTM) is :class:`Estimator` used for unsupervised learning.
+        """Neural Topic Model (NTM) is an :class:`Estimator` used for unsupervised
+        learning.

         This Estimator may be fit via calls to
-        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon
-        :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3.
-        There is an utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that
-        can be used to upload data to S3 and creates :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed
-        to the `fit` call.
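Continuing the hypothetical LinearLearner sketch above, the ``"predicted_label"`` field described in the predictor docstring can be read back out of the returned Records after deploying the fitted estimator; endpoint sizing here is illustrative:

    # `ll` and `features` are the hypothetical objects from the sketch above.
    predictor = ll.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")

    for record in predictor.predict(features[:3]):
        # One Record per input row; the prediction lives in Record.label.
        print(record.label["predicted_label"].float32_tensor.values)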
-
-        To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please
-        consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
-
-        After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker
-        Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an Endpoint,
-        deploy returns a :class:`~sagemaker.amazon.ntm.NTMPredictor` object that can be used
-        for inference calls using the trained model hosted in the SageMaker Endpoint.
-
-        NTM Estimators can be configured by setting hyperparameters. The available hyperparameters for
-        NTM are documented below.
-
-        For further information on the AWS NTM algorithm,
-        please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/ntm.html
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`.
+        It requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` protobuf
+        serialized data to be stored in S3. There is a utility
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set`
+        that can be used to upload data to S3 and creates
+        :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed to
+        the `fit` call.
+
+        To learn more about the Amazon protobuf Record class and how to
+        prepare bulk data in this format, please consult AWS technical
+        documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
+
+        After this Estimator is fit, model data is stored in S3. The model
+        may be deployed to an Amazon SageMaker Endpoint by invoking
+        :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as
+        deploying an Endpoint, deploy returns a
+        :class:`~sagemaker.amazon.ntm.NTMPredictor` object that can be used for
+        inference calls using the trained model hosted in the SageMaker
+        Endpoint.
+
+        NTM Estimators can be configured by setting hyperparameters. The
+        available hyperparameters for NTM are documented below.
+
+        For further information on the AWS NTM algorithm, please consult AWS
+        technical documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/ntm.html

         Args:
-            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and
-            APIs that create Amazon SageMaker endpoints use this role to access
-            training data and model artifacts. After the endpoint is created,
-            the inference code might use the IAM role, if accessing AWS resource.
-            train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'.
-            num_topics (int): Required. The number of topics for NTM to find within the data.
-            encoder_layers (list): Optional. Represents number of layers in the encoder and the output size of
-            each layer.
-            epochs (int): Optional. Maximum number of passes over the training data.
-            encoder_layers_activation (str): Optional. Activation function to use in the encoder layers.
+            role (str): An AWS IAM role (either name or full ARN). The Amazon
+                SageMaker training jobs and APIs that create Amazon SageMaker
+                endpoints use this role to access training data and model
+                artifacts. After the endpoint is created, the inference code
+                might use the IAM role, if accessing AWS resource.
+            train_instance_count:
+            train_instance_type (str): Type of EC2 instance to use for training,
+                for example, 'ml.c4.xlarge'.
+            num_topics (int): Required. The number of topics for NTM to find
+                within the data.
+ encoder_layers (list): Optional. Represents number of layers in the + encoder and the output size of each layer. + epochs (int): Optional. Maximum number of passes over the training + data. + encoder_layers_activation (str): Optional. Activation function to + use in the encoder layers. optimizer (str): Optional. Optimizer to use for training. - tolerance (float): Optional. Maximum relative change in the loss function within the last - num_patience_epochs number of epochs below which early stopping is triggered. - num_patience_epochs (int): Optional. Number of successive epochs over which early stopping criterion - is evaluated. - batch_norm (bool): Optional. Whether to use batch normalization during training. + tolerance (float): Optional. Maximum relative change in the loss + function within the last num_patience_epochs number of epochs + below which early stopping is triggered. + num_patience_epochs (int): Optional. Number of successive epochs + over which early stopping criterion is evaluated. + batch_norm (bool): Optional. Whether to use batch normalization + during training. rescale_gradient (float): Optional. Rescale factor for gradient. - clip_gradient (float): Optional. Maximum magnitude for each gradient component. - weight_decay (float): Optional. Weight decay coefficient. Adds L2 regularization. + clip_gradient (float): Optional. Maximum magnitude for each gradient + component. + weight_decay (float): Optional. Weight decay coefficient. Adds L2 + regularization. learning_rate (float): Optional. Learning rate for the optimizer. **kwargs: base class keyword argument values. """ @@ -139,8 +160,8 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): s3 model data produced by this Estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ @@ -154,6 +175,12 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): def _prepare_for_training( # pylint: disable=signature-differs self, records, mini_batch_size, job_name=None ): + """ + Args: + records: + mini_batch_size: + job_name: + """ if mini_batch_size is not None and (mini_batch_size < 1 or mini_batch_size > 10000): raise ValueError("mini_batch_size must be in [1, 10000]") super(NTM, self)._prepare_for_training( @@ -164,16 +191,24 @@ def _prepare_for_training( # pylint: disable=signature-differs class NTMPredictor(RealTimePredictor): """Transforms input vectors to lower-dimesional representations. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the - same number of columns as the feature-dimension of the data used to fit the model this - Predictor performs inference on. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain the same number of columns as the feature-dimension of the data used + to fit the model this Predictor performs inference on. - :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one - for each row in the input ``ndarray``. 
The lower dimension vector result is stored in the ``projection``
-    key of the ``Record.label`` field."""
+    :meth:`predict()` returns a list of
+    :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in
+    the input ``ndarray``. The lower dimension vector result is stored in the
+    ``projection`` key of the ``Record.label`` field.
+    """

     def __init__(self, endpoint, sagemaker_session=None):
+        """
+        Args:
+            endpoint:
+            sagemaker_session:
+        """
         super(NTMPredictor, self).__init__(
             endpoint,
             sagemaker_session,
@@ -183,10 +218,19 @@ def __init__(self, endpoint, sagemaker_session=None):


 class NTMModel(Model):
-    """Reference NTM s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return
-    a Predictor that transforms vectors to a lower-dimensional representation."""
+    """Reference NTM s3 model data. Calling
+    :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a
+    Predictor that transforms vectors to a lower-dimensional representation.
+    """

     def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
+        """
+        Args:
+            model_data:
+            role:
+            sagemaker_session:
+            **kwargs:
+        """
         sagemaker_session = sagemaker_session or Session()
         repo = "{}:{}".format(NTM.repo_name, NTM.repo_version)
         image = "{}/{}".format(
diff --git a/src/sagemaker/amazon/object2vec.py b/src/sagemaker/amazon/object2vec.py
index 8aaad2bd47..2d1450875d 100644
--- a/src/sagemaker/amazon/object2vec.py
+++ b/src/sagemaker/amazon/object2vec.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
@@ -22,6 +23,10 @@


 def _list_check_subset(valid_super_list):
+    """
+    Args:
+        valid_super_list:
+    """
     valid_superset = set(valid_super_list)

     def validate(value):
@@ -35,6 +40,7 @@ def validate(value):


 class Object2Vec(AmazonAlgorithmEstimatorBase):
+    """Placeholder docstring"""

     repo_name = "object2vec"
     repo_version = 1
@@ -168,66 +174,86 @@ def __init__(

         This Estimator may be fit via calls to
         :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`.
-        There is an utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that
-        can be used to upload data to S3 and creates :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed
-        to the `fit` call.
-
-        After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker
-        Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an
-        Endpoint, deploy returns a :class:`~sagemaker.amazon.RealTimePredictor` object that can be used
-        for inference calls using the trained model hosted in the SageMaker Endpoint.
-
-        Object2Vec Estimators can be configured by setting hyperparameters. The available hyperparameters for
-        Object2Vec are documented below.
-
-        For further information on the AWS Object2Vec algorithm,
-        please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/object2vec.html
+        There is a utility
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set`
+        that can be used to upload data to S3 and creates
+        :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed to
+        the `fit` call.
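A hedged end-to-end sketch of the NTM workflow covered just above (the role, corpus shape, and instance types are placeholders; ``projection`` is the key named in the NTMPredictor docstring above):

    import numpy as np

    from sagemaker.amazon.ntm import NTM

    ntm = NTM(
        role="arn:aws:iam::123456789012:role/ExampleSageMakerRole",  # placeholder
        train_instance_count=1,
        train_instance_type="ml.c4.xlarge",
        num_topics=10,
    )

    # One float32 bag-of-words row per document.
    corpus = np.random.rand(500, 2000).astype("float32")
    ntm.fit(ntm.record_set(corpus))

    predictor = ntm.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
    for record in predictor.predict(corpus[:2]):
        print(record.label["projection"].float32_tensor.values)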
+ + After this Estimator is fit, model data is stored in S3. The model + may be deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as + deploying an Endpoint, deploy returns a + :class:`~sagemaker.amazon.RealTimePredictor` object that can be used for + inference calls using the trained model hosted in the SageMaker + Endpoint. + + Object2Vec Estimators can be configured by setting hyperparameters. + The available hyperparameters for Object2Vec are documented below. + + For further information on the AWS Object2Vec algorithm, please + consult AWS technical documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/object2vec.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - - epochs(int): Total number of epochs for SGD training - enc0_max_seq_len(int): Maximum sequence length - enc0_vocab_size(int): Vocabulary size of tokens - - enc_dim(int): Optional. Dimension of the output of the embedding layer - mini_batch_size(int): Optional. mini batch size for SGD training - early_stopping_patience(int): Optional. The allowed number of consecutive epochs without improvement - before early stopping is applied - early_stopping_tolerance(float): Optional. The value used to determine whether the algorithm has made - improvement between two consecutive epochs for early stopping - dropout(float): Optional. Dropout probability on network layers - weight_decay(float): Optional. Weight decay parameter during optimization - bucket_width(int): Optional. The allowed difference between data sequence length when bucketing is enabled - num_classes(int): Optional. Number of classes for classification training (ignored for regression problems) - mlp_layers(int): Optional. Number of MLP layers in the network - mlp_dim(int): Optional. Dimension of the output of MLP layer - mlp_activation(str): Optional. Type of activation function for the MLP layer - output_layer(str): Optional. Type of output layer - optimizer(str): Optional. Type of optimizer for training - learning_rate(float): Optional. Learning rate for SGD training - negative_sampling_rate(int): Optional. Negative sampling rate - comparator_list(str): Optional. Customization of comparator operator - tied_token_embedding_weight(bool): Optional. Tying of token embedding layer weight - token_embedding_storage_type(str): Optional. Type of token embedding storage - enc0_network(str): Optional. Network model of encoder "enc0" - enc1_network(str): Optional. Network model of encoder "enc1" - enc0_cnn_filter_width(int): Optional. CNN filter width - enc1_cnn_filter_width(int): Optional. CNN filter width - enc1_max_seq_len(int): Optional. Maximum sequence length - enc0_token_embedding_dim(int): Optional. Output dimension of token embedding layer - enc1_token_embedding_dim(int): Optional. Output dimension of token embedding layer - enc1_vocab_size(int): Optional. Vocabulary size of tokens - enc0_layers(int): Optional. Number of layers in encoder - enc1_layers(int): Optional. Number of layers in encoder - enc0_freeze_pretrained_embedding(bool): Optional. 
Freeze pretrained embedding weights - enc1_freeze_pretrained_embedding(bool): Optional. Freeze pretrained embedding weights - + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. + epochs (int): Total number of epochs for SGD training + enc0_max_seq_len (int): Maximum sequence length + enc0_vocab_size (int): Vocabulary size of tokens + enc_dim (int): Optional. Dimension of the output of the embedding + layer + mini_batch_size (int): Optional. mini batch size for SGD training + early_stopping_patience (int): Optional. The allowed number of + consecutive epochs without improvement before early stopping is + applied + early_stopping_tolerance (float): Optional. The value used to + determine whether the algorithm has made improvement between two + consecutive epochs for early stopping + dropout (float): Optional. Dropout probability on network layers + weight_decay (float): Optional. Weight decay parameter during + optimization + bucket_width (int): Optional. The allowed difference between data + sequence length when bucketing is enabled + num_classes (int): Optional. Number of classes for classification + training (ignored for regression problems) + mlp_layers (int): Optional. Number of MLP layers in the network + mlp_dim (int): Optional. Dimension of the output of MLP layer + mlp_activation (str): Optional. Type of activation function for the + MLP layer + output_layer (str): Optional. Type of output layer + optimizer (str): Optional. Type of optimizer for training + learning_rate (float): Optional. Learning rate for SGD training + negative_sampling_rate (int): Optional. Negative sampling rate + comparator_list (str): Optional. Customization of comparator + operator + tied_token_embedding_weight (bool): Optional. Tying of token + embedding layer weight + token_embedding_storage_type (str): Optional. Type of token + embedding storage + enc0_network (str): Optional. Network model of encoder "enc0" + enc1_network (str): Optional. Network model of encoder "enc1" + enc0_cnn_filter_width (int): Optional. CNN filter width + enc1_cnn_filter_width (int): Optional. CNN filter width + enc1_max_seq_len (int): Optional. Maximum sequence length + enc0_token_embedding_dim (int): Optional. Output dimension of token + embedding layer + enc1_token_embedding_dim (int): Optional. Output dimension of token + embedding layer + enc1_vocab_size (int): Optional. Vocabulary size of tokens + enc0_layers (int): Optional. Number of layers in encoder + enc1_layers (int): Optional. Number of layers in encoder + enc0_freeze_pretrained_embedding (bool): Optional. Freeze pretrained + embedding weights + enc1_freeze_pretrained_embedding (bool): Optional. Freeze pretrained + embedding weights **kwargs: base class keyword argument values. """ @@ -270,12 +296,12 @@ def __init__( self.enc1_freeze_pretrained_embedding = enc1_freeze_pretrained_embedding def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): - """Return a :class:`~sagemaker.amazon.Object2VecModel` referencing the latest - s3 model data produced by this Estimator. 
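For orientation, a minimal sketch of constructing the Object2Vec estimator with the three hyperparameters listed above without an "Optional" marker (epochs, enc0_max_seq_len, enc0_vocab_size); the role, instance type, and encoder choice are placeholders:

    from sagemaker.amazon.object2vec import Object2Vec

    o2v = Object2Vec(
        role="arn:aws:iam::123456789012:role/ExampleSageMakerRole",  # placeholder
        train_instance_count=1,
        train_instance_type="ml.p2.xlarge",
        epochs=3,
        enc0_max_seq_len=100,
        enc0_vocab_size=10000,
        enc0_network="bilstm",
    )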
+        """Return a :class:`~sagemaker.amazon.Object2VecModel` referencing the
+        latest s3 model data produced by this Estimator.

         Args:
-            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model.
-                Default: use subnets and security groups from this Estimator.
+            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on
+                the model. Default: use subnets and security groups from this Estimator.
                 * 'Subnets' (list[str]): List of subnet ids.
                 * 'SecurityGroupIds' (list[str]): List of security group ids.
         """
@@ -287,6 +313,12 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT):
     )

     def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
+        """
+        Args:
+            records:
+            mini_batch_size:
+            job_name:
+        """
         if mini_batch_size is None:
             mini_batch_size = self.MINI_BATCH_SIZE

@@ -296,10 +328,19 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):


 class Object2VecModel(Model):
-    """Reference Object2Vec s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an
-    Endpoint and returns a Predictor that calculates anomaly scores for datapoints."""
+    """Reference Object2Vec s3 model data. Calling
+    :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a
+    Predictor for performing inference against the trained Object2Vec model.
+    """

     def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
+        """
+        Args:
+            model_data:
+            role:
+            sagemaker_session:
+            **kwargs:
+        """
         sagemaker_session = sagemaker_session or Session()
         repo = "{}:{}".format(Object2Vec.repo_name, Object2Vec.repo_version)
         image = "{}/{}".format(
diff --git a/src/sagemaker/amazon/pca.py b/src/sagemaker/amazon/pca.py
index 75db62e5a9..d3a466c8ea 100644
--- a/src/sagemaker/amazon/pca.py
+++ b/src/sagemaker/amazon/pca.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
@@ -23,6 +24,7 @@


 class PCA(AmazonAlgorithmEstimatorBase):
+    """Placeholder docstring"""

     repo_name = "pca"
     repo_version = 1
@@ -56,46 +58,60 @@ def __init__(
         extra_components=None,
         **kwargs
     ):
-        """A Principal Components Analysis (PCA) :class:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase`.
+        """A Principal Components Analysis (PCA)
+        :class:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase`.

         This Estimator may be fit via calls to
         :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit_ndarray`
-        or :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. The former allows a PCA model
-        to be fit on a 2-dimensional numpy array. The latter requires Amazon
-        :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3.
-
-        To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please
-        consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
-
-        After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker
-        Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. 
As well as deploying an Endpoint, - deploy returns a :class:`~sagemaker.amazon.pca.PCAPredictor` object that can be used to project - input vectors to the learned lower-dimensional representation, using the trained PCA model hosted in the - SageMaker Endpoint. - - PCA Estimators can be configured by setting hyperparameters. The available hyperparameters for PCA - are documented below. For further information on the AWS PCA algorithm, please consult AWS technical + or + :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. + The former allows a PCA model to be fit on a 2-dimensional numpy array. + The latter requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` + protobuf serialized data to be stored in S3. + + To learn more about the Amazon protobuf Record class and how to + prepare bulk data in this format, please consult AWS technical + documentation: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html + + After this Estimator is fit, model data is stored in S3. The model + may be deployed to an Amazon SageMaker Endpoint by invoking + :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as + deploying an Endpoint, deploy returns a + :class:`~sagemaker.amazon.pca.PCAPredictor` object that can be used to + project input vectors to the learned lower-dimensional representation, + using the trained PCA model hosted in the SageMaker Endpoint. + + PCA Estimators can be configured by setting hyperparameters. The + available hyperparameters for PCA are documented below. For further + information on the AWS PCA algorithm, please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/pca.html - This Estimator uses Amazon SageMaker PCA to perform training and host deployed models. To - learn more about Amazon SageMaker PCA, please read: + This Estimator uses Amazon SageMaker PCA to perform training and host + deployed models. To learn more about Amazon SageMaker PCA, please read: https://docs.aws.amazon.com/sagemaker/latest/dg/how-pca-works.html Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and - APIs that create Amazon SageMaker endpoints use this role to access - training data and model artifacts. After the endpoint is created, - the inference code might use the IAM role, if accessing AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - num_components(int): The number of principal components. Must be greater than zero. - algorithm_mode (str): Mode for computing the principal components. One of 'regular' or - 'randomized'. - subtract_mean (bool): Whether the data should be unbiased both during train and at inference. - extra_components (int): As the value grows larger, the solution becomes more accurate but the - runtime and memory consumption increase linearly. If this value is unset or set to -1, - then a default value equal to the maximum of 10 and num_components will be used. - Valid for randomized mode only. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if accessing AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. 
+ train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. + num_components (int): The number of principal components. Must be + greater than zero. + algorithm_mode (str): Mode for computing the principal components. + One of 'regular' or 'randomized'. + subtract_mean (bool): Whether the data should be unbiased both + during train and at inference. + extra_components (int): As the value grows larger, the solution + becomes more accurate but the runtime and memory consumption + increase linearly. If this value is unset or set to -1, then a + default value equal to the maximum of 10 and num_components will + be used. Valid for randomized mode only. **kwargs: base class keyword argument values. """ super(PCA, self).__init__(role, train_instance_count, train_instance_type, **kwargs) @@ -105,12 +121,12 @@ def __init__( self.extra_components = extra_components def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): - """Return a :class:`~sagemaker.amazon.pca.PCAModel` referencing the latest - s3 model data produced by this Estimator. + """Return a :class:`~sagemaker.amazon.pca.PCAModel` referencing the + latest s3 model data produced by this Estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ @@ -125,11 +141,12 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): """Set hyperparameters needed for training. Args: - * records (:class:`~RecordSet`): The records to train this ``Estimator`` on. - * mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a - default value will be used. - * job_name (str): Name of the training job to be created. If not specified, one is generated, - using the base name given to the constructor if applicable. + records (:class:`~RecordSet`): The records to train this ``Estimator`` on. + mini_batch_size (int or None): The size of each mini-batch to use when + training. If ``None``, a default value will be used. + job_name (str): Name of the training job to be created. If not + specified, one is generated, using the base name given to the + constructor if applicable. """ num_records = None if isinstance(records, list): @@ -156,16 +173,24 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): class PCAPredictor(RealTimePredictor): """Transforms input vectors to lower-dimesional representations. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the - same number of columns as the feature-dimension of the data used to fit the model this - Predictor performs inference on. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain the same number of columns as the feature-dimension of the data used + to fit the model this Predictor performs inference on. - :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, one - for each row in the input ``ndarray``. 
The lower dimension vector result is stored in the ``projection``
-    key of the ``Record.label`` field."""
+    :meth:`predict()` returns a list of
+    :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in
+    the input ``ndarray``. The lower dimension vector result is stored in the
+    ``projection`` key of the ``Record.label`` field.
+    """

     def __init__(self, endpoint, sagemaker_session=None):
+        """
+        Args:
+            endpoint:
+            sagemaker_session:
+        """
         super(PCAPredictor, self).__init__(
             endpoint,
             sagemaker_session,
@@ -175,10 +200,19 @@ def __init__(self, endpoint, sagemaker_session=None):


 class PCAModel(Model):
-    """Reference PCA s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return
-    a Predictor that transforms vectors to a lower-dimensional representation."""
+    """Reference PCA s3 model data. Calling
+    :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a
+    Predictor that transforms vectors to a lower-dimensional representation.
+    """

     def __init__(self, model_data, role, sagemaker_session=None, **kwargs):
+        """
+        Args:
+            model_data:
+            role:
+            sagemaker_session:
+            **kwargs:
+        """
         sagemaker_session = sagemaker_session or Session()
         repo = "{}:{}".format(PCA.repo_name, PCA.repo_version)
         image = "{}/{}".format(registry(sagemaker_session.boto_session.region_name), repo)
diff --git a/src/sagemaker/amazon/randomcutforest.py b/src/sagemaker/amazon/randomcutforest.py
index 2d9e514553..05f1827f92 100644
--- a/src/sagemaker/amazon/randomcutforest.py
+++ b/src/sagemaker/amazon/randomcutforest.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
@@ -23,6 +24,7 @@


 class RandomCutForest(AmazonAlgorithmEstimatorBase):
+    """Placeholder docstring"""

     repo_name = "randomcutforest"
     repo_version = 1
@@ -53,39 +55,54 @@ def __init__(
         """RandomCutForest is :class:`Estimator` used for anomaly detection.

         This Estimator may be fit via calls to
-        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`. It requires Amazon
-        :class:`~sagemaker.amazon.record_pb2.Record` protobuf serialized data to be stored in S3.
-        There is an utility :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set` that
-        can be used to upload data to S3 and creates :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed
-        to the `fit` call.
-
-        To learn more about the Amazon protobuf Record class and how to prepare bulk data in this format, please
-        consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
-
-        After this Estimator is fit, model data is stored in S3. The model may be deployed to an Amazon SageMaker
-        Endpoint by invoking :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as deploying an
-        Endpoint, deploy returns a :class:`~sagemaker.amazon.ntm.RandomCutForestPredictor` object that can be used
-        for inference calls using the trained model hosted in the SageMaker Endpoint.
-
-        RandomCutForest Estimators can be configured by setting hyperparameters. The available hyperparameters for
-        RandomCutForest are documented below.
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`.
+        It requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` protobuf
+        serialized data to be stored in S3. There is a utility
+        :meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.record_set`
+        that can be used to upload data to S3 and creates
+        :class:`~sagemaker.amazon.amazon_estimator.RecordSet` to be passed to
+        the `fit` call.
+
+        To learn more about the Amazon protobuf Record class and how to
+        prepare bulk data in this format, please consult AWS technical
+        documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html
+
+        After this Estimator is fit, model data is stored in S3. The model
+        may be deployed to an Amazon SageMaker Endpoint by invoking
+        :meth:`~sagemaker.amazon.estimator.EstimatorBase.deploy`. As well as
+        deploying an Endpoint, deploy returns a
+        :class:`~sagemaker.amazon.ntm.RandomCutForestPredictor` object that can
+        be used for inference calls using the trained model hosted in the
+        SageMaker Endpoint.
+
+        RandomCutForest Estimators can be configured by setting
+        hyperparameters. The available hyperparameters for RandomCutForest are
+        documented below.

         For further information on the AWS Random Cut Forest algorithm,
-        please consult AWS technical documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/randomcutforest.html
+        please consult AWS technical documentation:
+        https://docs.aws.amazon.com/sagemaker/latest/dg/randomcutforest.html

         Args:
-            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and
-            APIs that create Amazon SageMaker endpoints use this role to access
-            training data and model artifacts. After the endpoint is created,
-            the inference code might use the IAM role, if accessing AWS resource.
-            train_instance_count (int): Number of Amazon EC2 instances to use for training.
-            train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'.
-            num_samples_per_tree (int): Optional. The number of samples used to build each tree in the forest.
-                The total number of samples drawn from the train dataset is num_trees * num_samples_per_tree.
+            role (str): An AWS IAM role (either name or full ARN). The Amazon
+                SageMaker training jobs and APIs that create Amazon SageMaker
+                endpoints use this role to access training data and model
+                artifacts. After the endpoint is created, the inference code
+                might use the IAM role, if accessing AWS resource.
+            train_instance_count (int): Number of Amazon EC2 instances to use
+                for training.
+            train_instance_type (str): Type of EC2 instance to use for training,
+                for example, 'ml.c4.xlarge'.
+            num_samples_per_tree (int): Optional. The number of samples used to
+                build each tree in the forest. The total number of samples drawn
+                from the train dataset is num_trees * num_samples_per_tree.
             num_trees (int): Optional. The number of trees used in the forest.
-            eval_metrics(list): Optional. JSON list of metrics types to be used for reporting the score for the model.
-                Allowed values are "accuracy", "precision_recall_fscore": positive and negative precision, recall,
-                and f1 scores. If test data is provided, the score shall be reported in terms of all requested metrics.
+            eval_metrics (list): Optional. JSON list of metrics types to be used
+                for reporting the score for the model. Allowed values are
+                "accuracy", "precision_recall_fscore": positive and negative
+                precision, recall, and f1 scores. If test data is provided, the
+                score shall be reported in terms of all requested metrics.
**kwargs: base class keyword argument values. """ @@ -97,12 +114,12 @@ def __init__( self.eval_metrics = eval_metrics def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): - """Return a :class:`~sagemaker.amazon.RandomCutForestModel` referencing the latest - s3 model data produced by this Estimator. + """Return a :class:`~sagemaker.amazon.RandomCutForestModel` referencing + the latest s3 model data produced by this Estimator. Args: - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. """ @@ -114,6 +131,12 @@ def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT): ) def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): + """ + Args: + records: + mini_batch_size: + job_name: + """ if mini_batch_size is None: mini_batch_size = self.MINI_BATCH_SIZE elif mini_batch_size != self.MINI_BATCH_SIZE: @@ -129,16 +152,24 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None): class RandomCutForestPredictor(RealTimePredictor): """Assigns an anomaly score to each of the datapoints provided. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should contain the - same number of columns as the feature-dimension of the data used to fit the model this - Predictor performs inference on. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a numpy ``ndarray`` as input. The array should + contain the same number of columns as the feature-dimension of the data used + to fit the model this Predictor performs inference on. - :meth:`predict()` returns a list of :class:`~sagemaker.amazon.record_pb2.Record` objects, - one for each row in the input. Each row's score is stored in the key ``score`` of the - ``Record.label`` field.""" + :meth:`predict()` returns a list of + :class:`~sagemaker.amazon.record_pb2.Record` objects, one for each row in + the input. Each row's score is stored in the key ``score`` of the + ``Record.label`` field. + """ def __init__(self, endpoint, sagemaker_session=None): + """ + Args: + endpoint: + sagemaker_session: + """ super(RandomCutForestPredictor, self).__init__( endpoint, sagemaker_session, @@ -148,10 +179,19 @@ def __init__(self, endpoint, sagemaker_session=None): class RandomCutForestModel(Model): - """Reference RandomCutForest s3 model data. Calling :meth:`~sagemaker.model.Model.deploy` creates an - Endpoint and returns a Predictor that calculates anomaly scores for datapoints.""" + """Reference RandomCutForest s3 model data. Calling + :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and returns a + Predictor that calculates anomaly scores for datapoints. 
+ """ def __init__(self, model_data, role, sagemaker_session=None, **kwargs): + """ + Args: + model_data: + role: + sagemaker_session: + **kwargs: + """ sagemaker_session = sagemaker_session or Session() repo = "{}:{}".format(RandomCutForest.repo_name, RandomCutForest.repo_version) image = "{}/{}".format( diff --git a/src/sagemaker/amazon/validation.py b/src/sagemaker/amazon/validation.py index c6a3291e7e..cd92eb9ef9 100644 --- a/src/sagemaker/amazon/validation.py +++ b/src/sagemaker/amazon/validation.py @@ -10,10 +10,16 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import def gt(minimum): + """ + Args: + minimum: + """ + def validate(value): return value > minimum @@ -21,6 +27,11 @@ def validate(value): def ge(minimum): + """ + Args: + minimum: + """ + def validate(value): return value >= minimum @@ -28,6 +39,11 @@ def validate(value): def lt(maximum): + """ + Args: + maximum: + """ + def validate(value): return value < maximum @@ -35,6 +51,11 @@ def validate(value): def le(maximum): + """ + Args: + maximum: + """ + def validate(value): return value <= maximum @@ -42,6 +63,11 @@ def validate(value): def isin(*expected): + """ + Args: + *expected: + """ + def validate(value): return value in expected @@ -49,6 +75,11 @@ def validate(value): def istype(expected): + """ + Args: + expected: + """ + def validate(value): return isinstance(value, expected) diff --git a/src/sagemaker/analytics.py b/src/sagemaker/analytics.py index 1b808aead6..e32792606b 100644 --- a/src/sagemaker/analytics.py +++ b/src/sagemaker/analytics.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import print_function, absolute_import from abc import ABCMeta, abstractmethod @@ -33,8 +34,8 @@ class AnalyticsMetricsBase(with_metaclass(ABCMeta, object)): - """Base class for tuning job or training job analytics classes. - Understands common functionality like persistence and caching. + """Base class for tuning job or training job analytics classes. Understands + common functionality like persistence and caching. """ def __init__(self): @@ -49,12 +50,13 @@ def export_csv(self, filename): self.dataframe().to_csv(filename) def dataframe(self, force_refresh=False): - """A pandas dataframe with lots of interesting results about this object. - Created by calling SageMaker List and Describe APIs and converting them into - a convenient tabular summary. + """A pandas dataframe with lots of interesting results about this + object. Created by calling SageMaker List and Describe APIs and + converting them into a convenient tabular summary. Args: - force_refresh (bool): Set to True to fetch the latest data from SageMaker API. + force_refresh (bool): Set to True to fetch the latest data from + SageMaker API. """ if force_refresh: self.clear_cache() @@ -64,29 +66,31 @@ def dataframe(self, force_refresh=False): @abstractmethod def _fetch_dataframe(self): - """Sub-class must calculate the dataframe and return it. 
- """ + """Sub-class must calculate the dataframe and return it.""" def clear_cache(self): - """Clear the object of all local caches of API methods, so - that the next time any properties are accessed they will be refreshed from - the service. + """Clear the object of all local caches of API methods, so that the next + time any properties are accessed they will be refreshed from the + service. """ self._dataframe = None class HyperparameterTuningJobAnalytics(AnalyticsMetricsBase): - """Fetch results about a hyperparameter tuning job and make them accessible for analytics. + """Fetch results about a hyperparameter tuning job and make them accessible + for analytics. """ def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None): """Initialize a ``HyperparameterTuningJobAnalytics`` instance. Args: - hyperparameter_tuning_job_name (str): name of the HyperparameterTuningJob to analyze. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, one is created - using the default AWS configuration chain. + hyperparameter_tuning_job_name (str): name of the + HyperparameterTuningJob to analyze. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, one is created using the + default AWS configuration chain. """ sagemaker_session = sagemaker_session or Session() self._sage_client = sagemaker_session.sagemaker_client @@ -98,24 +102,22 @@ def __init__(self, hyperparameter_tuning_job_name, sagemaker_session=None): @property def name(self): - """Name of the HyperparameterTuningJob being analyzed - """ + """Name of the HyperparameterTuningJob being analyzed""" return self._tuning_job_name def __repr__(self): return "" % self.name def clear_cache(self): - """Clear the object of all local caches of API methods. - """ + """Clear the object of all local caches of API methods.""" super(HyperparameterTuningJobAnalytics, self).clear_cache() self._tuning_job_describe_result = None self._training_job_summaries = None def _fetch_dataframe(self): - """Return a pandas dataframe with all the training jobs, along with their - hyperparameters, results, and metadata. This also includes a column to indicate - if a training job was the best seen so far. + """Return a pandas dataframe with all the training jobs, along with + their hyperparameters, results, and metadata. This also includes a + column to indicate if a training job was the best seen so far. """ def reshape(training_summary): @@ -148,8 +150,8 @@ def reshape(training_summary): @property def tuning_ranges(self): - """A dictionary describing the ranges of all tuned hyperparameters. - The keys are the names of the hyperparameter, and the values are the ranges. + """A dictionary describing the ranges of all tuned hyperparameters. The + keys are the names of the hyperparameter, and the values are the ranges. """ out = {} for _, ranges in self.description()["HyperParameterTuningJobConfig"][ @@ -160,13 +162,16 @@ def tuning_ranges(self): return out def description(self, force_refresh=False): - """Call ``DescribeHyperParameterTuningJob`` for the hyperparameter tuning job. + """Call ``DescribeHyperParameterTuningJob`` for the hyperparameter + tuning job. Args: - force_refresh (bool): Set to True to fetch the latest data from SageMaker API. + force_refresh (bool): Set to True to fetch the latest data from + SageMaker API. 
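A brief hedged sketch of driving both analytics classes described above (the job and metric names are placeholders; AWS credentials and a default session are assumed):

    from sagemaker.analytics import (
        HyperparameterTuningJobAnalytics,
        TrainingJobAnalytics,
    )

    tuning = HyperparameterTuningJobAnalytics("example-tuning-job")
    df = tuning.dataframe()          # one row per training job in the sweep
    tuning.export_csv("tuning.csv")  # persist the same table to disk

    metrics = TrainingJobAnalytics("example-training-job",
                                   metric_names=["train:loss"])
    print(metrics.dataframe(force_refresh=True).head())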
Returns: - dict: The Amazon SageMaker response for ``DescribeHyperParameterTuningJob``. + dict: The Amazon SageMaker response for + ``DescribeHyperParameterTuningJob``. """ if force_refresh: self.clear_cache() @@ -177,13 +182,16 @@ def description(self, force_refresh=False): return self._tuning_job_describe_result def training_job_summaries(self, force_refresh=False): - """A (paginated) list of everything from ``ListTrainingJobsForTuningJob``. + """A (paginated) list of everything from + ``ListTrainingJobsForTuningJob``. Args: - force_refresh (bool): Set to True to fetch the latest data from SageMaker API. + force_refresh (bool): Set to True to fetch the latest data from + SageMaker API. Returns: - dict: The Amazon SageMaker response for ``ListTrainingJobsForTuningJob``. + dict: The Amazon SageMaker response for + ``ListTrainingJobsForTuningJob``. """ if force_refresh: self.clear_cache() @@ -210,7 +218,8 @@ def training_job_summaries(self, force_refresh=False): class TrainingJobAnalytics(AnalyticsMetricsBase): - """Fetch training curve data from CloudWatch Metrics for a specific training job. + """Fetch training curve data from CloudWatch Metrics for a specific training + job. """ CLOUDWATCH_NAMESPACE = "/aws/sagemaker/TrainingJobs" @@ -228,11 +237,16 @@ def __init__( Args: training_job_name (str): name of the TrainingJob to analyze. - metric_names (list, optional): string names of all the metrics to collect for this training job. - If not specified, then it will use all metric names configured for this job. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, one is specified - using the default AWS configuration chain. + metric_names (list, optional): string names of all the metrics to + collect for this training job. If not specified, then it will + use all metric names configured for this job. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, one is specified using + the default AWS configuration chain. + start_time: + end_time: + period: """ sagemaker_session = sagemaker_session or Session() self._sage_client = sagemaker_session.sagemaker_client @@ -252,25 +266,24 @@ def __init__( @property def name(self): - """Name of the TrainingJob being analyzed - """ + """Name of the TrainingJob being analyzed""" return self._training_job_name def __repr__(self): return "" % self.name def clear_cache(self): - """Clear the object of all local caches of API methods, so - that the next time any properties are accessed they will be refreshed from - the service. + """Clear the object of all local caches of API methods, so that the next + time any properties are accessed they will be refreshed from the + service. 
""" super(TrainingJobAnalytics, self).clear_cache() self._data = defaultdict(list) self._time_interval = self._determine_timeinterval() def _determine_timeinterval(self): - """Return a dictionary with two datetime objects, start_time and end_time, - covering the interval of the training job + """Return a dictionary with two datetime objects, start_time and + end_time, covering the interval of the training job """ description = self._sage_client.describe_training_job(TrainingJobName=self.name) start_time = self._start_time or description[u"TrainingStartTime"] # datetime object @@ -292,6 +305,9 @@ def _fetch_dataframe(self): def _fetch_metric(self, metric_name): """Fetch all the values of a named metric, and add them to _data + + Args: + metric_name: """ request = { "Namespace": self.CLOUDWATCH_NAMESPACE, @@ -321,8 +337,13 @@ def _fetch_metric(self, metric_name): self._add_single_metric(elapsed_seconds, metric_name, value) def _add_single_metric(self, timestamp, metric_name, value): - """Store a single metric in the _data dict which can be - converted to a dataframe. + """Store a single metric in the _data dict which can be converted to a + dataframe. + + Args: + timestamp: + metric_name: + value: """ # note that this method is built this way to make it possible to # support live-refreshing charts in Bokeh at some point in the future. @@ -331,8 +352,7 @@ def _add_single_metric(self, timestamp, metric_name, value): self._data["value"].append(value) def _metric_names_for_training_job(self): - """Helper method to discover the metrics defined for a training job. - """ + """Helper method to discover the metrics defined for a training job.""" training_description = self._sage_client.describe_training_job( TrainingJobName=self._training_job_name ) diff --git a/src/sagemaker/chainer/__init__.py b/src/sagemaker/chainer/__init__.py index 41b4657ec2..893cfab8ab 100644 --- a/src/sagemaker/chainer/__init__.py +++ b/src/sagemaker/chainer/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.chainer.estimator import Chainer # noqa: F401 diff --git a/src/sagemaker/chainer/defaults.py b/src/sagemaker/chainer/defaults.py index 878f76747a..c31e807c54 100644 --- a/src/sagemaker/chainer/defaults.py +++ b/src/sagemaker/chainer/defaults.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import CHAINER_VERSION = "4.1.0" diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index 6c553aafa4..d01c495174 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
+"""Placeholder docstring"""
 from __future__ import absolute_import

 import logging
@@ -56,49 +57,67 @@ def __init__(
         image_name=None,
         **kwargs
     ):
-        """
-        This ``Estimator`` executes an Chainer script in a managed Chainer execution environment, within a SageMaker
-        Training Job. The managed Chainer environment is an Amazon-built Docker container that executes functions
-        defined in the supplied ``entry_point`` Python script.
-
-        Training is started by calling :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator.
-        After training is complete, calling :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a
-        hosted SageMaker endpoint and returns an :class:`~sagemaker.amazon.chainer.model.ChainerPredictor` instance
-        that can be used to perform inference against the hosted model.
-
-        Technical documentation on preparing Chainer scripts for SageMaker training and using the Chainer Estimator is
-        available on the project home-page: https://github.com/aws/sagemaker-python-sdk
+        """This ``Estimator`` executes a Chainer script in a managed Chainer
+        execution environment, within a SageMaker Training Job. The managed
+        Chainer environment is an Amazon-built Docker container that executes
+        functions defined in the supplied ``entry_point`` Python script.
+
+        Training is started by calling
+        :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator.
+        After training is complete, calling
+        :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a hosted
+        SageMaker endpoint and returns an
+        :class:`~sagemaker.amazon.chainer.model.ChainerPredictor` instance that
+        can be used to perform inference against the hosted model.
+
+        Technical documentation on preparing Chainer scripts for SageMaker
+        training and using the Chainer Estimator is available on the project
+        home-page: https://github.com/aws/sagemaker-python-sdk

         Args:
-            entry_point (str): Path (absolute or relative) to the Python source file which should be executed
-                as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
-            use_mpi (bool): If true, entry point is run as an MPI script. By default, the Chainer Framework runs
-                the entry point with 'mpirun' if more than one instance is used.
-            num_processes (int): Total number of processes to run the entry point with. By default, the Chainer
-                Framework runs one process per GPU (on GPU instances), or one process per host (on CPU instances).
-            process_slots_per_host (int): The number of processes that can run on each instance. By default, this is
-                set to the number of GPUs on the instance (on GPU instances), or one (on CPU instances).
-            additional_mpi_options (str): String of options to the 'mpirun' command used to run the entry point.
-                For example, '-X NCCL_DEBUG=WARN' will pass that option string to the mpirun command.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
-            hyperparameters (dict): Hyperparameters that will be used for training (default: None).
-                The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
-                For convenience, this accepts other types for keys and values, but ``str()`` will be called
-                to convert them before training.
-            py_version (str): Python version you want to use for executing your model training code (default: 'py2').
-                One of 'py2' or 'py3'.
- framework_version (str): Chainer version you want to use for executing your model training code. - List of supported versions https://github.com/aws/sagemaker-python-sdk#chainer-sagemaker-estimators. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to training. + This should be compatible with either Python 2.7 or Python 3.5. + use_mpi (bool): If true, entry point is run as an MPI script. By + default, the Chainer Framework runs the entry point with + 'mpirun' if more than one instance is used. + num_processes (int): Total number of processes to run the entry + point with. By default, the Chainer Framework runs one process + per GPU (on GPU instances), or one process per host (on CPU + instances). + process_slots_per_host (int): The number of processes that can run + on each instance. By default, this is set to the number of GPUs + on the instance (on GPU instances), or one (on CPU instances). + additional_mpi_options (str): String of options to the 'mpirun' + command used to run the entry point. For example, '-X + NCCL_DEBUG=WARN' will pass that option string to the mpirun + command. + source_dir (str): Path (absolute or relative) to a directory with + any other training source code dependencies aside from the entry + point file (default: None). Structure within this directory is + preserved when training on Amazon SageMaker. + hyperparameters (dict): Hyperparameters that will be used for + training (default: None). The hyperparameters are made + accessible as a dict[str, str] to the training code on + SageMaker. For convenience, this accepts other types for keys + and values, but ``str()`` will be called to convert them before + training. + py_version (str): Python version you want to use for executing your + model training code (default: 'py2'). One of 'py2' or 'py3'. + framework_version (str): Chainer version you want to use for + executing your model training code. List of supported versions + https://github.com/aws/sagemaker-python-sdk#chainer-sagemaker-estimators. If not specified, this will default to 4.1. - image_name (str): If specified, the estimator will use this image for training and hosting, instead of - selecting the appropriate SageMaker official image based on framework_version and py_version. It can - be an ECR url or dockerhub image and tag. - Examples: + image_name (str): If specified, the estimator will use this image + for training and hosting, instead of selecting the appropriate + SageMaker official image based on framework_version and + py_version. It can be an ECR url or dockerhub image and tag. + .. admonition:: Examples + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 custom-image:latest. - **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. + **kwargs: Additional kwargs passed to the + :class:`~sagemaker.estimator.Framework` constructor. """ if framework_version is None: logger.warning(empty_framework_version_warning(CHAINER_VERSION, self.LATEST_VERSION)) @@ -118,7 +137,9 @@ def __init__( self.additional_mpi_options = additional_mpi_options def hyperparameters(self): - """Return hyperparameters used by your custom Chainer code during training.""" + """Return hyperparameters used by your custom Chainer code during + training.
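For illustration, a minimal sketch of how the constructor arguments documented above fit together; the script name, role, bucket, and version below are hypothetical placeholders rather than values taken from this patch:

from sagemaker.chainer import Chainer

# Hypothetical script and role; assumes AWS credentials are configured.
chainer_estimator = Chainer(
    entry_point="train.py",
    role="SageMakerRole",
    train_instance_count=2,
    train_instance_type="ml.p2.xlarge",
    framework_version="4.1.0",
    use_mpi=True,            # run the entry point under 'mpirun'
    num_processes=2,         # total number of MPI processes
    hyperparameters={"epochs": 10, "batch-size": 64},
)
chainer_estimator.fit("s3://my-bucket/chainer-training-data")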
+ """ hyperparameters = super(Chainer, self).hyperparameters() additional_hyperparameters = { @@ -136,21 +157,24 @@ def hyperparameters(self): def create_model( self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT ): - """Create a SageMaker ``ChainerModel`` object that can be deployed to an ``Endpoint``. + """Create a SageMaker ``ChainerModel`` object that can be deployed to an + ``Endpoint``. Args: - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. Returns: - sagemaker.chainer.model.ChainerModel: A SageMaker ``ChainerModel`` object. - See :func:`~sagemaker.chainer.model.ChainerModel` for full details. + sagemaker.chainer.model.ChainerModel: A SageMaker ``ChainerModel`` + object. See :func:`~sagemaker.chainer.model.ChainerModel` for full details. """ role = role or self.role return ChainerModel( @@ -173,15 +197,17 @@ def create_model( @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded. Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = super(Chainer, cls)._prepare_init_params_from_job_description( job_details, model_channel_name diff --git a/src/sagemaker/chainer/model.py b/src/sagemaker/chainer/model.py index 2fd2c8a70e..df540dd8b1 100644 --- a/src/sagemaker/chainer/model.py +++ b/src/sagemaker/chainer/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -26,16 +27,19 @@ class ChainerPredictor(RealTimePredictor): """A RealTimePredictor for inference against Chainer Endpoints. 
- This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for Chainer - inference.""" + This is able to serialize Python lists, dictionaries, and numpy arrays to + multidimensional tensors for Chainer inference. + """ def __init__(self, endpoint_name, sagemaker_session=None): """Initialize a ``ChainerPredictor``. Args: - endpoint_name (str): The name of the endpoint to perform inference on. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + endpoint_name (str): The name of the endpoint to perform inference + on. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. """ super(ChainerPredictor, self).__init__( @@ -44,7 +48,9 @@ def __init__(self, endpoint_name, sagemaker_session=None): class ChainerModel(FrameworkModel): - """An Chainer SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``.""" + """A Chainer SageMaker ``Model`` that can be deployed to a SageMaker + ``Endpoint``. + """ __framework_name__ = "chainer" @@ -63,22 +69,32 @@ def __init__( """Initialize a ChainerModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5. - image (str): A Docker image URI (default: None). If not specified, a default image for Chainer will be used. - py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - framework_version (str): Chainer version you want to use for executing your model training code. - predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor - with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of - invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to model + hosting. This should be compatible with either Python 2.7 or + Python 3.5. + image (str): A Docker image URI (default: None). If not specified, a + default image for Chainer will be used.
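A short sketch of the serialization behavior described above; the endpoint name is hypothetical and assumed to already exist:

import numpy as np
from sagemaker.chainer.model import ChainerPredictor

predictor = ChainerPredictor("my-chainer-endpoint")  # hypothetical endpoint
data = np.random.rand(1, 784).astype(np.float32)
result = predictor.predict(data)  # the numpy array is serialized to a tensor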
+ py_version (str): Python version you want to use for executing your + model training code (default: 'py2'). + framework_version (str): Chainer version you want to use for + executing your model training code. + predictor_cls (callable[str, sagemaker.session.Session]): A function + to call to create a predictor with an endpoint name and + SageMaker ``Session``. If specified, ``deploy()`` returns the + result of invoking this function on the created endpoint name. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + **kwargs: Keyword arguments passed to the ``FrameworkModel`` + initializer. """ super(ChainerModel, self).__init__( model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs @@ -91,15 +107,19 @@ def __init__( self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type, accelerator_type=None): - """Return a container definition with framework configuration set in model environment variables. + """Return a container definition with framework configuration set in + model environment variables. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Returns: - dict[str, str]: A container definition object usable with the CreateModel API. + dict[str, str]: A container definition object usable with the + CreateModel API. """ deploy_image = self.image if not deploy_image: diff --git a/src/sagemaker/cli/common.py b/src/sagemaker/cli/common.py index a98fee5774..b1bef51f1a 100644 --- a/src/sagemaker/cli/common.py +++ b/src/sagemaker/cli/common.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import json @@ -25,7 +26,13 @@ class HostCommand(object): + """Placeholder docstring""" + def __init__(self, args): + """ + Args: + args: + """ self.endpoint_name = args.job_name self.bucket = args.bucket_name # may be None self.role_name = args.role_name @@ -39,6 +46,7 @@ def __init__(self, args): self.session = sagemaker.Session() def upload_model(self): + """Placeholder docstring""" prefix = "{}/model".format(self.endpoint_name) archive = self.create_model_archive(self.data) @@ -49,6 +57,10 @@ def upload_model(self): @staticmethod def create_model_archive(src): + """ + Args: + src: + """ if os.path.isdir(src): arcname = "." 
else: @@ -62,9 +74,14 @@ def create_model_archive(src): return archive def create_model(self, model_url): + """ + Args: + model_url: + """ raise NotImplementedError # subclasses must override def start(self): + """Placeholder docstring""" model_url = self.upload_model() model = self.create_model(model_url) predictor = model.deploy( @@ -75,7 +92,13 @@ def start(self): class TrainCommand(object): + """Placeholder docstring""" + def __init__(self, args): + """ + Args: + args: + """ self.job_name = args.job_name self.bucket = args.bucket_name # may be None self.role_name = args.role_name @@ -90,6 +113,10 @@ def __init__(self, args): @staticmethod def load_hyperparameters(src): + """ + Args: + src: + """ hp = {} if src and os.path.exists(src): with open(src, "r") as f: @@ -97,14 +124,17 @@ def load_hyperparameters(src): return hp def upload_training_data(self): + """Placeholder docstring""" prefix = "{}/data".format(self.job_name) data_url = self.session.upload_data(path=self.data, bucket=self.bucket, key_prefix=prefix) return data_url def create_estimator(self): + """Placeholder docstring""" raise NotImplementedError # subclasses must override def start(self): + """Placeholder docstring""" data_url = self.upload_training_data() estimator = self.create_estimator() estimator.fit(data_url) diff --git a/src/sagemaker/cli/main.py b/src/sagemaker/cli/main.py index 4dcb1a3838..870b86b141 100644 --- a/src/sagemaker/cli/main.py +++ b/src/sagemaker/cli/main.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import argparse @@ -27,6 +28,10 @@ def parse_arguments(args): + """ + Args: + args: + """ parser = argparse.ArgumentParser( description="Launch SageMaker training jobs or hosting endpoints" ) @@ -125,6 +130,10 @@ def parse_arguments(args): def configure_logging(args): + """ + Args: + args: + """ log_format = "%(asctime)s %(levelname)s %(name)s: %(message)s" log_level = logging.getLevelName(args.log_level.upper()) logging.basicConfig(format=log_format, level=log_level) @@ -132,6 +141,7 @@ def configure_logging(args): def main(): + """Placeholder docstring""" args = parse_arguments(sys.argv[1:]) configure_logging(args) logger.debug("args: %s", args) diff --git a/src/sagemaker/cli/mxnet.py b/src/sagemaker/cli/mxnet.py index d989d2d01f..1f75f74fda 100644 --- a/src/sagemaker/cli/mxnet.py +++ b/src/sagemaker/cli/mxnet.py @@ -10,21 +10,33 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
+"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.cli.common import HostCommand, TrainCommand def train(args): + """ + Args: + args: + """ MXNetTrainCommand(args).start() def host(args): + """ + Args: + args: + """ MXNetHostCommand(args).start() class MXNetTrainCommand(TrainCommand): + """Placeholder docstring""" + def create_estimator(self): + """Placeholder docstring""" from sagemaker.mxnet.estimator import MXNet return MXNet( @@ -39,7 +51,13 @@ def create_estimator(self): class MXNetHostCommand(HostCommand): + """Placeholder docstring""" + def create_model(self, model_url): + """ + Args: + model_url: + """ from sagemaker.mxnet.model import MXNetModel return MXNetModel( diff --git a/src/sagemaker/cli/tensorflow.py b/src/sagemaker/cli/tensorflow.py index 9fbd7dfa1f..27df60ae7f 100644 --- a/src/sagemaker/cli/tensorflow.py +++ b/src/sagemaker/cli/tensorflow.py @@ -10,21 +10,36 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.cli.common import HostCommand, TrainCommand def train(args): + """ + Args: + args: + """ TensorFlowTrainCommand(args).start() def host(args): + """ + Args: + args: + """ TensorFlowHostCommand(args).start() class TensorFlowTrainCommand(TrainCommand): + """Placeholder docstring""" + def __init__(self, args): + """ + Args: + args: + """ super(TensorFlowTrainCommand, self).__init__(args) self.training_steps = args.training_steps self.evaluation_steps = args.evaluation_steps @@ -46,7 +61,13 @@ def create_estimator(self): class TensorFlowHostCommand(HostCommand): + """Placeholder docstring""" + def create_model(self, model_url): + """ + Args: + model_url: + """ from sagemaker.tensorflow.model import TensorFlowModel return TensorFlowModel( diff --git a/src/sagemaker/content_types.py b/src/sagemaker/content_types.py index a126f3786c..0c4f20c38b 100644 --- a/src/sagemaker/content_types.py +++ b/src/sagemaker/content_types.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Contains content types strings""" from __future__ import absolute_import CONTENT_TYPE_JSON = "application/json" diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 653c0942ae..83c72b27e7 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import print_function, absolute_import import json @@ -57,7 +58,8 @@ class EstimatorBase(with_metaclass(ABCMeta, object)): http://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-training.html Subclasses must define a way to determine what image to use for training, - what hyperparameters to use, and how to create an appropriate predictor instance. + what hyperparameters to use, and how to create an appropriate predictor + instance. """ def __init__( @@ -84,55 +86,77 @@ def __init__( """Initialize an ``EstimatorBase`` instance. Args: - role (str): An AWS IAM role (either name or full ARN). 
The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - train_volume_size (int): Size in GB of the EBS volume to use for storing input data - during training (default: 30). Must be large enough to store training data if File Mode is used - (which is the default). - train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached to the - training instance (default: None). - train_max_run (int): Timeout in seconds for training (default: 24 * 60 * 60). - After this amount of time Amazon SageMaker terminates the job regardless of its current status. - input_mode (str): The input mode that the algorithm supports (default: 'File'). Valid modes: - 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. - 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. - This argument can be overriden on a per-channel basis using ``sagemaker.session.s3_input.input_mode``. - output_path (str): S3 location for saving the training result (model artifacts and output files). - If not specified, results are stored to a default bucket. If the bucket with the specific name + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. + train_volume_size (int): Size in GB of the EBS volume to use for + storing input data during training (default: 30). Must be large + enough to store training data if File Mode is used (which is the + default). + train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS + volume attached to the training instance (default: None). + train_max_run (int): Timeout in seconds for training (default: 24 * + 60 * 60). After this amount of time Amazon SageMaker terminates + the job regardless of its current status. + input_mode (str): The input mode that the algorithm supports + (default: 'File'). Valid modes: 'File' - Amazon SageMaker copies + the training dataset from the S3 location to a local directory. + 'Pipe' - Amazon SageMaker streams data directly from S3 to the + container via a Unix-named pipe. This argument can be overridden + on a per-channel basis using + ``sagemaker.session.s3_input.input_mode``. + output_path (str): S3 location for saving the training result (model + artifacts and output files). If not specified, results are + stored to a default bucket. If the bucket with the specific name does not exist, the estimator creates the bucket during the :meth:`~sagemaker.estimator.EstimatorBase.fit` method execution. - output_kms_key (str): Optional. KMS key ID for encrypting the training output (default: None). - base_job_name (str): Prefix for training job name when the :meth:`~sagemaker.estimator.EstimatorBase.fit` - method launches.
If not specified, the estimator generates a default job name, based on - the training image name and current timestamp. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + output_kms_key (str): Optional. KMS key ID for encrypting the + training output (default: None). + base_job_name (str): Prefix for training job name when the + :meth:`~sagemaker.estimator.EstimatorBase.fit` method launches. + If not specified, the estimator generates a default job name, + based on the training image name and current timestamp. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. - tags (list[dict]): List of tags for labeling a training job. For more, see + tags (list[dict]): List of tags for labeling a training job. For + more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - subnets (list[str]): List of subnet ids. If not specified training job will be created without VPC config. - security_group_ids (list[str]): List of security group ids. If not specified training job will be created - without VPC config. - model_uri (str): URI where a pre-trained model is stored, either locally or in S3 (default: None). If - specified, the estimator will create a channel pointing to the model so the training job can download - it. This model can be a 'model.tar.gz' from a previous training job, or other artifacts coming from a + subnets (list[str]): List of subnet ids. If not specified, the + training job will be created without VPC config. + security_group_ids (list[str]): List of security group ids. If not + specified, the training job will be created without VPC config. + model_uri (str): URI where a pre-trained model is stored, either + locally or in S3 (default: None). If specified, the estimator + will create a channel pointing to the model so the training job + can download it. This model can be a 'model.tar.gz' from a + previous training job, or other artifacts coming from a + different source. - In local mode, this should point to the path in which the model is located and not the file itself, as - local Docker containers will try to mount the URI as a volume. - - More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization - model_channel_name (str): Name of the channel where 'model_uri' will be downloaded (default: 'model'). - metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the - training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for - the regular expression used to extract the metric from the logs. This should be defined only - for jobs that don't use an Amazon algorithm. - encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is encrypted - for the training job (default: ``False``). + In local mode, this should point to the path in which the model + is located and not the file itself, as local Docker containers + will try to mount the URI as a volume. + + More information: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization + model_channel_name (str): Name of the channel where 'model_uri' will + be downloaded (default: 'model').
+ metric_definitions (list[dict]): A list of dictionaries that defines + the metric(s) used to evaluate the training jobs. Each + dictionary contains two keys: 'Name' for the name of the metric, + and 'Regex' for the regular expression used to extract the + metric from the logs. This should be defined only for jobs that + don't use an Amazon algorithm. + encrypt_inter_container_traffic (bool): Specifies whether traffic + between training containers is encrypted for the training job + (default: ``False``). """ self.role = role self.train_instance_count = train_instance_count @@ -180,8 +204,9 @@ def __init__( def train_image(self): """Return the Docker image to use for training. - The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training, calls this method to - find the image to use for model training. + The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does + the model training, calls this method to find the image to use for model + training. Returns: str: The URI of the Docker image. @@ -191,8 +216,8 @@ def train_image(self): def hyperparameters(self): """Return the hyperparameters as a dictionary to use for training. - The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which trains the model, calls this method to - find the hyperparameters. + The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which + trains the model, calls this method to find the hyperparameters. Returns: dict[str, str]: The hyperparameters. @@ -210,8 +235,9 @@ def _prepare_for_training(self, job_name=None): """Set any values in the estimator that need to be set before training. Args: - * job_name (str): Name of the training job to be created. If not specified, one is generated, - using the base name given to the constructor if applicable. + job_name (str): Name of the training job to be created. If not + specified, one is generated, using the base name given to the + constructor if applicable. """ if job_name is not None: self._current_job_name = job_name @@ -238,30 +264,37 @@ def _prepare_for_training(self, job_name=None): def fit(self, inputs=None, wait=True, logs=True, job_name=None): """Train a model using the input training dataset. - The API calls the Amazon SageMaker CreateTrainingJob API to start model training. - The API uses configuration you provided to create the estimator and the - specified input training data to send the CreatingTrainingJob request to Amazon SageMaker. + The API calls the Amazon SageMaker CreateTrainingJob API to start + model training. The API uses configuration you provided to create the + estimator and the specified input training data to send the + CreateTrainingJob request to Amazon SageMaker. - This is a synchronous operation. After the model training successfully completes, - you can call the ``deploy()`` method to host the model using the Amazon SageMaker hosting services. + This is a synchronous operation. After the model training + successfully completes, you can call the ``deploy()`` method to host the + model using the Amazon SageMaker hosting services. Args: - inputs (str or dict or sagemaker.session.s3_input): Information about the training data. - This can be one of three types: + inputs (str or dict or sagemaker.session.s3_input): Information + about the training data. This can be one of three types: * (str) the S3 location where training data is saved.
* (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple channels for - training data, you can specify a dict mapping channel names - to strings or :func:`~sagemaker.session.s3_input` objects. + training data, you can specify a dict mapping channel + names to strings or :func:`~sagemaker.session.s3_input` + objects. + * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide - additional information as well as the path to the training dataset. - See :func:`sagemaker.session.s3_input` for full details. - wait (bool): Whether the call should wait until the job completes (default: True). - logs (bool): Whether to show the logs produced by the job. - Only meaningful when wait is True (default: True). - job_name (str): Training job name. If not specified, the estimator generates a default job name, - based on the training image name and current timestamp. + additional information as well as the path to the training + dataset. See :func:`sagemaker.session.s3_input` for full + details. + wait (bool): Whether the call should wait until the job completes + (default: True). + logs (bool): Whether to show the logs produced by the job. Only + meaningful when wait is True (default: True). + job_name (str): Training job name. If not specified, the estimator + generates a default job name, based on the training image name + and current timestamp. """ self._prepare_for_training(job_name=job_name) @@ -270,6 +303,7 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None): self.latest_training_job.wait(logs=logs) def _compilation_job_name(self): + """Placeholder docstring""" base_name = self.base_job_name or base_name_from_image(self.train_image()) return name_from_base("compilation-" + base_name) @@ -287,25 +321,33 @@ def _compilation_job_name(self): """Compile a Neo model using the input model. Args: - target_instance_family (str): Identifies the device that you want to run your model after compilation, for - example: ml_c5. Allowed strings are: ml_c5, ml_m5, ml_c4, ml_m4, jetsontx1, jetsontx2, ml_p2, ml_p3, - deeplens, rasp3b - input_shape (dict): Specifies the name and shape of the expected inputs for your trained model in json - dictionary form, for example: {'data':[1,3,1024,1024]}, or {'var1': [1,1,28,28], 'var2':[1,1,28,28]} + target_instance_family (str): Identifies the device on which you want to + run your model after compilation, for example: ml_c5. Allowed + strings are: ml_c5, ml_m5, ml_c4, ml_m4, jetsontx1, jetsontx2, + ml_p2, ml_p3, deeplens, rasp3b + input_shape (dict): Specifies the name and shape of the expected + inputs for your trained model in JSON dictionary form, for + example: {'data':[1,3,1024,1024]}, or {'var1': [1,1,28,28], + 'var2':[1,1,28,28]} output_path (str): Specifies where to store the compiled model - framework (str): The framework that is used to train the original model. Allowed values: 'mxnet', - 'tensorflow', 'pytorch', 'onnx', 'xgboost' + framework (str): The framework that is used to train the original + model. Allowed values: 'mxnet', 'tensorflow', 'pytorch', 'onnx', + 'xgboost' framework_version (str): The version of the framework - compile_max_run (int): Timeout in seconds for compilation (default: 3 * 60). - After this amount of time Amazon SageMaker Neo terminates the compilation job regardless of its - current status. - tags (list[dict]): List of tags for labeling a compilation job. For more, see + compile_max_run (int): Timeout in seconds for compilation (default: + 3 * 60).
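A sketch of the three input forms accepted by fit(), using hypothetical bucket paths and an already-constructed estimator:

from sagemaker.session import s3_input

# 1. A single S3 prefix:
estimator.fit("s3://my-bucket/train")

# 2. A dict mapping channel names to S3 locations:
estimator.fit({"train": "s3://my-bucket/train", "test": "s3://my-bucket/test"})

# 3. An s3_input for channel-level configuration (here, Pipe input mode):
estimator.fit({"train": s3_input("s3://my-bucket/train", input_mode="Pipe")})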
After this amount of time Amazon SageMaker Neo + terminates the compilation job regardless of its current status. + tags (list[dict]): List of tags for labeling a compilation job. For + more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - **kwargs: Passed to invocation of ``create_model()``. Implementations may customize - ``create_model()`` to accept ``**kwargs`` to customize model creation during deploy. - For more, see the implementation docs. + **kwargs: Passed to invocation of ``create_model()``. + Implementations may customize ``create_model()`` to accept + ``**kwargs`` to customize model creation during deploy. For + more, see the implementation docs. + Returns: - sagemaker.model.Model: A SageMaker ``Model`` object. See :func:`~sagemaker.model.Model` for full details. + sagemaker.model.Model: A SageMaker ``Model`` object. See + :func:`~sagemaker.model.Model` for full details. """ if target_instance_family not in NEO_ALLOWED_TARGET_INSTANCE_FAMILY: raise ValueError( @@ -339,21 +381,16 @@ def compile_model( def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="model"): """Attach to an existing training job. - Create an Estimator bound to an existing training job, each subclass is responsible to implement - ``_prepare_init_params_from_job_description()`` as this method delegates the actual conversion of a training - job description to the arguments that the class constructor expects. After attaching, if the training job has a - Complete status, it can be ``deploy()`` ed to create a SageMaker Endpoint and return a ``Predictor``. - - If the training job is in progress, attach will block and display log messages - from the training job, until the training job completes. + Create an Estimator bound to an existing training job. Each subclass + is responsible for implementing + ``_prepare_init_params_from_job_description()``, as this method delegates + the actual conversion of a training job description to the arguments + that the class constructor expects. After attaching, if the training job + has a Complete status, it can be ``deploy()`` ed to create a SageMaker + Endpoint and return a ``Predictor``. - Args: - training_job_name (str): The name of the training job to attach to. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one - using the default AWS configuration chain. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded (default: - 'model'). If no channel with the same name exists in the training job, this option will be ignored. + If the training job is in progress, attach will block and display log + messages from the training job, until the training job completes. Examples: >>> my_estimator.fit(wait=False) @@ -362,8 +399,20 @@ def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="m >>> attached_estimator = Estimator.attach(training_job_name) >>> attached_estimator.deploy() + Args: + training_job_name (str): The name of the training job to attach to. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one + using the default AWS configuration chain. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded (default: 'model').
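For reference, a hedged sketch of a compile_model() call built from the arguments documented above; the shapes, bucket, and versions are hypothetical:

# Assumes the estimator has a completed training job.
compiled_model = estimator.compile_model(
    target_instance_family="ml_c5",
    input_shape={"data": [1, 3, 224, 224]},
    output_path="s3://my-bucket/compiled-models",
    framework="mxnet",
    framework_version="1.2.1",
)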
If no channel + with the same name exists in the training job, this option will + be ignored. + Returns: - Instance of the calling ``Estimator`` Class with the attached training job. + Instance of the calling ``Estimator`` class with the attached + training job. """ sagemaker_session = sagemaker_session or Session() @@ -396,40 +445,52 @@ def deploy( model_name=None, **kwargs ): - """Deploy the trained model to an Amazon SageMaker endpoint and return a ``sagemaker.RealTimePredictor`` object. + """Deploy the trained model to an Amazon SageMaker endpoint and return a + ``sagemaker.RealTimePredictor`` object. More information: http://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-training.html Args: - initial_instance_count (int): Minimum number of EC2 instances to deploy to an endpoint for prediction. - instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction, - for example, 'ml.c4.xlarge'. - accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading - and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator - will be attached to the endpoint. - For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html - endpoint_name (str): Name to use for creating an Amazon SageMaker endpoint. If not specified, the name of - the training job is used. - use_compiled_model (bool): Flag to select whether to use compiled (optimized) model. Default: False. - update_endpoint (bool): Flag to update the model in an existing Amazon SageMaker endpoint. - If True, this will deploy a new EndpointConfig to an already existing endpoint and delete resources - corresponding to the previous EndpointConfig. Default: False - wait (bool): Whether the call should wait until the deployment of model completes (default: True). - model_name (str): Name to use for creating an Amazon SageMaker model. If not specified, the name of - the training job is used. - tags(List[dict[str, str]]): Optional. The list of tags to attach to this specific endpoint. Example: - >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}] - For more information about tags, see https://boto3.amazonaws.com/v1/documentation\ - /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags - - **kwargs: Passed to invocation of ``create_model()``. Implementations may customize - ``create_model()`` to accept ``**kwargs`` to customize model creation during deploy. + initial_instance_count (int): Minimum number of EC2 instances to + deploy to an endpoint for prediction. + instance_type (str): Type of EC2 instance to deploy to an endpoint + for prediction, for example, 'ml.c4.xlarge'. + accelerator_type (str): Type of Elastic Inference accelerator to + attach to an endpoint for model loading and inference, for + example, 'ml.eia1.medium'. If not specified, no Elastic + Inference accelerator will be attached to the endpoint. For more + information: + https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + endpoint_name (str): Name to use for creating an Amazon SageMaker + endpoint. If not specified, the name of the training job is + used. + use_compiled_model (bool): Flag to select whether to use a compiled + (optimized) model. Default: False. + update_endpoint (bool): Flag to update the model in an existing + Amazon SageMaker endpoint. If True, this will deploy a new + EndpointConfig to an already existing endpoint and delete + resources corresponding to the previous EndpointConfig.
Default: + False. + wait (bool): Whether the call should wait until the deployment of + model completes (default: True). + model_name (str): Name to use for creating an Amazon SageMaker + model. If not specified, the name of the training job is used. + tags (List[dict[str, str]]): Optional. The list of tags to attach to this specific + endpoint. Example: + >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}] + For more information about tags, see + https://boto3.amazonaws.com/v1/documentation\ + /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags + **kwargs: Passed to invocation of ``create_model()``. + Implementations may customize ``create_model()`` to accept + ``**kwargs`` to customize model creation during deploy. For more, see the implementation docs. Returns: sagemaker.predictor.RealTimePredictor: A predictor that provides a ``predict()`` method, - which can be used to send requests to the Amazon SageMaker endpoint and obtain inferences. + which can be used to send requests to the Amazon SageMaker + endpoint and obtain inferences. """ self._ensure_latest_training_job() endpoint_name = endpoint_name or self.latest_training_job.name @@ -458,7 +519,9 @@ def deploy( @property def model_data(self): - """str: The model location in S3. Only set if Estimator has been ``fit()``.""" + """str: The model location in S3. Only set if Estimator has been + ``fit()``. + """ if self.latest_training_job is not None: model_uri = self.sagemaker_session.sagemaker_client.describe_training_job( TrainingJobName=self.latest_training_job.name @@ -476,26 +539,31 @@ def model_data(self): @abstractmethod def create_model(self, **kwargs): - """Create a SageMaker ``Model`` object that can be deployed to an ``Endpoint``. + """Create a SageMaker ``Model`` object that can be deployed to an + ``Endpoint``. Args: - **kwargs: Keyword arguments used by the implemented method for creating the ``Model``. + **kwargs: Keyword arguments used by the implemented method for + creating the ``Model``. Returns: - sagemaker.model.Model: A SageMaker ``Model`` object. See :func:`~sagemaker.model.Model` for full details. + sagemaker.model.Model: A SageMaker ``Model`` object. See + :func:`~sagemaker.model.Model` for full details. """ @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor. Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded. Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = dict() @@ -572,29 +640,39 @@ def transformer( role=None, volume_kms_key=None, ): - """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the - SageMaker Session and base job name used by the Estimator. + """Return a ``Transformer`` that uses a SageMaker Model based on the + training job. It reuses the SageMaker Session and base job name used by + the Estimator. Args: instance_count (int): Number of EC2 instances to use. - instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'.
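A minimal sketch of a deploy() call using the arguments documented above; the endpoint name is a placeholder and the tag values come from the docstring's own example:

predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.c4.xlarge",
    endpoint_name="my-endpoint",                     # hypothetical name
    tags=[{"Key": "tagname", "Value": "tagvalue"}],  # tag format from the docstring
)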
- strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - env (dict): Environment variables to be set for use during the transform job (default: None). - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. + strategy (str): The strategy used to decide how to batch records in + a single request (default: None). Valid values: 'MULTI_RECORD' + and 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). + Valid values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If + not specified, results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the + transform output (default: None). + accept (str): The content type accepted by the endpoint deployed + during the transform job. + env (dict): Environment variables to be set for use during the + transform job (default: None). + max_concurrent_transforms (int): The maximum number of HTTP requests + to be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP + request to the container in MB. + tags (list[dict]): List of tags for labeling a transform job. If + none are specified, the tags used for the training job are used + for the transform job. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). """ tags = tags or self.tags @@ -629,7 +707,8 @@ def transformer( @property def training_job_analytics(self): - """Return a ``TrainingJobAnalytics`` object for the current training job. + """Return a ``TrainingJobAnalytics`` object for the current training + job. """ if self._current_job_name is None: raise ValueError("Estimator is not associated with a TrainingJob") @@ -638,9 +717,11 @@ def training_job_analytics(self): def get_vpc_config(self, vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT): - """ - Returns VpcConfig dict either from this Estimator's subnets and security groups, - or else validate and return an optional override value.
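A sketch of the transformer() flow documented above, with hypothetical S3 paths; transform() and wait() are the standard Transformer methods in this SDK:

transformer = estimator.transformer(
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path="s3://my-bucket/batch-output",  # hypothetical bucket
    assemble_with="Line",
)
transformer.transform(
    "s3://my-bucket/batch-input", content_type="text/csv", split_type="Line"
)
transformer.wait()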
+ """Returns VpcConfig dict either from this Estimator's subnets and + security groups, or else validate and return an optional override value. + + Args: + vpc_config_override: """ if vpc_config_override is vpc_utils.VPC_CONFIG_DEFAULT: return vpc_utils.to_dict(self.subnets, self.security_group_ids) @@ -649,22 +730,30 @@ def get_vpc_config(self, vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT): def _ensure_latest_training_job( self, error_message="Estimator is not associated with a training job" ): + """ + Args: + error_message: + """ if self.latest_training_job is None: raise ValueError(error_message) class _TrainingJob(_Job): + """Placeholder docstring""" + @classmethod def start_new(cls, estimator, inputs): """Create a new Amazon SageMaker training job from the estimator. Args: - estimator (sagemaker.estimator.EstimatorBase): Estimator object created by the user. - inputs (str): Parameters used when called :meth:`~sagemaker.estimator.EstimatorBase.fit`. + estimator (sagemaker.estimator.EstimatorBase): Estimator object + created by the user. + inputs (str): Parameters used when called + :meth:`~sagemaker.estimator.EstimatorBase.fit`. Returns: - sagemaker.estimator._TrainingJob: Constructed object that captures all information about the started - training job. + sagemaker.estimator._TrainingJob: Constructed object that captures + all information about the started training job. """ local_mode = estimator.sagemaker_session.local_mode @@ -714,9 +803,17 @@ def start_new(cls, estimator, inputs): @classmethod def _is_local_channel(cls, input_uri): + """ + Args: + input_uri: + """ return isinstance(input_uri, string_types) and input_uri.startswith("file://") def wait(self, logs=True): + """ + Args: + logs: + """ if logs: self.sagemaker_session.logs_for_job(self.job_name, wait=True) else: @@ -724,9 +821,8 @@ def wait(self, logs=True): class Estimator(EstimatorBase): - """ - A generic Estimator to train using any supplied algorithm. This class is designed for use with - algorithms that don't have their own, custom class. + """A generic Estimator to train using any supplied algorithm. This class is + designed for use with algorithms that don't have their own, custom class. """ def __init__( @@ -756,58 +852,82 @@ def __init__( Args: image_name (str): The container image to use for training. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - train_instance_count (int): Number of Amazon EC2 instances to use for training. - train_instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - train_volume_size (int): Size in GB of the EBS volume to use for storing input data - during training (default: 30). Must be large enough to store training data if File Mode is used - (which is the default). - train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached to the - training instance (default: None). - train_max_run (int): Timeout in seconds for training (default: 24 * 60 * 60). - After this amount of time Amazon SageMaker terminates the job regardless of its current status. - input_mode (str): The input mode that the algorithm supports (default: 'File'). Valid modes: - - * 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. 
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. - - This argument can be overriden on a per-channel basis using ``sagemaker.session.s3_input.input_mode``. - output_path (str): S3 location for saving the training result (model artifacts and output files). - If not specified, results are stored to a default bucket. If the bucket with the specific name + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + train_instance_count (int): Number of Amazon EC2 instances to use + for training. + train_instance_type (str): Type of EC2 instance to use for training, + for example, 'ml.c4.xlarge'. + train_volume_size (int): Size in GB of the EBS volume to use for + storing input data during training (default: 30). Must be large + enough to store training data if File Mode is used (which is the + default). + train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS + volume attached to the training instance (default: None). + train_max_run (int): Timeout in seconds for training (default: 24 * + 60 * 60). After this amount of time Amazon SageMaker terminates + the job regardless of its current status. + input_mode (str): The input mode that the algorithm supports + (default: 'File'). Valid modes: + + * 'File' - Amazon SageMaker copies the training dataset from the + S3 location to a local directory. + * 'Pipe' - Amazon SageMaker streams data directly from S3 to the + container via a Unix-named pipe. + + This argument can be overridden on a per-channel basis using + ``sagemaker.session.s3_input.input_mode``. + output_path (str): S3 location for saving the training result (model + artifacts and output files). If not specified, results are + stored to a default bucket. If the bucket with the specific name does not exist, the estimator creates the bucket during the :meth:`~sagemaker.estimator.EstimatorBase.fit` method execution. - output_kms_key (str): Optional. KMS key ID for encrypting the training output (default: None). - base_job_name (str): Prefix for training job name when the :meth:`~sagemaker.estimator.EstimatorBase.fit` - method launches. If not specified, the estimator generates a default job name, based on - the training image name and current timestamp. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + output_kms_key (str): Optional. KMS key ID for encrypting the + training output (default: None). + base_job_name (str): Prefix for training job name when the + :meth:`~sagemaker.estimator.EstimatorBase.fit` method launches. + If not specified, the estimator generates a default job name, + based on the training image name and current timestamp. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. - hyperparameters (dict): Dictionary containing the hyperparameters to initialize this estimator with. - tags (list[dict]): List of tags for labeling a training job.
For more, see + hyperparameters (dict): Dictionary containing the hyperparameters to + initialize this estimator with. + tags (list[dict]): List of tags for labeling a training job. For + more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - subnets (list[str]): List of subnet ids. If not specified training job will be created without VPC config. - security_group_ids (list[str]): List of security group ids. If not specified training job will be created - without VPC config. - model_uri (str): URI where a pre-trained model is stored, either locally or in S3 (default: None). If - specified, the estimator will create a channel pointing to the model so the training job can download - it. This model can be a 'model.tar.gz' from a previous training job, or other artifacts coming from a + subnets (list[str]): List of subnet ids. If not specified, the training + job will be created without VPC config. + security_group_ids (list[str]): List of security group ids. If not + specified, the training job will be created without VPC config. + model_uri (str): URI where a pre-trained model is stored, either + locally or in S3 (default: None). If specified, the estimator + will create a channel pointing to the model so the training job + can download it. This model can be a 'model.tar.gz' from a + previous training job, or other artifacts coming from a different source. - In local mode, this should point to the path in which the model is located and not the file itself, - as local Docker containers will try to mount the URI as a volume. - - More information: https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization - model_channel_name (str): Name of the channel where 'model_uri' will be downloaded (default: 'model'). - metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the - training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for - the regular expression used to extract the metric from the logs. This should be defined only - for jobs that don't use an Amazon algorithm. - encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is encrypted - for the training job (default: ``False``). + In local mode, this should point to the path in which the model + is located and not the file itself, as local Docker containers + will try to mount the URI as a volume. + + More information: + https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization + model_channel_name (str): Name of the channel where 'model_uri' will + be downloaded (default: 'model'). + metric_definitions (list[dict]): A list of dictionaries that defines + the metric(s) used to evaluate the training jobs. Each + dictionary contains two keys: 'Name' for the name of the metric, + and 'Regex' for the regular expression used to extract the + metric from the logs. This should be defined only for jobs that + don't use an Amazon algorithm. + encrypt_inter_container_traffic (bool): Specifies whether traffic + between training containers is encrypted for the training job + (default: ``False``). """ self.image_name = image_name self.hyperparam_dict = hyperparameters.copy() if hyperparameters else {} @@ -833,21 +953,26 @@ def __init__( ) def train_image(self): - """ - Returns the docker image to use for training.
-        The fit() method, that does the model training, calls this method to find the image to use for model training.
+        The fit() method, which does the model training, calls this method to
+        find the image to use for model training.
         """
         return self.image_name

     def set_hyperparameters(self, **kwargs):
+        """
+        Args:
+            **kwargs:
+        """
         for k, v in kwargs.items():
             self.hyperparam_dict[k] = v

     def hyperparameters(self):
         """Returns the hyperparameters as a dictionary to use for training.

-        The fit() method, that does the model training, calls this method to find the hyperparameters you specified.
+        The fit() method, which does the model training, calls this method to
+        find the hyperparameters you specified.
         """
         return self.hyperparam_dict

@@ -863,29 +988,37 @@ def create_model(
         vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT,
         **kwargs
     ):
-        """
-        Create a model to deploy.
+        """Create a model to deploy.
+
+        The serializer, deserializer, content_type, and accept arguments are only used to define a default
+        RealTimePredictor. They are ignored if an explicit predictor class is passed in. Other arguments
+        are passed through to the Model class.

         Args:
-            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
-                transform jobs. If not specified, the role from the Estimator will be used.
-            image (str): An container image to use for deploying the model. Defaults to the image used for training.
-            predictor_cls (RealTimePredictor): The predictor class to use when deploying the model.
-            serializer (callable): Should accept a single argument, the input data, and return a sequence
-                of bytes. May provide a content_type attribute that defines the endpoint request content type
-            deserializer (callable): Should accept two arguments, the result data and the response content type,
-                and return a sequence of bytes. May provide a content_type attribute that defines th endpoint
-                response Accept content type.
-            content_type (str): The invocation ContentType, overriding any content_type from the serializer
-            accept (str): The invocation Accept, overriding any accept from the deserializer.
-            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model.
+            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
+                which is also used during transform jobs. If not specified, the
+                role from the Estimator will be used.
+            image (str): A container image to use for deploying the model.
+                Defaults to the image used for training.
+            predictor_cls (RealTimePredictor): The predictor class to use when
+                deploying the model.
+            serializer (callable): Should accept a single argument, the input
+                data, and return a sequence of bytes. May provide a content_type
+                attribute that defines the endpoint request content type.
+            deserializer (callable): Should accept two arguments, the result
+                data and the response content type, and return a sequence of
+                bytes. May provide a content_type attribute that defines the
+                endpoint response Accept content type.
+            content_type (str): The invocation ContentType, overriding any
+                content_type from the serializer.
+            accept (str): The invocation Accept, overriding any accept from the
+                deserializer.
+            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on
+                the model.
                 Default: use subnets and security groups from this Estimator.
                 * 'Subnets' (list[str]): List of subnet ids.
                 * 'SecurityGroupIds' (list[str]): List of security group ids.
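To make the serializer/deserializer contract in these arguments concrete, here is a minimal sketch; the helper names and the `estimator` variable are hypothetical, and only the call signatures come from the docstring above:

    import json

    def csv_serializer(data):
        # Single argument in, raw request-body bytes out.
        return ",".join(str(x) for x in data).encode("utf-8")

    # Optional attribute: used as the endpoint request content type.
    csv_serializer.content_type = "text/csv"

    def json_deserializer(stream, content_type):
        # Result data and response content type in, deserialized value out.
        return json.loads(stream.read().decode("utf-8"))

    # model = estimator.create_model(serializer=csv_serializer,
    #                                deserializer=json_deserializer,
    #                                accept="application/json")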
- - The serializer, deserializer, content_type, and accept arguments are only used to define a default - RealTimePredictor. They are ignored if an explicit predictor class is passed in. Other arguments - are passed through to the Model class. + **kwargs: Returns: a Model ready for deployment. """ @@ -912,15 +1045,17 @@ def predict_wrapper(endpoint, session): @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = super(Estimator, cls)._prepare_init_params_from_job_description( job_details, model_channel_name @@ -959,14 +1094,16 @@ def __init__( git_config=None, **kwargs ): - """Base class initializer. Subclasses which override ``__init__`` should invoke ``super()`` + """Base class initializer. Subclasses which override ``__init__`` should + invoke ``super()`` Args: - entry_point (str): Path (absolute or relative) to the local Python source file which should be executed - as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5. - If 'git_config' is provided, 'entry_point' should be a relative location to the Python source file in - the Git repo. - Example: + entry_point (str): Path (absolute or relative) to the local Python + source file which should be executed as the entry point to + training. This should be compatible with either Python 2.7 or + Python 3.5. If 'git_config' is provided, 'entry_point' should be + a relative location to the Python source file in the Git repo. + Example With the following GitHub repo directory structure: @@ -976,45 +1113,12 @@ def __init__( >>> |----- test.py You can assign entry_point='src/train.py'. - git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``, - ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The ``repo`` field is required. - All other fields are optional. ``repo`` specifies the Git repository where your training script is - stored. If you don't provide ``branch``, the default value 'master' is used. If you don't provide - ``commit``, the latest commit in the specified branch is used. - Example: - - The following config: - - >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git', - >>> 'branch': 'test-branch-git-config', - >>> 'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'} - - results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout - the specified commit. - ``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. For GitHub - (or other Git) accounts, set ``2FA_enabled`` to 'True' if two-factor authentication is enabled for the - account, otherwise set it to 'False'. If you do not provide a value for ``2FA_enabled``, a default - value of 'False' is used. 
CodeCommit does not support two-factor authentication, so do not provide
-                "2FA_enabled" with CodeCommit repositories.
-
-                For GitHub and other Git repos, when SSH URLs are provided, it doesn't matter whether 2FA is
-                enabled or disabled; you should either have no passphrase for the SSH key pairs, or have the ssh-agent
-                configured so that you will not be prompted for SSH passphrase when you do 'git clone' command with SSH
-                URLs. When HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will be
-                used for authentication if provided (token prioritized); if 2FA is enabled, only token will be used for
-                authentication if provided. If required authentication info is not provided, python SDK will try to use
-                local credentials storage to authenticate. If that fails either, an error message will be thrown.
-
-                For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided. There is no token
-                in CodeCommit, so 'token' should not be provided too. When 'repo' is an SSH URL, the requirements are
-                the same as GitHub-like repos. When 'repo' is an HTTPS URL, username+password will be used for
-                authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential
-                helper or local credential storage for authentication.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from the entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker. If 'git_config' is provided,
-                'source_dir' should be a relative location to a directory in the Git repo.
-                Example:
+            source_dir (str): Path (absolute or relative) to a directory with
+                any other training source code dependencies aside from the entry
+                point file (default: None). Structure within this directory is
+                preserved when training on Amazon SageMaker. If 'git_config' is
+                provided, 'source_dir' should be a relative location to a
+                directory in the Git repo.
+
+                .. admonition:: Example

                     With the following GitHub repo directory structure:

@@ -1023,33 +1127,42 @@ def __init__(
                     >>> |----- train.py
                     >>> |----- test.py

-            and you need 'train.py' as entry point and 'test.py' as training source code as well, you can
-                assign entry_point='train.py', source_dir='src'.
-            hyperparameters (dict): Hyperparameters that will be used for training (default: None).
-                The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
-                For convenience, this accepts other types for keys and values, but ``str()`` will be called
-                to convert them before training.
-            enable_cloudwatch_metrics (bool): [DEPRECATED] Now there are cloudwatch metrics emitted by all SageMaker
-                training jobs. This will be ignored for now and removed in a further release.
-            container_log_level (int): Log level to use within the container (default: logging.INFO).
-                Valid values are defined in the Python logging module.
-            code_location (str): The S3 prefix URI where custom code will be uploaded (default: None).
-                The code file uploaded in S3 is 'code_location/source/sourcedir.tar.gz'.
-                If not specified, the default code location is s3://default_bucket/job-name/. And code file
-                uploaded to S3 is s3://default_bucket/job-name/source/sourcedir.tar.gz
-            image_name (str): An alternate image name to use instead of the official Sagemaker image
-                for the framework. This is useful to run one of the Sagemaker supported frameworks
-                with an image containing custom dependencies.
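As a sketch of the image_name override just described, assuming the generic Estimator's documented keyword arguments (the ECR URI and IAM role below are placeholders, not values from this patch):

    from sagemaker.estimator import Estimator

    estimator = Estimator(
        image_name="123456789012.dkr.ecr.us-west-2.amazonaws.com/my-image:latest",  # placeholder
        role="SageMakerRole",  # placeholder IAM role
        train_instance_count=1,
        train_instance_type="ml.c4.xlarge",
    )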
-            dependencies (list[str]): A list of paths to directories (absolute or relative) with
-                any additional libraries that will be exported to the container (default: []).
-                The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
-                If 'git_config' is provided, 'dependencies' should be a list of relative locations to directories
-                with any additional libraries needed in the Git repo.
-                Example:
-
-                    The following call
-                    >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
-                    results in the following inside the container:
+            and you need 'train.py' as entry point and 'test.py' as
+                training source code as well, you can assign
+                entry_point='train.py', source_dir='src'.
+            hyperparameters (dict): Hyperparameters that will be used for
+                training (default: None). The hyperparameters are made
+                accessible as a dict[str, str] to the training code on
+                SageMaker. For convenience, this accepts other types for keys
+                and values, but ``str()`` will be called to convert them before
+                training.
+            enable_cloudwatch_metrics (bool): [DEPRECATED] Now there are
+                cloudwatch metrics emitted by all SageMaker training jobs. This
+                will be ignored for now and removed in a further release.
+            container_log_level (int): Log level to use within the container
+                (default: logging.INFO). Valid values are defined in the Python
+                logging module.
+            code_location (str): The S3 prefix URI where custom code will be
+                uploaded (default: None). The code file uploaded in S3 is
+                'code_location/source/sourcedir.tar.gz'. If not specified, the
+                default code location is s3://default_bucket/job-name/. The code
+                file uploaded to S3 is
+                s3://default_bucket/job-name/source/sourcedir.tar.gz.
+            image_name (str): An alternate image name to use instead of the
+                official SageMaker image for the framework. This is useful to
+                run one of the SageMaker supported frameworks with an image
+                containing custom dependencies.
+            dependencies (list[str]): A list of paths to directories (absolute
+                or relative) with any additional libraries that will be exported
+                to the container (default: []). The library folders will be
+                copied to SageMaker in the same folder where the entrypoint is
+                copied. If 'git_config' is provided, 'dependencies' should be a
+                list of relative locations to directories with any additional
+                libraries needed in the Git repo.
+
+                .. admonition:: Example
+
+                    The following call
+                    >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
+                    results in the following inside the container:

                     >>> $ ls

@@ -1057,13 +1170,66 @@ def __init__(
                     >>> |------ train.py
                     >>> |------ common
                     >>> |------ virtual-env
+            enable_network_isolation (bool): Specifies whether container will
+                run in network isolation mode. Network isolation mode restricts
+                the container access to outside networks (such as the internet).
+                The container does not make any inbound or outbound network
+                calls. If True, a channel named "code" will be created for any
+                user entry script for training. The user entry script, files in
+                source_dir (if specified), and dependencies will be uploaded in
+                a tar to S3. Also known as internet-free mode (default:
+                ``False``).
+            git_config (dict[str, str]): Git configurations used for cloning
+                files, including ``repo``, ``branch``, ``commit``,
+                ``2FA_enabled``, ``username``, ``password`` and ``token``. The
+                ``repo`` field is required. All other fields are optional.
+                ``repo`` specifies the Git repository where your training script
+                is stored.
If you don't provide ``branch``, the default value
+                'master' is used. If you don't provide ``commit``, the latest
+                commit in the specified branch is used.
+
+                .. admonition:: Example
+
+                    The following config:
+
+                    >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git',
+                    >>>               'branch': 'test-branch-git-config',
+                    >>>               'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'}
-            enable_network_isolation (bool): Specifies whether container will run in network isolation mode. Network
-                isolation mode restricts the container access to outside networks (such as the internet). The container
-                does not make any inbound or outbound network calls. If True, a channel named "code" will be created
-                for any user entry script for training. The user entry script, files in source_dir (if specified), and
-                dependencies will be uploaded in a tar to S3. Also known as internet-free mode (default: `False`).
-            **kwargs: Additional kwargs passed to the ``EstimatorBase`` constructor.
+                    results in cloning the repo specified in 'repo', then
+                    checking out the 'test-branch-git-config' branch, and then
+                    checking out the specified commit.
+
+                ``2FA_enabled``, ``username``, ``password`` and ``token`` are
+                used for authentication. For GitHub (or other Git) accounts, set
+                ``2FA_enabled`` to 'True' if two-factor authentication is
+                enabled for the account, otherwise set it to 'False'. If you do
+                not provide a value for ``2FA_enabled``, a default value of
+                'False' is used. CodeCommit does not support two-factor
+                authentication, so do not provide "2FA_enabled" with CodeCommit
+                repositories.
+
+                For GitHub and other Git repos, when SSH URLs are provided, it
+                doesn't matter whether 2FA is enabled or disabled; you should
+                either have no passphrase for the SSH key pairs, or have the
+                ssh-agent configured so that you will not be prompted for an SSH
+                passphrase when you run the 'git clone' command with SSH URLs.
+                When HTTPS URLs are provided: if 2FA is disabled, then either
+                token or username+password will be used for authentication if
+                provided (token prioritized); if 2FA is enabled, only token will
+                be used for authentication if provided. If the required
+                authentication info is not provided, the Python SDK will try to
+                use local credentials storage to authenticate. If that also
+                fails, an error message is thrown.
+
+                For CodeCommit repos, 2FA is not supported, so '2FA_enabled'
+                should not be provided. There is no token in CodeCommit, so
+                'token' should not be provided either. When 'repo' is an SSH
+                URL, the requirements are the same as GitHub-like repos. When
+                'repo' is an HTTPS URL, username+password will be used for
+                authentication if they are provided; otherwise, the Python SDK
+                will try to use either the CodeCommit credential helper or local
+                credential storage for authentication.
+            **kwargs: Additional kwargs passed to the ``EstimatorBase``
+                constructor.
         """
         super(Framework, self).__init__(**kwargs)
         if entry_point.startswith("s3://"):
@@ -1100,11 +1266,13 @@ def enable_network_isolation(self):
         return self._enable_network_isolation

     def _prepare_for_training(self, job_name=None):
-        """Set hyperparameters needed for training. This method will also validate ``source_dir``.
+        """Set hyperparameters needed for training. This method will also
+        validate ``source_dir``.

         Args:
-            * job_name (str): Name of the training job to be created. If not specified, one is generated,
-                using the base name given to the constructor if applicable.
+            * job_name (str): Name of the training job to be created.
If not + specified, one is generated, using the base name given to the + constructor if applicable. """ super(Framework, self)._prepare_for_training(job_name=job_name) @@ -1154,7 +1322,6 @@ def _stage_user_code_in_s3(self): """Upload the user training script to s3 and return the location. Returns: s3 uri - """ local_mode = self.output_path.startswith("file://") @@ -1185,10 +1352,12 @@ def _stage_user_code_in_s3(self): ) def _model_source_dir(self): - """Get the appropriate value to pass as source_dir to model constructor on deploying + """Get the appropriate value to pass as source_dir to model constructor + on deploying Returns: - str: Either a local or an S3 path pointing to the source_dir to be used for code by the model to be deployed + str: Either a local or an S3 path pointing to the source_dir to be + used for code by the model to be deployed """ return ( self.source_dir if self.sagemaker_session.local_mode else self.uploaded_code.s3_prefix @@ -1197,8 +1366,8 @@ def _model_source_dir(self): def hyperparameters(self): """Return the hyperparameters as a dictionary to use for training. - The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which trains the model, calls this method - to find the hyperparameters. + The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which + trains the model, calls this method to find the hyperparameters. Returns: dict[str, str]: The hyperparameters. @@ -1207,15 +1376,17 @@ def hyperparameters(self): @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = super(Framework, cls)._prepare_init_params_from_job_description( job_details, model_channel_name @@ -1249,8 +1420,9 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na def train_image(self): """Return the Docker image to use for training. - The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training, - calls this method to find the image to use for model training. + The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does + the model training, calls this method to find the image to use for model + training. Returns: str: The URI of the Docker image. @@ -1269,21 +1441,16 @@ def train_image(self): def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="model"): """Attach to an existing training job. - Create an Estimator bound to an existing training job, each subclass is responsible to implement - ``_prepare_init_params_from_job_description()`` as this method delegates the actual conversion of a training - job description to the arguments that the class constructor expects. After attaching, if the training job has a - Complete status, it can be ``deploy()`` ed to create a SageMaker Endpoint and return a ``Predictor``. 
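As a rough illustration of the train_image() contract discussed in this hunk, a subclass supplies its own training image and inherits the rest of the machinery; the class name and ECR URI below are made up, and the create_model stub is only there to satisfy the abstract interface:

    from sagemaker.estimator import Framework

    class MyFramework(Framework):
        """Toy Framework subclass; the image URI is a placeholder."""

        def train_image(self):
            # fit() consults this method to pick the training container.
            return "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-framework:1.0"

        def create_model(self, **kwargs):
            raise NotImplementedError("model hosting is out of scope for this sketch")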
+ Create an Estimator bound to an existing training job, each subclass + is responsible to implement + ``_prepare_init_params_from_job_description()`` as this method delegates + the actual conversion of a training job description to the arguments + that the class constructor expects. After attaching, if the training job + has a Complete status, it can be ``deploy()`` ed to create a SageMaker + Endpoint and return a ``Predictor``. - If the training job is in progress, attach will block and display log messages - from the training job, until the training job completes. - - Args: - training_job_name (str): The name of the training job to attach to. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one - using the default AWS configuration chain. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded (default: - 'model'). If no channel with the same name exists in the training job, this option will be ignored. + If the training job is in progress, attach will block and display log + messages from the training job, until the training job completes. Examples: >>> my_estimator.fit(wait=False) @@ -1292,8 +1459,20 @@ def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="m >>> attached_estimator = Estimator.attach(training_job_name) >>> attached_estimator.deploy() + Args: + training_job_name (str): The name of the training job to attach to. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one + using the default AWS configuration chain. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded (default: 'model'). If no channel + with the same name exists in the training job, this option will + be ignored. + Returns: - Instance of the calling ``Estimator`` Class with the attached training job. + Instance of the calling ``Estimator`` Class with the attached + training job. """ estimator = super(Framework, cls).attach( training_job_name, sagemaker_session, model_channel_name @@ -1308,10 +1487,19 @@ def attach(cls, training_job_name, sagemaker_session=None, model_channel_name="m @staticmethod def _json_encode_hyperparameters(hyperparameters): + """ + Args: + hyperparameters: + """ return {str(k): json.dumps(v) for (k, v) in hyperparameters.items()} @classmethod def _update_init_params(cls, hp, tf_arguments): + """ + Args: + hp: + tf_arguments: + """ updated_params = {} for argument in tf_arguments: value = hp.pop(argument, None) @@ -1337,31 +1525,42 @@ def transformer( model_server_workers=None, volume_kms_key=None, ): - """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the - SageMaker Session and base job name used by the Estimator. + """Return a ``Transformer`` that uses a SageMaker Model based on the + training job. It reuses the SageMaker Session and base job name used by + the Estimator. Args: instance_count (int): Number of EC2 instances to use. - instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). 
Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - env (dict): Environment variables to be set for use during the transform job (default: None). - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. + strategy (str): The strategy used to decide how to batch records in + a single request (default: None). Valid values: 'MULTI_RECORD' + and 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). + Valid values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If + not specified, results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the + transform output (default: None). + accept (str): The content type accepted by the endpoint deployed + during the transform job. + env (dict): Environment variables to be set for use during the + transform job (default: None). + max_concurrent_transforms (int): The maximum number of HTTP requests + to be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP + request to the container in MB. + tags (list[dict]): List of tags for labeling a transform job. If + none specified, then the tags used for the training job are used + for the transform job. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). """ role = role or self.role @@ -1407,6 +1606,11 @@ def transformer( def _s3_uri_prefix(channel_name, s3_data): + """ + Args: + channel_name: + s3_data: + """ if isinstance(s3_data, s3_input): s3_uri = s3_data.config["DataSource"]["S3DataSource"]["S3Uri"] else: @@ -1420,6 +1624,10 @@ def _s3_uri_prefix(channel_name, s3_data): # Also accepts other valid input types, e.g. dict and s3_input. def _s3_uri_without_prefix_from_input(input_data): # Unpack an input_config object from a dict if a dict was passed in. 
+ """ + Args: + input_data: + """ if isinstance(input_data, dict): response = {} for channel_name, channel_s3_uri in input_data.items(): diff --git a/src/sagemaker/fw_registry.py b/src/sagemaker/fw_registry.py index 19772a6fa6..71403f7101 100644 --- a/src/sagemaker/fw_registry.py +++ b/src/sagemaker/fw_registry.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -35,9 +36,12 @@ def registry(region_name, framework=None): - """ - Return docker registry for the given AWS region for the given framework. + """Return docker registry for the given AWS region for the given framework. This is only used for SparkML and Scikit-learn for now. + + Args: + region_name: + framework: """ try: account_id = image_registry_map[region_name][framework] @@ -48,6 +52,12 @@ def registry(region_name, framework=None): def default_framework_uri(framework, region_name, image_tag): + """ + Args: + framework: + region_name: + image_tag: + """ repository_name = "sagemaker-{}".format(framework) account_name = registry(region_name, framework) return "{}/{}:{}".format(account_name, repository_name, image_tag) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 8deb803fa3..b23deec8fd 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from collections import namedtuple @@ -70,20 +71,28 @@ def is_version_equal_or_higher(lowest_version, framework_version): - """Determine whether the ``framework_version`` is equal to or higher than ``lowest_version`` + """Determine whether the ``framework_version`` is equal to or higher than + ``lowest_version`` Args: - lowest_version (List[int]): lowest version represented in an integer list + lowest_version (List[int]): lowest version represented in an integer + list framework_version (str): framework version string Returns: - bool: Whether or not framework_version is equal to or higher than lowest_version + bool: Whether or not framework_version is equal to or higher than + lowest_version """ version_list = [int(s) for s in framework_version.split(".")] return version_list >= lowest_version[0 : len(version_list)] def _is_merged_versions(framework, framework_version): + """ + Args: + framework: + framework_version: + """ lowest_version_list = MERGED_FRAMEWORKS_LOWEST_VERSIONS.get(framework) if lowest_version_list: return is_version_equal_or_higher(lowest_version_list, framework_version) @@ -91,6 +100,14 @@ def _is_merged_versions(framework, framework_version): def _using_merged_images(region, framework, py_version, accelerator_type, framework_version): + """ + Args: + region: + framework: + py_version: + accelerator_type: + framework_version: + """ is_gov_region = region in VALID_ACCOUNTS_BY_REGION is_py3 = py_version == "py3" or py_version is None is_merged_versions = _is_merged_versions(framework, framework_version) @@ -98,6 +115,15 @@ def _using_merged_images(region, framework, py_version, accelerator_type, framew def _registry_id(region, framework, py_version, account, accelerator_type, 
framework_version):
+    """
+    Args:
+        region:
+        framework:
+        py_version:
+        account:
+        accelerator_type:
+        framework_version:
+    """
     if _using_merged_images(region, framework, py_version, accelerator_type, framework_version):
         return "763104351884"
     return VALID_ACCOUNTS_BY_REGION.get(region, account)
@@ -118,13 +144,17 @@ def create_image_uri(
     Args:
         region (str): AWS region where the image is uploaded.
         framework (str): framework used by the image.
-        instance_type (str): SageMaker instance type. Used to determine device type (cpu/gpu/family-specific optimized).
+        instance_type (str): SageMaker instance type. Used to determine device
+            type (cpu/gpu/family-specific optimized).
         framework_version (str): The version of the framework.
-        py_version (str): Optional. Python version. If specified, should be one of 'py2' or 'py3'.
-            If not specified, image uri will not include a python component.
-        account (str): AWS account that contains the image. (default: '520713654638')
+        py_version (str): Optional. Python version. If specified, should be one
+            of 'py2' or 'py3'. If not specified, the image URI will not include
+            a Python component.
+        account (str): AWS account that contains the image. (default:
+            '520713654638')
         accelerator_type (str): SageMaker Elastic Inference accelerator type.
-        optimized_families (str): Instance families for which there exist specific optimized images.
+        optimized_families (str): Instance families for which there exist
+            specific optimized images.

     Returns:
         str: The appropriate image URI based on the given parameters.
@@ -186,6 +216,12 @@ def create_image_uri(
 def _accelerator_type_valid_for_framework(
     framework, accelerator_type=None, optimized_families=None
 ):
+    """
+    Args:
+        framework:
+        accelerator_type:
+        optimized_families:
+    """
     if accelerator_type is None:
         return False

@@ -214,11 +250,12 @@ def validate_source_dir(script, directory):
     """Validate that the source directory exists and it contains the user script

     Args:
-        script (str):  Script filename.
+        script (str): Script filename.
         directory (str): Directory containing the source file.

     Raises:
-        ValueError: If ``directory`` does not exist, is not a directory, or does not contain ``script``.
+        ValueError: If ``directory`` does not exist, is not a directory, or does
+            not contain ``script``.
     """
     if directory:
         if not os.path.isfile(os.path.join(directory, script)):
@@ -232,29 +269,32 @@ def tar_and_upload_dir(
     session, bucket, s3_key_prefix, script, directory=None, dependencies=None, kms_key=None
 ):
-    """Package source files and upload a compress tar file to S3. The S3 location will be
-    ``s3:///s3_key_prefix/sourcedir.tar.gz``.
+    """Package source files and upload a compressed tar file to S3. The S3
+    location will be ``s3:///s3_key_prefix/sourcedir.tar.gz``.

-    If directory is an S3 URI, an UploadedCode object will be returned, but nothing will be
-    uploaded to S3 (this allow reuse of code already in S3).
+    If directory is an S3 URI, an UploadedCode object will be returned, but
+    nothing will be uploaded to S3 (this allows reuse of code already in S3).

-    If directory is None, the script will be added to the archive at ``./``.
+    If directory is None, the script will be added to the archive at
+    ``./``.

-    If directory is not None, the (recursive) contents of the directory will be added to
-    the archive. directory is treated as the base path of the archive, and the script name is
-    assumed to be a filename or relative path inside the directory.
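A quick illustration of calling create_image_uri as documented above; the argument values are arbitrary, and the exact URI returned should be treated as an assumption (the docstring only guarantees a tag of the form framework_version-device-py_version):

    from sagemaker.fw_utils import create_image_uri

    uri = create_image_uri(
        region="us-west-2",
        framework="tensorflow",
        instance_type="ml.p3.2xlarge",  # a GPU family, so a 'gpu' image is selected
        framework_version="1.13.1",
        py_version="py3",
    )
    print(uri)  # an ECR URI ending in something like 'tensorflow:1.13.1-gpu-py3'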
+    If directory is not None, the (recursive) contents of the directory will
+    be added to the archive. directory is treated as the base path of the
+    archive, and the script name is assumed to be a filename or relative path
+    inside the directory.

     Args:
         session (boto3.Session): Boto session used to access S3.
         bucket (str): S3 bucket to which the compressed file is uploaded.
         s3_key_prefix (str): Prefix for the S3 key.
         script (str): Script filename or path.
-        directory (str): Optional. Directory containing the source file. If it starts with "s3://",
-            no action is taken.
-        dependencies (List[str]): Optional. A list of paths to directories (absolute or relative)
-            containing additional libraries that will be copied into
-            /opt/ml/lib
-        kms_key (str): Optional. KMS key ID used to upload objects to the bucket (default: None).
+        directory (str): Optional. Directory containing the source file. If it
+            starts with "s3://", no action is taken.
+        dependencies (List[str]): Optional. A list of paths to directories
+            (absolute or relative) containing additional libraries that will be
+            copied into /opt/ml/lib
+        kms_key (str): Optional. KMS key ID used to upload objects to the bucket
+            (default: None).

     Returns:
         sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and
@@ -287,6 +327,11 @@ def tar_and_upload_dir(


 def _list_files_to_compress(script, directory):
+    """
+    Args:
+        script:
+        directory:
+    """
     if directory is None:
         return [script]

@@ -310,9 +355,7 @@ def framework_name_from_image(image_name):
     Returns:
         tuple: A tuple containing:

-            str: The framework name
-            str: The Python version
-            str: The image tag
+            str: The framework name
+            str: The Python version
+            str: The image tag
+            str: If the image is script mode
     """
     sagemaker_pattern = re.compile(ECR_URI_PATTERN)
@@ -347,7 +390,8 @@ def framework_version_from_tag(image_tag):
     """Extract the framework version from the image tag.

     Args:
-        image_tag (str): Image tag, which should take the form '--'
+        image_tag (str): Image tag, which should take the form
+            '--'

     Returns:
         str: The framework version.
@@ -358,15 +402,15 @@


 def parse_s3_url(url):
-    """Returns an (s3 bucket, key name/prefix) tuple from a url with an s3 scheme
+    """Returns an (s3 bucket, key name/prefix) tuple from a url with an s3
+    scheme

     Args:
         url (str):

     Returns:
         tuple: A tuple containing:

-            str: S3 bucket name
-            str: S3 key
+            str: S3 bucket name
+            str: S3 key
     """
     parsed_url = urlparse(url)
     if parsed_url.scheme != "s3":
@@ -394,6 +438,11 @@ def model_code_key_prefix(code_location_key_prefix, model_name, image):


 def empty_framework_version_warning(default_version, latest_version):
+    """
+    Args:
+        default_version:
+        latest_version:
+    """
     msgs = [EMPTY_FRAMEWORK_VERSION_WARNING.format(default_version)]
     if default_version != latest_version:
         msgs.append(LATER_FRAMEWORK_VERSION_WARNING.format(latest=latest_version))
@@ -401,4 +450,8 @@


 def python_deprecation_warning(framework):
+    """
+    Args:
+        framework:
+    """
     return PYTHON_2_DEPRECATION_WARNING.format(framework=framework)
diff --git a/src/sagemaker/git_utils.py b/src/sagemaker/git_utils.py
index 8490ec5788..8989d3c433 100644
--- a/src/sagemaker/git_utils.py
+++ b/src/sagemaker/git_utils.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
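Of the fw_utils helpers above, parse_s3_url is easy to pin down with a doctest-style check; the expected tuple follows directly from the docstring, and the error case is noted as a comment:

    from sagemaker.fw_utils import parse_s3_url

    bucket, key = parse_s3_url("s3://my-bucket/training/data.csv")
    assert (bucket, key) == ("my-bucket", "training/data.csv")
    # parse_s3_url("file:///tmp/data.csv") raises ValueError (non-S3 scheme)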
+"""Placeholder docstring""" from __future__ import absolute_import import os @@ -21,37 +22,55 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None): - """Git clone repo containing the training code and serving code. This method also validate ``git_config``, - and set ``entry_point``, ``source_dir`` and ``dependencies`` to the right file or directory in the repo cloned. + """Git clone repo containing the training code and serving code. This method + also validate ``git_config``, and set ``entry_point``, ``source_dir`` and + ``dependencies`` to the right file or directory in the repo cloned. Args: - git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``, - ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The ``repo`` field is required. - All other fields are optional. ``repo`` specifies the Git repository where your training script is stored. - If you don't provide ``branch``, the default value 'master' is used. If you don't provide ``commit``, - the latest commit in the specified branch is used. ``2FA_enabled``, ``username``, ``password`` and - ``token`` are for authentication purpose. If ``2FA_enabled`` is not provided, we consider 2FA as disabled. - - For GitHub and GitHub-like repos, when SSH URLs are provided, it doesn't matter whether 2FA is - enabled or disabled; you should either have no passphrase for the SSH key pairs, or have the ssh-agent - configured so that you will not be prompted for SSH passphrase when you do 'git clone' command with SSH - URLs. When https URLs are provided: if 2FA is disabled, then either token or username+password will be - used for authentication if provided (token prioritized); if 2FA is enabled, only token will be used for - authentication if provided. If required authentication info is not provided, python SDK will try to use - local credentials storage to authenticate. If that fails either, an error message will be thrown. - - For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided. There is no token in - CodeCommit, so 'token' should not be provided too. When 'repo' is an SSH URL, the requirements are the - same as GitHub-like repos. When 'repo' is an https URL, username+password will be used for - authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential - helper or local credential storage for authentication. - entry_point (str): A relative location to the Python source file which should be executed as the entry point - to training or model hosting in the Git repo. - source_dir (str): A relative location to a directory with other training or model hosting source code - dependencies aside from the entry point file in the Git repo (default: None). Structure within this + git_config (dict[str, str]): Git configurations used for cloning files, + including ``repo``, ``branch``, ``commit``, ``2FA_enabled``, + ``username``, ``password`` and ``token``. The ``repo`` field is + required. All other fields are optional. ``repo`` specifies the Git + repository where your training script is stored. If you don't + provide ``branch``, the default value 'master' is used. If you don't + provide ``commit``, the latest commit in the specified branch is + used. ``2FA_enabled``, ``username``, ``password`` and ``token`` are + for authentication purpose. If ``2FA_enabled`` is not provided, we + consider 2FA as disabled. 
+
+            For GitHub and GitHub-like repos, when SSH URLs are provided, it
+            doesn't matter whether 2FA is enabled or disabled; you should either
+            have no passphrase for the SSH key pairs, or have the ssh-agent
+            configured so that you will not be prompted for an SSH passphrase
+            when you run the 'git clone' command with SSH URLs. When https URLs
+            are provided: if 2FA is disabled, then either token or
+            username+password will be used for authentication if provided
+            (token prioritized); if 2FA is enabled, only token will be used for
+            authentication if provided. If the required authentication info is
+            not provided, the Python SDK will try to use local credentials
+            storage to authenticate. If that also fails, an error message is
+            thrown.
+
+            For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should
+            not be provided. There is no token in CodeCommit, so 'token' should
+            not be provided either. When 'repo' is an SSH URL, the requirements
+            are the same as GitHub-like repos. When 'repo' is an https URL,
+            username+password will be used for authentication if they are
+            provided; otherwise, the Python SDK will try to use either the
+            CodeCommit credential helper or local credential storage for
+            authentication.
+        entry_point (str): A relative location to the Python source file which
+            should be executed as the entry point to training or model hosting
+            in the Git repo.
+        source_dir (str): A relative location to a directory with other training
+            or model hosting source code dependencies aside from the entry point
+            file in the Git repo (default: None). Structure within this
            directory are preserved when training on Amazon SageMaker.
-        dependencies (list[str]): A list of relative locations to directories with any additional libraries that will
-            be exported to the container in the Git repo (default: []).
+        dependencies (list[str]): A list of relative locations to directories
+            with any additional libraries that will be exported to the container
+            in the Git repo (default: []).
+
+    Returns:
+        dict: A dict that contains the updated values of entry_point, source_dir
+        and dependencies.

     Raises:
         CalledProcessError: If 1. failed to clone git repo
                                2. failed to checkout the required commit
         ValueError: If 1. entry point specified does not exist in the repo
                        2. source dir specified does not exist in the repo
                        3. dependencies specified do not exist in the repo
                        4. wrong format is provided for git_config
-
-    Returns:
-        dict: A dict that contains the updated values of entry_point, source_dir and dependencies.
     """
     if entry_point is None:
         raise ValueError("Please provide an entry point.")
@@ -102,6 +118,10 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None):


 def _validate_git_config(git_config):
+    """
+    Args:
+        git_config:
+    """
     if "repo" not in git_config:
         raise ValueError("Please provide a repo for git_config.")
     for key in git_config:
@@ -113,11 +133,12 @@


 def _generate_and_run_clone_command(git_config, dest_dir):
-    """check if a git_config param is valid, if it is, create the command to git clone the repo, and run it.
+    """Check if a git_config param is valid; if it is, create the command to git
+    clone the repo, and run it.

     Args:
-        git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
-            and ``commit``.
+        git_config (dict[str, str]): Git configurations used for cloning files,
+            including ``repo``, ``branch`` and ``commit``.
         dest_dir (str): The local directory to clone the Git repo into.
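The authentication rules spelled out above reduce to a few git_config shapes; the dicts below are illustrative only (the token and CodeCommit credentials are placeholders, and the GitHub URL reuses the repo from the earlier docstring example):

    # HTTPS GitHub-like repo with 2FA enabled: only 'token' is consulted.
    git_config_github_2fa = {
        "repo": "https://github.com/aws/sagemaker-python-sdk.git",
        "branch": "master",
        "2FA_enabled": True,
        "token": "<personal-access-token>",  # placeholder
    }

    # HTTPS CodeCommit repo: username+password; never '2FA_enabled' or 'token'.
    git_config_codecommit = {
        "repo": "https://git-codecommit.us-west-2.amazonaws.com/v1/repos/my-repo",
        "username": "<codecommit-username>",  # placeholder
        "password": "<codecommit-password>",  # placeholder
    }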
     Raises:
         ValueError: If git_config['repo'] is in the wrong format.
         CalledProcessError: If failed to clone git repo.
@@ -132,12 +153,12 @@


 def _clone_command_for_github_like(git_config, dest_dir):
-    """check if a git_config param representing a GitHub (or like) repo is valid, if it is, create the command to
-    git clone the repo, and run it.
+    """Check if a git_config param representing a GitHub (or like) repo is
+    valid; if it is, create the command to git clone the repo, and run it.

     Args:
-        git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
-            and ``commit``.
+        git_config (dict[str, str]): Git configurations used for cloning files,
+            including ``repo``, ``branch`` and ``commit``.
         dest_dir (str): The local directory to clone the Git repo into.

     Raises:
@@ -157,12 +178,22 @@ def _clone_command_for_github_like(git_config, dest_dir):


 def _clone_command_for_ssh(git_config, dest_dir):
+    """
+    Args:
+        git_config:
+        dest_dir:
+    """
     if "username" in git_config or "password" in git_config or "token" in git_config:
         warnings.warn("SSH cloning, authentication information in git config will be ignored.")
     _run_clone_command(git_config["repo"], dest_dir)


 def _clone_command_for_github_like_https_2fa_disabled(git_config, dest_dir):
+    """
+    Args:
+        git_config:
+        dest_dir:
+    """
     updated_url = git_config["repo"]
     if "token" in git_config:
         if "username" in git_config or "password" in git_config:
@@ -178,6 +209,11 @@


 def _clone_command_for_github_like_https_2fa_enabled(git_config, dest_dir):
+    """
+    Args:
+        git_config:
+        dest_dir:
+    """
     updated_url = git_config["repo"]
     if "token" in git_config:
         if "username" in git_config or "password" in git_config:
@@ -187,17 +223,17 @@


 def _clone_command_for_codecommit(git_config, dest_dir):
-    """check if a git_config param representing a CodeCommit repo is valid, if it is, create the command to
-    git clone the repo, and run it.
+    """Check if a git_config param representing a CodeCommit repo is valid; if
+    it is, create the command to git clone the repo, and run it.

-    Args:
-        git_config ((dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``
-            and ``commit``.
-        dest_dir (str): The local directory to clone the Git repo into.
+    Args:
+        git_config (dict[str, str]): Git configurations used for cloning files,
+            including ``repo``, ``branch`` and ``commit``.
+        dest_dir (str): The local directory to clone the Git repo into.

-    Raises:
-        ValueError: If git_config['repo'] is in the wrong format.
-        CalledProcessError: If failed to clone git repo.
+    Raises:
+        ValueError: If git_config['repo'] is in the wrong format.
+        CalledProcessError: If failed to clone git repo.
     """
     is_https = git_config["repo"].startswith("https://git-codecommit")
     is_ssh = git_config["repo"].startswith("ssh://git-codecommit")
@@ -214,6 +250,11 @@ def _clone_command_for_codecommit(git_config, dest_dir):


 def _clone_command_for_codecommit_https(git_config, dest_dir):
+    """
+    Args:
+        git_config:
+        dest_dir:
+    """
     updated_url = git_config["repo"]
     if "username" in git_config and "password" in git_config:
         updated_url = _insert_username_and_password_to_repo_url(
@@ -225,7 +266,8 @@


 def _run_clone_command(repo_url, dest_dir):
-    """Run the 'git clone' command with the repo url and the directory to clone the repo into.
+ """Run the 'git clone' command with the repo url and the directory to clone + the repo into. Args: repo_url (str): Git repo url to be cloned. @@ -250,8 +292,8 @@ def _run_clone_command(repo_url, dest_dir): def _insert_token_to_repo_url(url, token): - """Insert the token to the Git repo url, to make a component of the git clone command. This method can - only be called when repo_url is an https url. + """Insert the token to the Git repo url, to make a component of the git + clone command. This method can only be called when repo_url is an https url. Args: url (str): Git repo url where the token should be inserted into. @@ -267,8 +309,9 @@ def _insert_token_to_repo_url(url, token): def _insert_username_and_password_to_repo_url(url, username, password): - """Insert the username and the password to the Git repo url, to make a component of the git clone command. - This method can only be called when repo_url is an https url. + """Insert the username and the password to the Git repo url, to make a + component of the git clone command. This method can only be called when + repo_url is an https url. Args: url (str): Git repo url where the token should be inserted into. @@ -289,13 +332,13 @@ def _checkout_branch_and_commit(git_config, dest_dir): """Checkout the required branch and commit. Args: - git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch`` - and ``commit``. + git_config (dict[str, str]): Git configurations used for cloning files, + including ``repo``, ``branch`` and ``commit``. dest_dir (str): the directory where the repo is cloned Raises: - CalledProcessError: If 1. failed to checkout the required branch - 2. failed to checkout the required commit + CalledProcessError: If 1. failed to checkout the required branch 2. + failed to checkout the required commit """ if "branch" in git_config: subprocess.check_call(args=["git", "checkout", git_config["branch"]], cwd=str(dest_dir)) diff --git a/src/sagemaker/job.py b/src/sagemaker/job.py index e590e474a7..0b7327757a 100644 --- a/src/sagemaker/job.py +++ b/src/sagemaker/job.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from abc import abstractmethod @@ -20,14 +21,21 @@ class _Job(object): - """Handle creating, starting and waiting for Amazon SageMaker jobs to finish. + """Handle creating, starting and waiting for Amazon SageMaker jobs to + finish. This class shouldn't be directly instantiated. - Subclasses must define a way to create, start and wait for an Amazon SageMaker job. + Subclasses must define a way to create, start and wait for an Amazon + SageMaker job. """ def __init__(self, sagemaker_session, job_name): + """ + Args: + sagemaker_session: + job_name: + """ self.sagemaker_session = sagemaker_session self.job_name = job_name @@ -36,20 +44,29 @@ def start_new(self, estimator, inputs): """Create a new Amazon SageMaker job from the estimator. Args: - estimator (sagemaker.estimator.EstimatorBase): Estimator object created by the user. - inputs (str): Parameters used when called :meth:`~sagemaker.estimator.EstimatorBase.fit`. + estimator (sagemaker.estimator.EstimatorBase): Estimator object + created by the user. + inputs (str): Parameters used when called + :meth:`~sagemaker.estimator.EstimatorBase.fit`. 
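The two _insert_* helpers above perform the standard credential embedding for https URLs; a standalone sketch that is equivalent in effect (not the module's actual code):

    def insert_token(url, token):
        # https://host/path -> https://<token>@host/path
        assert url.startswith("https://")
        return url.replace("https://", "https://{}@".format(token), 1)

    def insert_username_and_password(url, username, password):
        # https://host/path -> https://<username>:<password>@host/path
        assert url.startswith("https://")
        return url.replace("https://", "https://{}:{}@".format(username, password), 1)

    print(insert_token("https://example.com/repo.git", "TOKEN"))
    # https://TOKEN@example.com/repo.git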
Returns: - sagemaker.job: Constructed object that captures all information about the started job. + sagemaker.job: Constructed object that captures all information + about the started job. """ @abstractmethod def wait(self): - """Wait for the Amazon SageMaker job to finish. - """ + """Wait for the Amazon SageMaker job to finish.""" @staticmethod def _load_config(inputs, estimator, expand_role=True, validate_uri=True): + """ + Args: + inputs: + estimator: + expand_role: + validate_uri: + """ input_config = _Job._format_inputs_to_input_config(inputs, validate_uri) role = ( estimator.sagemaker_session.expand_role(estimator.role) @@ -98,6 +115,11 @@ def _load_config(inputs, estimator, expand_role=True, validate_uri=True): @staticmethod def _format_inputs_to_input_config(inputs, validate_uri=True): + """ + Args: + inputs: + validate_uri: + """ if inputs is None: return None @@ -132,12 +154,24 @@ def _format_inputs_to_input_config(inputs, validate_uri=True): @staticmethod def _convert_input_to_channel(channel_name, channel_s3_input): + """ + Args: + channel_name: + channel_s3_input: + """ channel_config = channel_s3_input.config.copy() channel_config["ChannelName"] = channel_name return channel_config @staticmethod def _format_string_uri_input(uri_input, validate_uri=True, content_type=None, input_mode=None): + """ + Args: + uri_input: + validate_uri: + content_type: + input_mode: + """ if isinstance(uri_input, str) and validate_uri and uri_input.startswith("s3://"): return s3_input(uri_input, content_type=content_type, input_mode=input_mode) if isinstance(uri_input, str) and validate_uri and uri_input.startswith("file://"): @@ -168,6 +202,15 @@ def _prepare_channel( content_type=None, input_mode=None, ): + """ + Args: + input_config: + channel_uri: + channel_name: + validate_uri: + content_type: + input_mode: + """ if not channel_uri: return None if not channel_name: @@ -189,6 +232,11 @@ def _prepare_channel( @staticmethod def _format_model_uri_input(model_uri, validate_uri=True): + """ + Args: + model_uri: + validate_uri: + """ if isinstance(model_uri, string_types) and validate_uri and model_uri.startswith("s3://"): return s3_input( model_uri, @@ -214,6 +262,10 @@ def _format_model_uri_input(model_uri, validate_uri=True): @staticmethod def _format_record_set_list_input(inputs): # Deferred import due to circular dependency + """ + Args: + inputs: + """ from sagemaker.amazon.amazon_estimator import RecordSet input_dict = {} @@ -230,6 +282,11 @@ def _format_record_set_list_input(inputs): @staticmethod def _prepare_output_config(s3_path, kms_key_id): + """ + Args: + s3_path: + kms_key_id: + """ config = {"S3OutputPath": s3_path} if kms_key_id is not None: config["KmsKeyId"] = kms_key_id @@ -237,6 +294,13 @@ def _prepare_output_config(s3_path, kms_key_id): @staticmethod def _prepare_resource_config(instance_count, instance_type, volume_size, train_volume_kms_key): + """ + Args: + instance_count: + instance_type: + volume_size: + train_volume_kms_key: + """ resource_config = { "InstanceCount": instance_count, "InstanceType": instance_type, @@ -249,8 +313,13 @@ def _prepare_resource_config(instance_count, instance_type, volume_size, train_v @staticmethod def _prepare_stop_condition(max_run): + """ + Args: + max_run: + """ return {"MaxRuntimeInSeconds": max_run} @property def name(self): + """Placeholder docstring""" return self.job_name diff --git a/src/sagemaker/local/__init__.py b/src/sagemaker/local/__init__.py index ad5dea8c3d..cd1fab2107 100644 --- a/src/sagemaker/local/__init__.py +++ 
b/src/sagemaker/local/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from .local_session import ( # noqa: F401 diff --git a/src/sagemaker/local/data.py b/src/sagemaker/local/data.py index cf3cd88af5..a1256448e4 100644 --- a/src/sagemaker/local/data.py +++ b/src/sagemaker/local/data.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import os @@ -28,22 +29,22 @@ def get_data_source_instance(data_source, sagemaker_session): - """Return an Instance of :class:`sagemaker.local.data.DataSource` that can handle - the provided data_source URI. + """Return an Instance of :class:`sagemaker.local.data.DataSource` that can + handle the provided data_source URI. data_source can be either file:// or s3:// Args: data_source (str): a valid URI that points to a data source. - sagemaker_session (:class:`sagemaker.session.Session`): a SageMaker Session to interact with - S3 if required. + sagemaker_session (:class:`sagemaker.session.Session`): a SageMaker Session to + interact with S3 if required. Returns: - :class:`sagemaker.local.data.DataSource`: an Instance of a Data Source + sagemaker.local.data.DataSource: an Instance of a Data Source Raises: - ValueError: If parsed_uri scheme is neither `file` nor `s3`, raise an error. - + ValueError: If parsed_uri scheme is neither `file` nor `s3` , raise an + error. """ parsed_uri = urlparse(data_source) if parsed_uri.scheme == "file": @@ -57,11 +58,11 @@ def get_data_source_instance(data_source, sagemaker_session): def get_splitter_instance(split_type): """Return an Instance of :class:`sagemaker.local.data.Splitter` according to - the specified `split_type`. + the specified `split_type` . Args: - split_type (str): either 'Line' or 'RecordIO'. Can be left as None to signal no data split - will happen. + split_type (str): either 'Line' or 'RecordIO'. Can be left as None to + signal no data split will happen. Returns :class:`sagemaker.local.data.Splitter`: an Instance of a Splitter @@ -93,12 +94,15 @@ def get_batch_strategy_instance(strategy, splitter): class DataSource(with_metaclass(ABCMeta, object)): + """Placeholder docstring""" + @abstractmethod def get_file_list(self): - """Retrieve the list of absolute paths to all the files in this data source. + """Retrieve the list of absolute paths to all the files in this data + source. Returns: - List[str]: List of absolute paths. + List[str]: List of absolute paths. """ @abstractmethod @@ -111,19 +115,23 @@ def get_root_dir(self): class LocalFileDataSource(DataSource): - """Represents a data source within the local filesystem. - """ + """Represents a data source within the local filesystem.""" def __init__(self, root_path): + """ + Args: + root_path: + """ self.root_path = os.path.abspath(root_path) if not os.path.exists(self.root_path): raise RuntimeError("Invalid data source: %s does not exist." % self.root_path) def get_file_list(self): - """Retrieve the list of absolute paths to all the files in this data source. + """Retrieve the list of absolute paths to all the files in this data + source. 
Returns: - List[str] List of absolute paths. + List[str] List of absolute paths. """ if os.path.isdir(self.root_path): return [ @@ -145,8 +153,8 @@ def get_root_dir(self): class S3DataSource(DataSource): - """Defines a data source given by a bucket and S3 prefix. The contents will be downloaded - and then processed as local data. + """Defines a data source given by a bucket and S3 prefix. The contents will + be downloaded and then processed as local data. """ def __init__(self, bucket, prefix, sagemaker_session): @@ -155,7 +163,8 @@ def __init__(self, bucket, prefix, sagemaker_session): Args: bucket (str): S3 bucket name prefix (str): S3 prefix path to the data - sagemaker_session (:class:`sagemaker.session.Session`): a sagemaker_session with the desired settings + sagemaker_session (:class:`sagemaker.session.Session`): a sagemaker_session with the + desired settings to talk to S3 """ @@ -177,10 +186,11 @@ def __init__(self, bucket, prefix, sagemaker_session): self.files = LocalFileDataSource(working_dir) def get_file_list(self): - """Retrieve the list of absolute paths to all the files in this data source. + """Retrieve the list of absolute paths to all the files in this data + source. Returns: - List[str]: List of absolute paths. + List[str]: List of absolute paths. """ return self.files.get_file_list() @@ -194,6 +204,8 @@ def get_root_dir(self): class Splitter(with_metaclass(ABCMeta, object)): + """Placeholder docstring""" + @abstractmethod def split(self, file): """Split a file into records using a specific strategy @@ -207,27 +219,26 @@ def split(self, file): class NoneSplitter(Splitter): - """Does not split records, essentially reads the whole file. - """ + """Does not split records, essentially reads the whole file.""" def split(self, file): """Split a file into records using a specific strategy. - For this NoneSplitter there is no actual split happening and the file is returned - as a whole. + For this NoneSplitter there is no actual split happening and the file + is returned as a whole. Args: file (str): path to the file to split - Returns: generator for the individual records that were split from the file + Returns: generator for the individual records that were split from + the file """ with open(file, "r") as f: yield f.read() class LineSplitter(Splitter): - """Split records by new line. - """ + """Split records by new line.""" def split(self, file): """Split a file into records using a specific strategy @@ -237,7 +248,8 @@ def split(self, file): Args: file (str): path to the file to split - Returns: generator for the individual records that were split from the file + Returns: generator for the individual records that were split from + the file """ with open(file, "r") as f: for line in f: @@ -248,18 +260,19 @@ class RecordIOSplitter(Splitter): """Split using Amazon Recordio. Not useful for string content. - """ def split(self, file): """Split a file into records using a specific strategy - This RecordIOSplitter splits the data into individual RecordIO records. + This RecordIOSplitter splits the data into individual RecordIO + records. 
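# A minimal usage sketch (not part of the patch) for the data-source and
# splitter factories documented in this file. The "file:///tmp/train" path
# and the LocalSession are assumptions for illustration only.
import sagemaker.local.data
from sagemaker.local import LocalSession

session = LocalSession()
source = sagemaker.local.data.get_data_source_instance("file:///tmp/train", session)
splitter = sagemaker.local.data.get_splitter_instance("Line")
for path in source.get_file_list():
    for record in splitter.split(path):
        pass  # with the LineSplitter, each record is one line of the file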
Args: file (str): path to the file to split - Returns: generator for the individual records that were split from the file + Returns: generator for the individual records that were split from + the file """ with open(file, "rb") as f: for record in sagemaker.amazon.common.read_recordio(f): @@ -267,23 +280,26 @@ def split(self, file): class BatchStrategy(with_metaclass(ABCMeta, object)): + """Placeholder docstring""" + def __init__(self, splitter): """Create a Batch Strategy Instance Args: - splitter (:class:`sagemaker.local.data.Splitter`): A Splitter to pre-process the data - before batching. + splitter (sagemaker.local.data.Splitter): A Splitter to pre-process + the data before batching. """ self.splitter = splitter @abstractmethod def pad(self, file, size): - """Group together as many records as possible to fit in the specified size + """Group together as many records as possible to fit in the specified + size Args: file (str): file path to read the records from. - size (int): maximum size in MB that each group of records will be fitted to. - passing 0 means unlimited size. + size (int): maximum size in MB that each group of records will be + fitted to. passing 0 means unlimited size. Returns: generator of records @@ -297,12 +313,13 @@ class MultiRecordStrategy(BatchStrategy): """ def pad(self, file, size=6): - """Group together as many records as possible to fit in the specified size + """Group together as many records as possible to fit in the specified + size Args: file (str): file path to read the records from. - size (int): maximum size in MB that each group of records will be fitted to. - passing 0 means unlimited size. + size (int): maximum size in MB that each group of records will be + fitted to. passing 0 means unlimited size. Returns: generator of records @@ -322,22 +339,24 @@ def pad(self, file, size=6): class SingleRecordStrategy(BatchStrategy): """Feed a single record at a time for batch inference. - If a single record does not fit within the payload specified it will throw a RuntimeError. + If a single record does not fit within the payload specified it will + throw a RuntimeError. """ def pad(self, file, size=6): - """Group together as many records as possible to fit in the specified size + """Group together as many records as possible to fit in the specified + size - This SingleRecordStrategy will not group any record and will return them one by one as - long as they are within the maximum size. + This SingleRecordStrategy will not group any record and will return + them one by one as long as they are within the maximum size. Args: file (str): file path to read the records from. - size (int): maximum size in MB that each group of records will be fitted to. - passing 0 means unlimited size. + size (int): maximum size in MB that each group of records will be + fitted to. passing 0 means unlimited size. Returns: - generator of records + generator of records """ for element in self.splitter.split(file): if _validate_payload_size(element, size): @@ -345,6 +364,11 @@ def pad(self, file, size=6): def _payload_size_within_limit(payload, size): + """ + Args: + payload: + size: + """ size_in_bytes = size * 1024 * 1024 if size == 0: return True @@ -352,14 +376,15 @@ def _payload_size_within_limit(payload, size): def _validate_payload_size(payload, size): - """Check if a payload is within the size in MB threshold. Raise an exception otherwise. + """Check if a payload is within the size in MB threshold. Raise an exception + otherwise. 
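# Sketch of the MB-threshold arithmetic the payload helpers above rely on:
# `size` is a limit in megabytes, and size == 0 means "unlimited". The exact
# comparison direction is an assumption; only the MB-to-bytes conversion and
# the size == 0 short-circuit are taken from the code shown here.
def payload_size_within_limit(payload, size):
    if size == 0:
        return True
    return len(payload) < size * 1024 * 1024  # convert MB to bytes

assert payload_size_within_limit(b"x" * 10, 1)  # 10 bytes < 1 MB
assert payload_size_within_limit(b"x" * 10, 0)  # 0 means no limit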
Args: payload: data that will be checked size (int): max size in MB Returns: - bool: True if within bounds. if size=0 it will always return True + bool: True if within bounds. if size=0 it will always return True Raises: RuntimeError: If the payload is larger a runtime error is thrown. diff --git a/src/sagemaker/local/entities.py b/src/sagemaker/local/entities.py index b92ece29eb..ba7a832943 100644 --- a/src/sagemaker/local/entities.py +++ b/src/sagemaker/local/entities.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import datetime @@ -32,6 +33,7 @@ class _LocalTrainingJob(object): + """Placeholder docstring""" _STARTING = "Starting" _TRAINING = "Training" @@ -39,6 +41,10 @@ class _LocalTrainingJob(object): _states = ["Starting", "Training", "Completed"] def __init__(self, container): + """ + Args: + container: + """ self.container = container self.model_artifacts = None self.state = "created" @@ -46,6 +52,13 @@ def __init__(self, container): self.end_time = None def start(self, input_data_config, output_data_config, hyperparameters, job_name): + """ + Args: + input_data_config: + output_data_config: + hyperparameters: + job_name: + """ for channel in input_data_config: if channel["DataSource"] and "S3DataSource" in channel["DataSource"]: data_distribution = channel["DataSource"]["S3DataSource"]["S3DataDistributionType"] @@ -79,6 +92,7 @@ def start(self, input_data_config, output_data_config, hyperparameters, job_name self.state = self._COMPLETED def describe(self): + """Placeholder docstring""" response = { "ResourceConfig": {"InstanceCount": self.container.instance_count}, "TrainingJobStatus": self.state, @@ -90,11 +104,18 @@ def describe(self): class _LocalTransformJob(object): + """Placeholder docstring""" _CREATING = "Creating" _COMPLETED = "Completed" def __init__(self, transform_job_name, model_name, local_session=None): + """ + Args: + transform_job_name: + model_name: + local_session: + """ from sagemaker.local import LocalSession self.local_session = local_session or LocalSession() @@ -120,10 +141,12 @@ def start(self, input_data, output_data, transform_resources, **kwargs): """Start the Local Transform Job Args: - input_data (dict): Describes the dataset to be transformed and the location where it is stored. - output_data (dict): Identifies the location where to save the results from the transform job - transform_resources (dict): compute instances for the transform job. Currently only supports local or - local_gpu + input_data (dict): Describes the dataset to be transformed and the + location where it is stored. + output_data (dict): Identifies the location where to save the + results from the transform job + transform_resources (dict): compute instances for the transform job. + Currently only supports local or local_gpu **kwargs: additional arguments coming from the boto request object """ self.transform_resources = transform_resources @@ -171,8 +194,8 @@ def start(self, input_data, output_data, transform_resources, **kwargs): def describe(self): """Describe this _LocalTransformJob - The response is a JSON-like dictionary that follows the response of the - boto describe_transform_job() API. + The response is a JSON-like dictionary that follows the response of + the boto describe_transform_job() API. 
Returns: dict: description of this _LocalTransformJob @@ -201,17 +224,19 @@ def describe(self): return response def _get_container_environment(self, **kwargs): - """Get all the Environment variables that will be passed to the container + """Get all the Environment variables that will be passed to the + container - Certain input fields such as BatchStrategy have different values for the API vs the Environment - variables, such as SingleRecord vs SINGLE_RECORD. This method also handles this conversion. + Certain input fields such as BatchStrategy have different values for + the API vs the Environment variables, such as SingleRecord vs + SINGLE_RECORD. This method also handles this conversion. Args: **kwargs: existing transform arguments Returns: - dict: All the environment variables that should be set in the container - + dict: All the environment variables that should be set in the + container """ environment = {} environment.update(self.primary_container["Environment"]) @@ -242,7 +267,8 @@ def _get_container_environment(self, **kwargs): return environment def _get_required_defaults(self, **kwargs): - """Return the default values for anything that was not provided by either the user or the container + """Return the default values for anything that was not provided by + either the user or the container Args: **kwargs: current transform arguments @@ -260,6 +286,7 @@ def _get_required_defaults(self, **kwargs): return defaults def _get_working_directory(self): + """Placeholder docstring""" # Root dir to use for intermediate data location. To make things simple we will write here regardless # of the final destination. At the end the files will either be moved or uploaded to S3 and deleted. root_dir = get_config_value("local.container_root", self.local_session.config) @@ -270,6 +297,11 @@ def _get_working_directory(self): return working_dir def _prepare_data_transformation(self, input_data, batch_strategy): + """ + Args: + input_data: + batch_strategy: + """ input_path = input_data["DataSource"]["S3DataSource"]["S3Uri"] data_source = sagemaker.local.data.get_data_source_instance(input_path, self.local_session) @@ -284,6 +316,12 @@ def _perform_batch_inference(self, input_data, output_data, **kwargs): # from S3 or Local FileSystem. Split them as required (Line, RecordIO, None) and finally batch them # according to the batch strategy and limit the request size. 
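# Illustrative sketch (not the SDK's exact code) of the API-value to
# environment-variable conversion described above, e.g. the BatchStrategy
# value "SingleRecord" becoming "SINGLE_RECORD" in the container environment.
import re

def to_env_value(api_value):
    # Insert "_" before each inner capital letter, then upper-case everything.
    return re.sub(r"(?<!^)(?=[A-Z])", "_", api_value).upper()

assert to_env_value("SingleRecord") == "SINGLE_RECORD"
assert to_env_value("MultiRecord") == "MULTI_RECORD"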
+ """ + Args: + input_data: + output_data: + **kwargs: + """ batch_strategy = kwargs["BatchStrategy"] max_payload = int(kwargs["MaxPayloadInMB"]) data_source, batch_provider = self._prepare_data_transformation(input_data, batch_strategy) @@ -320,12 +358,20 @@ def _perform_batch_inference(self, input_data, output_data, **kwargs): class _LocalModel(object): + """Placeholder docstring""" + def __init__(self, model_name, primary_container): + """ + Args: + model_name: + primary_container: + """ self.model_name = model_name self.primary_container = primary_container self.creation_time = datetime.datetime.now() def describe(self): + """Placeholder docstring""" response = { "ModelName": self.model_name, "CreationTime": self.creation_time, @@ -337,13 +383,22 @@ def describe(self): class _LocalEndpointConfig(object): + """Placeholder docstring""" + def __init__(self, config_name, production_variants, tags=None): + """ + Args: + config_name: + production_variants: + tags: + """ self.name = config_name self.production_variants = production_variants self.tags = tags self.creation_time = datetime.datetime.now() def describe(self): + """Placeholder docstring""" response = { "EndpointConfigName": self.name, "EndpointConfigArn": _UNUSED_ARN, @@ -355,6 +410,7 @@ def describe(self): class _LocalEndpoint(object): + """Placeholder docstring""" _CREATING = "Creating" _IN_SERVICE = "InService" @@ -362,6 +418,13 @@ class _LocalEndpoint(object): def __init__(self, endpoint_name, endpoint_config_name, tags=None, local_session=None): # runtime import since there is a cyclic dependency between entities and local_session + """ + Args: + endpoint_name: + endpoint_config_name: + tags: + local_session: + """ from sagemaker.local import LocalSession self.local_session = local_session or LocalSession() @@ -380,6 +443,7 @@ def __init__(self, endpoint_name, endpoint_config_name, tags=None, local_session self.state = _LocalEndpoint._CREATING def serve(self): + """Placeholder docstring""" image = self.primary_container["Image"] instance_type = self.production_variant["InstanceType"] instance_count = self.production_variant["InitialInstanceCount"] @@ -404,10 +468,12 @@ def serve(self): self.state = _LocalEndpoint._IN_SERVICE def stop(self): + """Placeholder docstring""" if self.container: self.container.stop_serving() def describe(self): + """Placeholder docstring""" response = { "EndpointConfigName": self.endpoint_config["EndpointConfigName"], "CreationTime": self.create_time, @@ -421,6 +487,10 @@ def describe(self): def _wait_for_serving_container(serving_port): + """ + Args: + serving_port: + """ i = 0 http = urllib3.PoolManager() @@ -441,6 +511,11 @@ def _wait_for_serving_container(serving_port): def _perform_request(endpoint_url, pool_manager=None): + """ + Args: + endpoint_url: + pool_manager: + """ http = pool_manager or urllib3.PoolManager() try: r = http.request("GET", endpoint_url) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index bcc2bfab36..d1335d6c6e 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import base64 @@ -52,25 +53,27 @@ class _SageMakerContainer(object): - """Handle the lifecycle and configuration of a local docker container execution. 
+ """Handle the lifecycle and configuration of a local docker container + execution. - This class is responsible for creating the directories and configuration files that - the docker containers will use for either training or serving. + This class is responsible for creating the directories and configuration + files that the docker containers will use for either training or serving. """ def __init__(self, instance_type, instance_count, image, sagemaker_session=None): """Initialize a SageMakerContainer instance - It uses a :class:`sagemaker.session.Session` for general interaction with user configuration - such as getting the default sagemaker S3 bucket. However this class does not call any of the - SageMaker APIs. + It uses a :class:`sagemaker.session.Session` for general interaction + with user configuration such as getting the default sagemaker S3 bucket. + However this class does not call any of the SageMaker APIs. Args: - instance_type (str): The instance type to use. Either 'local' or 'local_gpu' + instance_type (str): The instance type to use. Either 'local' or + 'local_gpu' instance_count (int): The number of instances to create. image (str): docker image to use. - sagemaker_session (sagemaker.session.Session): a sagemaker session to use when interacting - with SageMaker. + sagemaker_session (sagemaker.session.Session): a sagemaker session + to use when interacting with SageMaker. """ from sagemaker.local.local_session import LocalSession @@ -90,9 +93,11 @@ def __init__(self, instance_type, instance_count, image, sagemaker_session=None) def train(self, input_data_config, output_data_config, hyperparameters, job_name): """Run a training job locally using docker-compose. + Args: input_data_config (dict): The Input Data Configuration, this contains data such as the channels to be used for training. + output_data_config: hyperparameters (dict): The HyperParameters for the training job. job_name (str): Name of the local training job being run. @@ -166,7 +171,8 @@ def serve(self, model_dir, environment): primary_container (dict): dictionary containing the container runtime settings for serving. Expected keys: - 'ModelDataUrl' pointing to a file or s3:// location. - - 'Environment' a dictionary of environment variables to be passed to the hosting container. + - 'Environment' a dictionary of environment variables to be passed to the + hosting container. """ logger.info("serving") @@ -199,7 +205,8 @@ def serve(self, model_dir, environment): def stop_serving(self): """Stop the serving container. - The serving container runs in async mode to allow the SDK to do other tasks. + The serving container runs in async mode to allow the SDK to do other + tasks. """ if self.container: self.container.down() @@ -211,15 +218,18 @@ def stop_serving(self): def retrieve_artifacts(self, compose_data, output_data_config, job_name): """Get the model artifacts from all the container nodes. - Used after training completes to gather the data from all the individual containers. As the - official SageMaker Training Service, it will override duplicate files if multiple containers have - the same file names. + Used after training completes to gather the data from all the + individual containers. As the official SageMaker Training Service, it + will override duplicate files if multiple containers have the same file + names. Args: - compose_data(dict): Docker-Compose configuration in dictionary format. + compose_data (dict): Docker-Compose configuration in dictionary + format. 
+            output_data_config:
+            job_name:

         Returns: Local path to the collected model artifacts.
-
         """
         # We need a directory to store the artifacts from all the nodes
         # and another one to contain the compressed final artifacts
@@ -275,15 +285,16 @@ def retrieve_artifacts(self, compose_data, output_data_config, job_name):

     def write_config_files(self, host, hyperparameters, input_data_config):
         """Write the config files for the training containers.

-        This method writes the hyperparameters, resources and input data configuration files.
+        This method writes the hyperparameters, resources and input data
+        configuration files.
+
+        Returns: None

         Args:
             host (str): Host to write the configuration for
             hyperparameters (dict): Hyperparameters for training.
-            input_data_config (dict): Training input channels to be used for training.
-
-        Returns: None
-
+            input_data_config (dict): Training input channels to be used for
+                training.
         """
         config_path = os.path.join(self.container_root, host, "input", "config")
@@ -303,6 +314,13 @@ def write_config_files(self, host, hyperparameters, input_data_config):
     def _prepare_training_volumes(
         self, data_dir, input_data_config, output_data_config, hyperparameters
     ):
+        """
+        Args:
+            data_dir:
+            input_data_config:
+            output_data_config:
+            hyperparameters:
+        """
         shared_dir = os.path.join(self.container_root, "shared")
         model_dir = os.path.join(self.container_root, "model")
         volumes = []
@@ -343,6 +361,11 @@ def _prepare_training_volumes(
         return volumes

     def _update_local_src_path(self, params, key):
+        """
+        Args:
+            params:
+            key:
+        """
         if key in params:
             src_dir = json.loads(params[key])
             parsed_uri = urlparse(src_dir)
@@ -353,6 +376,10 @@ def _update_local_src_path(self, params, key):
         return params

     def _prepare_serving_volumes(self, model_location):
+        """
+        Args:
+            model_location:
+        """
         volumes = []
         host = self.hosts[0]
         # Make the model available to the container. If this is a local file just mount it to
@@ -375,21 +402,22 @@ def _prepare_serving_volumes(self, model_location):
         return volumes

     def _generate_compose_file(self, command, additional_volumes=None, additional_env_vars=None):
-        """Writes a config file describing a training/hosting environment.
+        """Writes a config file describing a training/hosting environment.

-        This method generates a docker compose configuration file, it has an entry for each container
-        that will be created (based on self.hosts). it calls
-        :meth:~sagemaker.local_session.SageMakerContainer._create_docker_host to generate the config
-        for each individual container.
+        This method generates a docker compose configuration file; it has an
+        entry for each container that will be created (based on self.hosts). It
+        calls
+        :meth:~sagemaker.local_session.SageMakerContainer._create_docker_host to
+        generate the config for each individual container.

         Args:
             command (str): either 'train' or 'serve'
-            additional_volumes (list): a list of volumes that will be mapped to the containers
-            additional_env_vars (dict): a dictionary with additional environment variables to be
-                passed on to the containers.
+            additional_volumes (list): a list of volumes that will be mapped to
+                the containers
+            additional_env_vars (dict): a dictionary with additional environment
+                variables to be passed on to the containers.

         Returns: (dict) A dictionary representation of the configuration that was written.
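# Hedged illustration of the general shape of the dictionary that
# _generate_compose_file returns; the service name, image, volume paths and
# environment values below are invented examples, not values produced by the
# SDK.
compose_config = {
    "version": "2.3",
    "services": {
        "algo-1-abc123": {
            "image": "my-training-image:latest",  # assumption
            "command": "train",
            "volumes": ["/tmp/job/algo-1-abc123/opt/ml:/opt/ml"],  # assumption
            "environment": ["AWS_REGION=us-west-2"],  # assumption
        }
    },
}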
- """ boto_session = self.sagemaker_session.boto_session additional_volumes = additional_volumes or [] @@ -431,6 +459,10 @@ def _generate_compose_file(self, command, additional_volumes=None, additional_en return content def _compose(self, detached=False): + """ + Args: + detached: + """ compose_cmd = "docker-compose" command = [ @@ -449,6 +481,14 @@ def _compose(self, detached=False): return command def _create_docker_host(self, host, environment, optml_subdirs, command, volumes): + """ + Args: + host: + environment: + optml_subdirs: + command: + volumes: + """ optml_volumes = self._build_optml_volumes(host, optml_subdirs) optml_volumes.extend(volumes) @@ -479,6 +519,7 @@ def _create_docker_host(self, host, environment, optml_subdirs, command, volumes return host_config def _create_tmp_folder(self): + """Placeholder docstring""" root_dir = sagemaker.utils.get_config_value( "local.container_root", self.sagemaker_session.config ) @@ -496,14 +537,16 @@ def _create_tmp_folder(self): return os.path.abspath(working_dir) def _build_optml_volumes(self, host, subdirs): - """Generate a list of :class:`~sagemaker.local_session.Volume` required for the container to start. + """Generate a list of :class:`~sagemaker.local_session.Volume` required + for the container to start. - It takes a folder with the necessary files for training and creates a list of opt volumes that - the Container needs to start. + It takes a folder with the necessary files for training and creates a + list of opt volumes that the Container needs to start. Args: host (str): container for which the volumes will be generated. - subdirs (list): list of subdirectories that will be mapped. For example: ['input', 'output', 'model'] + subdirs (list): list of subdirectories that will be mapped. For + example: ['input', 'output', 'model'] Returns: (list) List of :class:`~sagemaker.local_session.Volume` """ @@ -518,6 +561,10 @@ def _build_optml_volumes(self, host, subdirs): return volumes def _cleanup(self, dirs_to_delete=None): + """ + Args: + dirs_to_delete: + """ if dirs_to_delete: for d in dirs_to_delete: _delete_tree(d) @@ -529,12 +576,19 @@ def _cleanup(self, dirs_to_delete=None): class _HostingContainer(Thread): + """Placeholder docstring""" + def __init__(self, command): + """ + Args: + command: + """ Thread.__init__(self) self.command = command self.process = None def run(self): + """Placeholder docstring""" self.process = subprocess.Popen( self.command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) @@ -547,13 +601,12 @@ def run(self): raise RuntimeError(msg) def down(self): + """Placeholder docstring""" self.process.terminate() class _Volume(object): - """Represent a Volume that will be mapped to a container. - - """ + """Represent a Volume that will be mapped to a container.""" def __init__(self, host_dir, container_dir=None, channel=None): """Create a Volume instance @@ -584,11 +637,11 @@ def __init__(self, host_dir, container_dir=None, channel=None): def _stream_output(process): """Stream the output of a process to stdout - This function takes an existing process that will be polled for output. Only stdout - will be polled and sent to sys.stdout. + This function takes an existing process that will be polled for output. + Only stdout will be polled and sent to sys.stdout. 
Args: - process(subprocess.Popen): a process that has been started with + process (subprocess.Popen): a process that has been started with stdout=PIPE and stderr=STDOUT Returns (int): process exit code @@ -607,6 +660,12 @@ def _stream_output(process): def _check_output(cmd, *popenargs, **kwargs): + """ + Args: + cmd: + *popenargs: + **kwargs: + """ if isinstance(cmd, str): cmd = shlex.split(cmd) @@ -626,11 +685,20 @@ def _check_output(cmd, *popenargs, **kwargs): def _create_config_file_directories(root, host): + """ + Args: + root: + host: + """ for d in ["input", "input/config", "output", "model"]: os.makedirs(os.path.join(root, host, d)) def _delete_tree(path): + """ + Args: + path: + """ try: shutil.rmtree(path) except OSError as exc: @@ -646,6 +714,10 @@ def _delete_tree(path): def _aws_credentials(session): + """ + Args: + session: + """ try: creds = session.get_credentials() access_key = creds.access_key @@ -685,6 +757,7 @@ def _aws_credentials(session): def _aws_credentials_available_in_metadata_service(): + """Placeholder docstring""" import botocore from botocore.credentials import InstanceMetadataProvider from botocore.utils import InstanceMetadataFetcher @@ -701,12 +774,22 @@ def _aws_credentials_available_in_metadata_service(): def _write_json_file(filename, content): + """ + Args: + filename: + content: + """ with open(filename, "w") as f: json.dump(content, f) def _ecr_login_if_needed(boto_session, image): # Only ECR images need login + """ + Args: + boto_session: + image: + """ sagemaker_pattern = re.compile(sagemaker.utils.ECR_URI_PATTERN) sagemaker_match = sagemaker_pattern.match(image) if not sagemaker_match: @@ -737,6 +820,10 @@ def _ecr_login_if_needed(boto_session, image): def _pull_image(image): + """ + Args: + image: + """ pull_image_command = ("docker pull %s" % image).strip() logger.info("docker command: %s", pull_image_command) diff --git a/src/sagemaker/local/local_session.py b/src/sagemaker/local/local_session.py index d2e6582ed4..a1aa62919b 100644 --- a/src/sagemaker/local/local_session.py +++ b/src/sagemaker/local/local_session.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -40,6 +41,11 @@ class LocalSagemakerClient(object): a boto client to interact with S3 but it won't perform any SageMaker call. Implements the methods with the same signature as the boto SageMakerClient. + + Args: + + Returns: + """ _training_jobs = {} @@ -66,16 +72,23 @@ def create_training_job( InputDataConfig=None, **kwargs ): - """ - Create a training job in Local Mode + """Create a training job in Local Mode + Args: - TrainingJobName (str): local training job name. - AlgorithmSpecification (dict): Identifies the training algorithm to use. - InputDataConfig (dict): Describes the training dataset and the location where it is stored. - OutputDataConfig (dict): Identifies the location where you want to save the results of model training. - ResourceConfig (dict): Identifies the resources to use for local model traininig. - HyperParameters (dict) [optional]: Specifies these algorithm-specific parameters to influence the quality of - the final model. + TrainingJobName(str): local training job name. + AlgorithmSpecification(dict): Identifies the training algorithm to use. 
+            InputDataConfig(dict, optional): Describes the training dataset and the location where
+                it is stored. (Default value = None)
+            OutputDataConfig(dict): Identifies the location where you want to save the results of
+                model training.
+            ResourceConfig(dict): Identifies the resources to use for local model training.
+            HyperParameters(dict) [optional]: Specifies algorithm-specific parameters that
+                influence the quality of the final model.
+            **kwargs:
+
+        Returns:
+
         """
         InputDataConfig = InputDataConfig or {}
         container = _SageMakerContainer(
@@ -94,10 +107,11 @@ def describe_training_job(self, TrainingJobName):
         """Describe a local training job.

         Args:
-            TrainingJobName (str): Training job name to describe.
-
+            TrainingJobName(str): Training job name to describe.

         Returns: (dict) DescribeTrainingJob Response.
         """
         if TrainingJobName not in LocalSagemakerClient._training_jobs:
             error_response = {
@@ -118,11 +132,32 @@ def create_transform_job(
         self,
         TransformJobName,
         ModelName,
         TransformInput,
         TransformOutput,
         TransformResources,
         **kwargs
     ):
+        """
+
+        Args:
+            TransformJobName:
+            ModelName:
+            TransformInput:
+            TransformOutput:
+            TransformResources:
+            **kwargs:
+
+        Returns:
+
+        """
         transform_job = _LocalTransformJob(TransformJobName, ModelName, self.sagemaker_session)
         LocalSagemakerClient._transform_jobs[TransformJobName] = transform_job
         transform_job.start(TransformInput, TransformOutput, TransformResources, **kwargs)

     def describe_transform_job(self, TransformJobName):
+        """
+
+        Args:
+            TransformJobName:
+
+        Returns:
+
+        """
         if TransformJobName not in LocalSagemakerClient._transform_jobs:
             error_response = {
                 "Error": {
                     "Code": "ValidationException",
@@ -138,13 +173,26 @@ def create_model(
     ):  # pylint: disable=unused-argument
         """Create a Local Model Object

+        Args:
-            ModelName (str): the Model Name
-            PrimaryContainer (dict): a SageMaker primary container definition
+            ModelName (str): the Model Name
+            PrimaryContainer (dict): a SageMaker primary container definition
+            *args:
+            **kwargs:
+
+        Returns:
         """
         LocalSagemakerClient._models[ModelName] = _LocalModel(ModelName, PrimaryContainer)

     def describe_model(self, ModelName):
+        """
+
+        Args:
+            ModelName:
+
+        Returns:
+
+        """
         if ModelName not in LocalSagemakerClient._models:
             error_response = {
                 "Error": {"Code": "ValidationException", "Message": "Could not find local model"}
@@ -153,6 +201,14 @@
         return LocalSagemakerClient._models[ModelName].describe()

     def describe_endpoint_config(self, EndpointConfigName):
+        """
+
+        Args:
+            EndpointConfigName:
+
+        Returns:
+
+        """
         if EndpointConfigName not in LocalSagemakerClient._endpoint_configs:
             error_response = {
                 "Error": {
@@ -164,11 +220,29 @@
         return LocalSagemakerClient._endpoint_configs[EndpointConfigName].describe()

     def create_endpoint_config(self, EndpointConfigName, ProductionVariants, Tags=None):
+        """
+
+        Args:
+            EndpointConfigName:
+            ProductionVariants:
+            Tags: (Default value = None)
+
+        Returns:
+
+        """
         LocalSagemakerClient._endpoint_configs[EndpointConfigName] = _LocalEndpointConfig(
             EndpointConfigName, ProductionVariants, Tags
         )

     def describe_endpoint(self, EndpointName):
+        """
+
+        Args:
+            EndpointName:
+
+        Returns:
+
+        """
         if EndpointName not in LocalSagemakerClient._endpoints:
             error_response = {
                 "Error": {"Code": "ValidationException", "Message": "Could not find local endpoint"}
@@ -177,30 +251,71 @@
         return LocalSagemakerClient._endpoints[EndpointName].describe()

     def create_endpoint(self, EndpointName, EndpointConfigName, Tags=None):
+        """
+
+        Args:
+
EndpointName: + EndpointConfigName: + Tags: (Default value = None) + + Returns: + + """ endpoint = _LocalEndpoint(EndpointName, EndpointConfigName, Tags, self.sagemaker_session) LocalSagemakerClient._endpoints[EndpointName] = endpoint endpoint.serve() def update_endpoint(self, EndpointName, EndpointConfigName): # pylint: disable=unused-argument + """ + + Args: + EndpointName: + EndpointConfigName: + + Returns: + + """ raise NotImplementedError("Update endpoint name is not supported in local session.") def delete_endpoint(self, EndpointName): + """ + + Args: + EndpointName: + + Returns: + + """ if EndpointName in LocalSagemakerClient._endpoints: LocalSagemakerClient._endpoints[EndpointName].stop() def delete_endpoint_config(self, EndpointConfigName): + """ + + Args: + EndpointConfigName: + + Returns: + + """ if EndpointConfigName in LocalSagemakerClient._endpoint_configs: del LocalSagemakerClient._endpoint_configs[EndpointConfigName] def delete_model(self, ModelName): + """ + + Args: + ModelName: + + Returns: + + """ if ModelName in LocalSagemakerClient._models: del LocalSagemakerClient._models[ModelName] class LocalSagemakerRuntimeClient(object): - """A SageMaker Runtime client that calls a local endpoint only. - - """ + """A SageMaker Runtime client that calls a local endpoint only.""" def __init__(self, config=None): """Initializes a LocalSageMakerRuntimeClient @@ -222,6 +337,17 @@ def invoke_endpoint( Accept=None, CustomAttributes=None, ): + """ + + Args: + Body: + EndpointName: + Accept: (Default value = None) + CustomAttributes: (Default value = None) + + Returns: + + """ url = "http://localhost:%s/invocations" % self.serving_port headers = {} @@ -240,6 +366,8 @@ def invoke_endpoint( class LocalSession(Session): + """Placeholder docstring""" + def __init__(self, boto_session=None): super(LocalSession, self).__init__(boto_session) @@ -247,7 +375,16 @@ def __init__(self, boto_session=None): logger.warning("Windows Support for Local Mode is Experimental") def _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client): - """Initialize this Local SageMaker Session.""" + """Initialize this Local SageMaker Session. + + Args: + boto_session: + sagemaker_client: + sagemaker_runtime_client: + + Returns: + + """ self.boto_session = boto_session or boto3.Session() self._region_name = self.boto_session.region_name @@ -262,17 +399,23 @@ def _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client): self.local_mode = True def logs_for_job(self, job_name, wait=False, poll=5): + """ + + Args: + job_name: + wait: (Default value = False) + poll: (Default value = 5) + + Returns: + + """ # override logs_for_job() as it doesn't need to perform any action # on local mode. - pass + pass # pylint: disable=unnecessary-pass class file_input(object): - """Amazon SageMaker channel configuration for FILE data sources, used in local mode. - - Attributes: - config (dict[str, dict]): A SageMaker ``DataSource`` referencing a SageMaker ``FileDataSource``. - """ + """Amazon SageMaker channel configuration for FILE data sources, used in local mode.""" def __init__(self, fileUri, content_type=None): """Create a definition for input data used by an SageMaker training job in local mode. diff --git a/src/sagemaker/local/utils.py b/src/sagemaker/local/utils.py index ab316e21d3..63166c8a93 100644 --- a/src/sagemaker/local/utils.py +++ b/src/sagemaker/local/utils.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. 
See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import os @@ -20,18 +21,20 @@ def copy_directory_structure(destination_directory, relative_path): - """Create all the intermediate directories required for relative_path to exist within destination_directory. - This assumes that relative_path is a directory located within root_dir. + """Create all the intermediate directories required for relative_path to + exist within destination_directory. This assumes that relative_path is a + directory located within root_dir. Examples: - destination_directory: /tmp/destination - relative_path: test/unit/ + destination_directory: /tmp/destination relative_path: test/unit/ - will create: /tmp/destination/test/unit + will create: /tmp/destination/test/unit Args: - destination_directory (str): root of the destination directory where the directory structure will be created. - relative_path (str): relative path that will be created within destination_directory + destination_directory (str): root of the destination directory where the + directory structure will be created. + relative_path (str): relative path that will be created within + destination_directory """ full_path = os.path.join(destination_directory, relative_path) if os.path.exists(full_path): @@ -47,7 +50,8 @@ def move_to_destination(source, destination, job_name, sagemaker_session): source (str): root directory to move destination (str): file:// or s3:// URI that source will be moved to. job_name (str): SageMaker job name. - sagemaker_session (sagemaker.Session): a sagemaker_session to interact with S3 if needed + sagemaker_session (sagemaker.Session): a sagemaker_session to interact + with S3 if needed Returns: (str): destination URI @@ -69,8 +73,8 @@ def move_to_destination(source, destination, job_name, sagemaker_session): def recursive_copy(source, destination): - """A wrapper around distutils.dir_util.copy_tree but won't throw any exception when the source - directory does not exist. + """A wrapper around distutils.dir_util.copy_tree but won't throw any + exception when the source directory does not exist. Args: source (str): source path diff --git a/src/sagemaker/logs.py b/src/sagemaker/logs.py index 3d3582ab33..ae225d2283 100644 --- a/src/sagemaker/logs.py +++ b/src/sagemaker/logs.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import collections @@ -25,8 +26,9 @@ class ColorWrap(object): - """A callable that will print text in a different color depending on the instance (up - to 6 if standard output is a terminal or a Jupyter notebook cell). + """A callable that will print text in a different color depending on the + instance (up to 6 if standard output is a terminal or a Jupyter notebook + cell). """ _stream_colors = [31, 32, 33, 34, 35, 36] @@ -35,16 +37,17 @@ def __init__(self, force=False): """Initialize the class. Args: - force (bool): If True, render colorizes output no matter where the output is (default: False). + force (bool): If True, render colorizes output no matter where the + output is (default: False). 
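# The colorization pattern ColorWrap uses, shown standalone: each stream
# index is mapped onto one of six ANSI color codes and the string is wrapped
# in the corresponding escape sequences (the sample message is invented).
stream_colors = [31, 32, 33, 34, 35, 36]

def color_wrap(index, s):
    print("\x1b[{}m{}\x1b[0m".format(stream_colors[index % len(stream_colors)], s))

color_wrap(0, "algo-1 | starting training")  # printed in red (code 31)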
""" self.colorize = force or sys.stdout.isatty() or os.environ.get("JPY_PARENT_PID", None) def __call__(self, index, s): """Print the output, colorized or not, depending on the environment. - Args: - index (int): The instance number. - s (str): The string to print. + Args: + index (int): The instance number. + s (str): The string to print. """ if self.colorize: self._color_wrap(index, s) @@ -52,11 +55,21 @@ def __call__(self, index, s): print(s) def _color_wrap(self, index, s): + """ + Args: + index: + s: + """ print("\x1b[{}m{}\x1b[0m".format(self._stream_colors[index % len(self._stream_colors)], s)) def argmin(arr, f): - """Return the index, i, in arr that minimizes f(arr[i])""" + """Return the index, i, in arr that minimizes f(arr[i]) + + Args: + arr: + f: + """ m = None i = None for idx, item in enumerate(arr): @@ -68,7 +81,11 @@ def argmin(arr, f): def some(arr): - """Return True iff there is an element, a, of arr such that a is not None""" + """Return True iff there is an element, a, of arr such that a is not None + + Args: + arr: + """ return functools.reduce(lambda x, y: x or (y is not None), arr, False) @@ -84,10 +101,10 @@ def multi_stream_iter(client, log_group, streams, positions=None): Args: client (boto3 client): The boto client for logs. log_group (str): The name of the log group. - streams (list of str): A list of the log stream names. The position of the stream in this list is - the stream number. - positions: (list of Positions): A list of pairs of (timestamp, skip) which represents the last record - read from each stream. + streams (list of str): A list of the log stream names. The position of the stream in + this list is the stream number. + positions: (list of Positions): A list of pairs of (timestamp, skip) which represents + the last record read from each stream. Yields: A tuple of (stream number, cloudwatch log event). @@ -124,8 +141,8 @@ def log_stream(client, log_group, stream_name, start_time=0, skip=0): log_group (str): The name of the log group. stream_name (str): The name of the specific stream. start_time (int): The time stamp value to start reading the logs from (default: 0). - skip (int): The number of log entries to skip at the start (default: 0). This is for when there are - multiple entries at the same timestamp. + skip (int): The number of log entries to skip at the start (default: 0). This is for + when there are multiple entries at the same timestamp. Yields: dict: A CloudWatch log event with the following key-value pairs: diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index be840d6885..c9e3469a74 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import json @@ -70,26 +71,37 @@ def __init__( """Initialize an SageMaker ``Model``. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. image (str): A Docker image URI. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. 
- After the endpoint is created, the inference code might use the IAM role if it needs to access some AWS - resources. It can be null if this is being used to create a Model to pass to a ``PipelineModel`` which - has its own Role field. (default: None) - predictor_cls (callable[string, sagemaker.session.Session]): A function to call to create - a predictor (default: None). If not None, ``deploy`` will return the result of invoking - this function on the created endpoint name. - env (dict[str, str]): Environment variables to run with ``image`` when hosted in SageMaker (default: None). - name (str): The model name. If None, a default model name will be selected on each ``deploy``. - vpc_config (dict[str, list[str]]): The VpcConfig set on the model (default: None) + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role if it needs to access some AWS resources. + It can be null if this is being used to create a Model to pass + to a ``PipelineModel`` which has its own Role field. (default: + None) + predictor_cls (callable[string, sagemaker.session.Session]): A + function to call to create a predictor (default: None). If not + None, ``deploy`` will return the result of invoking this + function on the created endpoint name. + env (dict[str, str]): Environment variables to run with ``image`` + when hosted in SageMaker (default: None). + name (str): The model name. If None, a default model name will be + selected on each ``deploy``. + vpc_config (dict[str, list[str]]): The VpcConfig set on the model + (default: None) * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker - interactions (default: None). If not specified, one is created using the default AWS configuration chain. - enable_network_isolation (Boolean): Default False. if True, enables network isolation in the endpoint, - isolating the model container. No inbound or outbound network calls can be made to or from the - model container. + sagemaker_session (sagemaker.session.Session): A SageMaker Session + object, used for SageMaker interactions (default: None). If not + specified, one is created using the default AWS configuration + chain. + enable_network_isolation (Boolean): Default False. if True, enables + network isolation in the endpoint, isolating the model + container. No inbound or outbound network calls can be made to + or from the model container. """ self.model_data = model_data self.image = image @@ -107,15 +119,18 @@ def __init__( def prepare_container_def( self, instance_type, accelerator_type=None ): # pylint: disable=unused-argument - """Return a dict created by ``sagemaker.container_def()`` for deploying this model to a specified instance type. + """Return a dict created by ``sagemaker.container_def()`` for deploying + this model to a specified instance type. - Subclasses can override this to provide custom container definitions for - deployment to a specific instance type. Called by ``deploy()``. + Subclasses can override this to provide custom container definitions + for deployment to a specific instance type. Called by ``deploy()``. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. 
- accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Returns: dict: A container definition object usable with the CreateModel API. @@ -134,16 +149,18 @@ def _create_sagemaker_model(self, instance_type, accelerator_type=None, tags=Non """Create a SageMaker Model Entity Args: - instance_type (str): The EC2 instance type that this Model will be used for, this is only - used to determine if the image needs GPU support or not. - accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading - and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator - will be attached to the endpoint. - tags(List[dict[str, str]]): Optional. The list of tags to add to the model. Example: - >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}] - For more information about tags, see https://boto3.amazonaws.com/v1/documentation\ - /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags - + instance_type (str): The EC2 instance type that this Model will be + used for, this is only used to determine if the image needs GPU + support or not. + accelerator_type (str): Type of Elastic Inference accelerator to + attach to an endpoint for model loading and inference, for + example, 'ml.eia1.medium'. If not specified, no Elastic + Inference accelerator will be attached to the endpoint. + tags (List[dict[str, str]]): Optional. The list of tags to add to + the model. 
Example: >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
+                For more information about tags, see
+                https://boto3.amazonaws.com/v1/documentation
+                /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
         """
         container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
         self.name = self.name or utils.name_from_image(container_def["Image"])
@@ -158,9 +175,11 @@
         )

     def _framework(self):
+        """Placeholder docstring"""
         return getattr(self, "__framework_name__", None)

     def _get_framework_version(self):
+        """Placeholder docstring"""
         return getattr(self, "framework_version", None)

     def _compilation_job_config(
         self,
         target_instance_type,
         input_shape,
         output_path,
         role,
         compile_max_run,
         job_name,
         framework,
         tags,
     ):
+        """
+        Args:
+            target_instance_type:
+            input_shape:
+            output_path:
+            role:
+            compile_max_run:
+            job_name:
+            framework:
+            tags:
+        """
         input_model_config = {
             "S3Uri": self.model_data,
             "DataInputConfig": input_shape
@@ -201,14 +231,20 @@ def check_neo_region(self, region):

         Args:
             region (str): Specifies the region where you want to execute compilation
+
         Returns:
-            bool: boolean value whether if neo is available in the specified region
+            bool: whether Neo is available in the specified region
         """
         if region in NEO_IMAGE_ACCOUNT:
             return True
         return False

     def _neo_image_account(self, region):
+        """
+        Args:
+            region:
+        """
         if region not in NEO_IMAGE_ACCOUNT:
             raise ValueError(
                 "Neo is not currently supported in {}, "
@@ -217,6 +253,13 @@
         return NEO_IMAGE_ACCOUNT[region]

     def _neo_image(self, region, target_instance_type, framework, framework_version):
+        """
+        Args:
+            region:
+            target_instance_type:
+            framework:
+            framework_version:
+        """
         return fw_utils.create_image_uri(
             region,
             "neo-" + framework.lower(),
@@ -241,24 +284,31 @@ def compile(
         """Compile this ``Model`` with SageMaker Neo.

         Args:
-            target_instance_family (str): Identifies the device that you want to run your model after compilation, for
-                example: ml_c5. Allowed strings are: ml_c5, ml_m5, ml_c4, ml_m4, jetsontx1, jetsontx2, ml_p2, ml_p3,
-                deeplens, rasp3b
-            input_shape (dict): Specifies the name and shape of the expected inputs for your trained model in json
-                dictionary form, for example: {'data':[1,3,1024,1024]}, or {'var1': [1,1,28,28], 'var2':[1,1,28,28]}
+            target_instance_family (str): Identifies the device that you want to
+                run your model after compilation, for example: ml_c5. Allowed
+                strings are: ml_c5, ml_m5, ml_c4, ml_m4, jetsontx1, jetsontx2,
+                ml_p2, ml_p3, deeplens, rasp3b
+            input_shape (dict): Specifies the name and shape of the expected
+                inputs for your trained model in json dictionary form, for
+                example: {'data':[1,3,1024,1024]}, or {'var1': [1,1,28,28],
+                'var2':[1,1,28,28]}
             output_path (str): Specifies where to store the compiled model
             role (str): Execution role
-            tags (list[dict]): List of tags for labeling a compilation job. For more, see
+            tags (list[dict]): List of tags for labeling a compilation job. For
+                more, see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
             job_name (str): The name of the compilation job
-            compile_max_run (int): Timeout in seconds for compilation (default: 3 * 60).
-                After this amount of time Amazon SageMaker Neo terminates the compilation job regardless of its
-                current status.
-            framework (str): The framework that is used to train the original model.
Allowed values: 'mxnet', - 'tensorflow', 'pytorch', 'onnx', 'xgboost' - framework_version (str) + compile_max_run (int): Timeout in seconds for compilation (default: + 3 * 60). After this amount of time Amazon SageMaker Neo + terminates the compilation job regardless of its current status. + framework (str): The framework that is used to train the original + model. Allowed values: 'mxnet', 'tensorflow', 'pytorch', 'onnx', + 'xgboost' + framework_version (str): + Returns: - sagemaker.model.Model: A SageMaker ``Model`` object. See :func:`~sagemaker.model.Model` for full details. + sagemaker.model.Model: A SageMaker ``Model`` object. See + :func:`~sagemaker.model.Model` for full details. """ framework = self._framework() or framework if framework is None: @@ -315,38 +365,50 @@ def deploy( kms_key=None, wait=True, ): - """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``. + """Deploy this ``Model`` to an ``Endpoint`` and optionally return a + ``Predictor``. - Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``. - If ``self.predictor_cls`` is not None, this method returns a the result of invoking - ``self.predictor_cls`` on the created endpoint name. + Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an + ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None, + this method returns a the result of invoking ``self.predictor_cls`` on + the created endpoint name. - The name of the created model is accessible in the ``name`` field of this ``Model`` after deploy returns + The name of the created model is accessible in the ``name`` field of + this ``Model`` after deploy returns - The name of the created endpoint is accessible in the ``endpoint_name`` - field of this ``Model`` after deploy returns. + The name of the created endpoint is accessible in the + ``endpoint_name`` field of this ``Model`` after deploy returns. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - initial_instance_count (int): The initial number of instances to run in the - ``Endpoint`` created from this ``Model``. - accelerator_type (str): Type of Elastic Inference accelerator to deploy this model for model loading - and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator - will be attached to the endpoint. - For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html - endpoint_name (str): The name of the endpoint to create (default: None). - If not specified, a unique endpoint name will be created. - update_endpoint (bool): Flag to update the model in an existing Amazon SageMaker endpoint. - If True, this will deploy a new EndpointConfig to an already existing endpoint and delete resources - corresponding to the previous EndpointConfig. If False, a new endpoint will be created. Default: False - tags(List[dict[str, str]]): The list of tags to attach to this specific endpoint. - kms_key (str): The ARN of the KMS key that is used to encrypt the data on the - storage volume attached to the instance hosting the endpoint. - wait (bool): Whether the call should wait until the deployment of this model completes (default: True). + initial_instance_count (int): The initial number of instances to run + in the ``Endpoint`` created from this ``Model``. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. 
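# Hedged usage sketch for deploy() as documented in this docstring; assumes
# `model` is a constructed sagemaker.model.Model, and the endpoint name and
# instance settings are invented examples.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.p2.xlarge",
    endpoint_name="my-endpoint",  # optional; a unique name is generated if omitted
)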
+ accelerator_type (str): Type of Elastic Inference accelerator to + deploy this model for model loading and inference, for example, + 'ml.eia1.medium'. If not specified, no Elastic Inference + accelerator will be attached to the endpoint. For more + information: + https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + endpoint_name (str): The name of the endpoint to create (default: + None). If not specified, a unique endpoint name will be created. + update_endpoint (bool): Flag to update the model in an existing + Amazon SageMaker endpoint. If True, this will deploy a new + EndpointConfig to an already existing endpoint and delete + resources corresponding to the previous EndpointConfig. If + False, a new endpoint will be created. Default: False + tags (List[dict[str, str]]): The list of tags to attach to this + specific endpoint. + kms_key (str): The ARN of the KMS key that is used to encrypt the + data on the storage volume attached to the instance hosting the + endpoint. + wait (bool): Whether the call should wait until the deployment of + this model completes (default: True). Returns: callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on - the created endpoint name, if ``self.predictor_cls`` is not None. Otherwise, return None. + the created endpoint name, if ``self.predictor_cls`` is not + None. Otherwise, return None. """ if not self.sagemaker_session: if instance_type in ("local", "local_gpu"): @@ -411,22 +473,30 @@ def transformer( Args: instance_count (int): Number of EC2 instances to use. - instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - env (dict): Environment variables to be set for use during the transform job (default: None). - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. + strategy (str): The strategy used to decide how to batch records in + a single request (default: None). Valid values: 'MULTI_RECORD' + and 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). + Valid values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If + not specified, results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the + transform output (default: None). + accept (str): The content type accepted by the endpoint deployed + during the transform job. 
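# Hedged usage sketch for transformer() as documented in this docstring;
# assumes `model` is a constructed sagemaker.model.Model, the bucket is an
# invented example, and the strategy value follows the valid values listed
# above.
transformer = model.transformer(
    instance_count=1,
    instance_type="ml.c4.xlarge",
    strategy="MULTI_RECORD",
    assemble_with="Line",
    output_path="s3://my-bucket/transform-output",  # assumption
)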
+ env (dict): Environment variables to be set for use during the + transform job (default: None). + max_concurrent_transforms (int): The maximum number of HTTP requests + to be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP + request to the container in MB. + tags (list[dict]): List of tags for labeling a transform job. If + none specified, then the tags used for the training job are used + for the transform job. + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). """ self._create_sagemaker_model(instance_type) if self.enable_network_isolation(): @@ -455,7 +525,6 @@ def delete_model(self): Raises: ValueError: if the model is not created yet. - """ if self.name is None: raise ValueError( @@ -477,7 +546,8 @@ def delete_model(self): class FrameworkModel(Model): """A Model for working with an SageMaker ``Framework``. - This class hosts user-defined code in S3 and sets code location and configuration in model environment variables. + This class hosts user-defined code in S3 and sets code location and + configuration in model environment variables. """ def __init__( @@ -501,14 +571,17 @@ def __init__( """Initialize a ``FrameworkModel``. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. image (str): A Docker image URI. - role (str): An IAM role name or ARN for SageMaker to access AWS resources on your behalf. - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5. - If 'git_config' is provided, 'entry_point' should be a relative location to the Python source file in - the Git repo. - Example: + role (str): An IAM role name or ARN for SageMaker to access AWS + resources on your behalf. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to model + hosting. This should be compatible with either Python 2.7 or + Python 3.5. If 'git_config' is provided, 'entry_point' should be + a relative location to the Python source file in the Git repo. + Example With the following GitHub repo directory structure: @@ -518,46 +591,14 @@ def __init__( >>> |----- test.py You can assign entry_point='src/inference.py'. - git_config (dict[str, str]): Git configurations used for cloning files, including ``repo``, ``branch``, - ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``. The ``repo`` field is required. - All other fields are optional. ``repo`` specifies the Git repository where your training script is - stored. If you don't provide ``branch``, the default value 'master' is used. If you don't provide - ``commit``, the latest commit in the specified branch is used. - Example: - - The following config: - - >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git', - >>> 'branch': 'test-branch-git-config', - >>> 'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'} - - results in cloning the repo specified in 'repo', then checkout the 'master' branch, and checkout - the specified commit. - ``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. 
For GitHub - (or other Git) accounts, set ``2FA_enabled`` to 'True' if two-factor authentication is enabled for the - account, otherwise set it to 'False'. If you do not provide a value for ``2FA_enabled``, a default - value of 'False' is used. CodeCommit does not support two-factor authentication, so do not provide - "2FA_enabled" with CodeCommit repositories. - - For GitHub and other Git repos, when SSH URLs are provided, it doesn't matter whether 2FA is - enabled or disabled; you should either have no passphrase for the SSH key pairs, or have the ssh-agent - configured so that you will not be prompted for SSH passphrase when you do 'git clone' command with SSH - URLs. When HTTPS URLs are provided: if 2FA is disabled, then either token or username+password will be - used for authentication if provided (token prioritized); if 2FA is enabled, only token will be used for - authentication if provided. If required authentication info is not provided, python SDK will try to use - local credentials storage to authenticate. If that fails either, an error message will be thrown. - - For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided. There is no token - in CodeCommit, so 'token' should not be provided too. When 'repo' is an SSH URL, the requirements are - the same as GitHub-like repos. When 'repo' is an HTTPS URL, username+password will be used for - authentication if they are provided; otherwise, python SDK will try to use either CodeCommit credential - helper or local credential storage for authentication. - source_dir (str): Path (absolute or relative) to a directory with any other training - source code dependencies aside from the entry point file (default: None). Structure within this - directory will be preserved when training on SageMaker. If 'git_config' is provided, - 'source_dir' should be a relative location to a directory in the Git repo. If the directory points - to S3, no code will be uploaded and the S3 location will be used instead. - Example: + source_dir (str): Path (absolute or relative) to a directory with + any other training source code dependencies aside from the entry + point file (default: None). Structure within this directory will + be preserved when training on SageMaker. If 'git_config' is + provided, 'source_dir' should be a relative location to a + directory in the Git repo. If the directory points to S3, no + code will be uploaded and the S3 location will be used instead. + .. admonition:: Example With the following GitHub repo directory structure: @@ -567,17 +608,40 @@ def __init__( >>> |----- test.py You can assign entry_point='inference.py', source_dir='src'. - dependencies (list[str]): A list of paths to directories (absolute or relative) with - any additional libraries that will be exported to the container (default: []). - The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. - If 'git_config' is provided, 'dependencies' should be a list of relative locations to directories - with any additional libraries needed in the Git repo. If the ```source_dir``` points to S3, code - will be uploaded and the S3 location will be used instead. - Example: - - The following call - >>> Estimator(entry_point='inference.py', dependencies=['my/libs/common', 'virtual-env']) - results in the following inside the container: + predictor_cls (callable[string, sagemaker.session.Session]): A + function to call to create a predictor (default: None). 
If not + None, ``deploy`` will return the result of invoking this + function on the created endpoint name. + env (dict[str, str]): Environment variables to run with ``image`` + when hosted in SageMaker (default: None). + name (str): The model name. If None, a default model name will be + selected on each ``deploy``. + enable_cloudwatch_metrics (bool): Whether training and hosting + containers will generate CloudWatch metrics under the + AWS/SageMakerContainer namespace (default: False). + container_log_level (int): Log level to use within the container + (default: logging.INFO). Valid values are defined in the Python + logging module. + code_location (str): Name of the S3 bucket where custom code is + uploaded (default: None). If not specified, default bucket + created by ``sagemaker.session.Session`` is used. + sagemaker_session (sagemaker.session.Session): A SageMaker Session + object, used for SageMaker interactions (default: None). If not + specified, one is created using the default AWS configuration + chain. + dependencies (list[str]): A list of paths to directories (absolute + or relative) with any additional libraries that will be exported + to the container (default: []). The library folders will be + copied to SageMaker in the same folder where the entrypoint is + copied. If 'git_config' is provided, 'dependencies' should be a + list of relative locations to directories with any additional + libraries needed in the Git repo. If the ```source_dir``` points + to S3, code will be uploaded and the S3 location will be used + instead. .. admonition:: Example + + The following call >>> Estimator(entry_point='inference.py', + dependencies=['my/libs/common', 'virtual-env']) results in + the following inside the container: >>> $ ls @@ -585,21 +649,55 @@ def __init__( >>> |------ inference.py >>> |------ common >>> |------ virtual-env + git_config (dict[str, str]): Git configurations used for cloning + files, including ``repo``, ``branch``, ``commit``, + ``2FA_enabled``, ``username``, ``password`` and ``token``. The + ``repo`` field is required. All other fields are optional. + ``repo`` specifies the Git repository where your training script + is stored. If you don't provide ``branch``, the default value + 'master' is used. If you don't provide ``commit``, the latest + commit in the specified branch is used. .. admonition:: Example + + The following config: - predictor_cls (callable[string, sagemaker.session.Session]): A function to call to create - a predictor (default: None). If not None, ``deploy`` will return the result of invoking - this function on the created endpoint name. - env (dict[str, str]): Environment variables to run with ``image`` when hosted in SageMaker - (default: None). - name (str): The model name. If None, a default model name will be selected on each ``deploy``. - enable_cloudwatch_metrics (bool): Whether training and hosting containers will - generate CloudWatch metrics under the AWS/SageMakerContainer namespace (default: False). - container_log_level (int): Log level to use within the container (default: logging.INFO). - Valid values are defined in the Python logging module. - code_location (str): Name of the S3 bucket where custom code is uploaded (default: None). - If not specified, default bucket created by ``sagemaker.session.Session`` is used. - sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker - interactions (default: None). If not specified, one is created using the default AWS configuration chain. 
+ >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git', + >>> 'branch': 'test-branch-git-config', + >>> 'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'} + + results in cloning the repo specified in 'repo', then + checkout the 'master' branch, and checkout the specified + commit. + + ``2FA_enabled``, ``username``, ``password`` and ``token`` are + used for authentication. For GitHub (or other Git) accounts, set + ``2FA_enabled`` to 'True' if two-factor authentication is + enabled for the account, otherwise set it to 'False'. If you do + not provide a value for ``2FA_enabled``, a default value of + 'False' is used. CodeCommit does not support two-factor + authentication, so do not provide "2FA_enabled" with CodeCommit + repositories. + + For GitHub and other Git repos, when SSH URLs are provided, it + doesn't matter whether 2FA is enabled or disabled; you should + either have no passphrase for the SSH key pairs, or have the + ssh-agent configured so that you will not be prompted for SSH + passphrase when you do 'git clone' command with SSH URLs. When + HTTPS URLs are provided: if 2FA is disabled, then either token + or username+password will be used for authentication if provided + (token prioritized); if 2FA is enabled, only token will be used + for authentication if provided. If required authentication info + is not provided, python SDK will try to use local credentials + storage to authenticate. If that fails either, an error message + will be thrown. + + For CodeCommit repos, 2FA is not supported, so '2FA_enabled' + should not be provided. There is no token in CodeCommit, so + 'token' should not be provided too. When 'repo' is an SSH URL, + the requirements are the same as GitHub-like repos. When 'repo' + is an HTTPS URL, username+password will be used for + authentication if they are provided; otherwise, python SDK will + try to use either CodeCommit credential helper or local + credential storage for authentication. **kwargs: Keyword arguments passed to the ``Model`` initializer. """ super(FrameworkModel, self).__init__( @@ -635,17 +733,21 @@ def __init__( def prepare_container_def( self, instance_type, accelerator_type=None ): # pylint disable=unused-argument - """Return a container definition with framework configuration set in model environment variables. + """Return a container definition with framework configuration set in + model environment variables. This also uploads user-supplied code to S3. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Returns: - dict[str, str]: A container definition object usable with the CreateModel API. + dict[str, str]: A container definition object usable with the + CreateModel API. 
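+
+        Example, a minimal sketch of the returned structure (the image URI,
+        S3 location, and environment values below are hypothetical):
+
+            >>> model.prepare_container_def('ml.m4.xlarge')
+            {'Image': '123456789012.dkr.ecr.us-west-2.amazonaws.com/my-image:1.0',
+             'Environment': {'SAGEMAKER_PROGRAM': 'inference.py', ...},
+             'ModelDataUrl': 's3://my-bucket/model.tar.gz'}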
""" deploy_key_prefix = fw_utils.model_code_key_prefix(self.key_prefix, self.name, self.image) self._upload_code(deploy_key_prefix) @@ -654,6 +756,11 @@ def prepare_container_def( return sagemaker.container_def(self.image, self.model_data, deploy_env) def _upload_code(self, key_prefix, repack=False): + """ + Args: + key_prefix: + repack: + """ local_code = utils.get_config_value("local.local_code", self.sagemaker_session.config) if self.sagemaker_session.local_mode and local_code: self.uploaded_code = None @@ -687,6 +794,7 @@ def _upload_code(self, key_prefix, repack=False): ) def _framework_env_vars(self): + """Placeholder docstring""" if self.uploaded_code: script_name = self.uploaded_code.script_name dir_name = self.uploaded_code.s3_prefix @@ -710,16 +818,19 @@ def __init__(self, role, model_data=None, algorithm_arn=None, model_package_arn= """Initialize a SageMaker ModelPackage. Args: - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. Must be - provided if algorithm_arn is provided. - algorithm_arn (str): algorithm arn used to train the model, can be just the name if your - account owns the algorithm. Must also provide ``model_data``. - model_package_arn (str): An existing SageMaker Model Package arn, can be just the name if - your account owns the Model Package. ``model_data`` is not required. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. Must be provided if algorithm_arn is provided. + algorithm_arn (str): algorithm arn used to train the model, can be + just the name if your account owns the algorithm. Must also + provide ``model_data``. + model_package_arn (str): An existing SageMaker Model Package arn, + can be just the name if your account owns the Model Package. + ``model_data`` is not required. **kwargs: Additional kwargs passed to the Model constructor. """ super(ModelPackage, self).__init__(role=role, model_data=model_data, image=None, **kwargs) @@ -746,6 +857,7 @@ def __init__(self, role, model_data=None, algorithm_arn=None, model_package_arn= self._created_model_package_name = None def _create_sagemaker_model_package(self): + """Placeholder docstring""" if self.algorithm_arn is None: raise ValueError("No algorithm_arn was provided to create a SageMaker Model Pacakge") @@ -757,7 +869,8 @@ def _create_sagemaker_model_package(self): return name def enable_network_isolation(self): - """Whether to enable network isolation when creating a model out of this ModelPackage + """Whether to enable network isolation when creating a model out of this + ModelPackage Returns: bool: If network isolation should be enabled or not. 
@@ -765,6 +878,7 @@ def enable_network_isolation(self): return self._is_marketplace() def _is_marketplace(self): + """Placeholder docstring""" model_package_name = self.model_package_arn or self._created_model_package_name if model_package_name is None: return True @@ -785,8 +899,8 @@ def _create_sagemaker_model(self, *args): # pylint: disable=unused-argument """Create a SageMaker Model Entity Args: - *args: Arguments coming from the caller. This class - does not require any so they are ignored. + *args: Arguments coming from the caller. This class does not require + any so they are ignored. """ if self.algorithm_arn: # When ModelPackage is created using an algorithm_arn we need to first diff --git a/src/sagemaker/mxnet/__init__.py b/src/sagemaker/mxnet/__init__.py index 653c2fa19e..eb630d2098 100644 --- a/src/sagemaker/mxnet/__init__.py +++ b/src/sagemaker/mxnet/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import # noqa: F401 from sagemaker.mxnet.estimator import MXNet # noqa: F401 diff --git a/src/sagemaker/mxnet/defaults.py b/src/sagemaker/mxnet/defaults.py index 9e693c87bc..b95383c544 100644 --- a/src/sagemaker/mxnet/defaults.py +++ b/src/sagemaker/mxnet/defaults.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import MXNET_VERSION = "1.2" diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 6cb552ff4b..c3713aa354 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -48,44 +49,57 @@ def __init__( distributions=None, **kwargs ): - """ - This ``Estimator`` executes an MXNet script in a managed MXNet execution environment, within a SageMaker - Training Job. The managed MXNet environment is an Amazon-built Docker container that executes functions - defined in the supplied ``entry_point`` Python script. - - Training is started by calling :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator. - After training is complete, calling :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a - hosted SageMaker endpoint and returns an :class:`~sagemaker.amazon.mxnet.model.MXNetPredictor` instance - that can be used to perform inference against the hosted model. - - Technical documentation on preparing MXNet scripts for SageMaker training and using the MXNet Estimator is - available on the project home-page: https://github.com/aws/sagemaker-python-sdk + """This ``Estimator`` executes an MXNet script in a managed MXNet + execution environment, within a SageMaker Training Job. The managed + MXNet environment is an Amazon-built Docker container that executes + functions defined in the supplied ``entry_point`` Python script. 
+
+        Training is started by calling
+        :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator.
+        After training is complete, calling
+        :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a hosted
+        SageMaker endpoint and returns an
+        :class:`~sagemaker.amazon.mxnet.model.MXNetPredictor` instance that can
+        be used to perform inference against the hosted model.
+
+        Technical documentation on preparing MXNet scripts for SageMaker
+        training and using the MXNet Estimator is available on the project
+        home-page: https://github.com/aws/sagemaker-python-sdk
 
         Args:
-            entry_point (str): Path (absolute or relative) to the Python source file which should be executed
-                as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
-            source_dir (str): Path (absolute or relative) to a directory with any other training
-                source code dependencies aside from tne entry point file (default: None). Structure within this
-                directory are preserved when training on Amazon SageMaker.
-            hyperparameters (dict): Hyperparameters that will be used for training (default: None).
-                The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
-                For convenience, this accepts other types for keys and values, but ``str()`` will be called
-                to convert them before training.
-            py_version (str): Python version you want to use for executing your model training code (default: 'py2').
-                One of 'py2' or 'py3'.
-            framework_version (str): MXNet version you want to use for executing your model training code.
-                List of supported versions https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators.
+            entry_point (str): Path (absolute or relative) to the Python source
+                file which should be executed as the entry point to training.
+                This should be compatible with either Python 2.7 or Python 3.5.
+            source_dir (str): Path (absolute or relative) to a directory with
+                any other training source code dependencies aside from the entry
+                point file (default: None). Structure within this directory is
+                preserved when training on Amazon SageMaker.
+            hyperparameters (dict): Hyperparameters that will be used for
+                training (default: None). The hyperparameters are made
+                accessible as a dict[str, str] to the training code on
+                SageMaker. For convenience, this accepts other types for keys
+                and values, but ``str()`` will be called to convert them before
+                training.
+            py_version (str): Python version you want to use for executing your
+                model training code (default: 'py2'). One of 'py2' or 'py3'.
+            framework_version (str): MXNet version you want to use for executing
+                your model training code. List of supported versions
+                https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators.
                 If not specified, this will default to 1.2.1.
             image_name (str): If specified, the estimator will use this image for training and hosting, instead of
-                selecting the appropriate SageMaker official image based on framework_version and py_version. It can
-                be an ECR url or dockerhub image and tag.
+                selecting the appropriate SageMaker official image based on
+                framework_version and py_version. It can be an ECR url or
+                dockerhub image and tag.
+ + Examples: + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 + custom-image:latest. + + distributions (dict): A dictionary with information on how to run distributed training + (default: None). + distributions: + **kwargs: Additional kwargs passed to the + :class:`~sagemaker.estimator.Framework` constructor. """ if framework_version is None: logger.warning(empty_framework_version_warning(MXNET_VERSION, self.LATEST_VERSION)) @@ -102,6 +116,10 @@ def __init__( self._configure_distribution(distributions) def _configure_distribution(self, distributions): + """ + Args: + distributions: + """ if distributions is None: return @@ -119,21 +137,24 @@ def _configure_distribution(self, distributions): def create_model( self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT ): - """Create a SageMaker ``MXNetModel`` object that can be deployed to an ``Endpoint``. + """Create a SageMaker ``MXNetModel`` object that can be deployed to an + ``Endpoint``. Args: - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. Returns: sagemaker.mxnet.model.MXNetModel: A SageMaker ``MXNetModel`` object. - See :func:`~sagemaker.mxnet.model.MXNetModel` for full details. + See :func:`~sagemaker.mxnet.model.MXNetModel` for full details. """ role = role or self.role return MXNetModel( @@ -156,15 +177,17 @@ def create_model( @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded. Returns: dictionary: The transformed init_params - """ init_params = super(MXNet, cls)._prepare_init_params_from_job_description( job_details, model_channel_name diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index 2107f0103e..26b6a6ffa7 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. 
See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -28,16 +29,19 @@ class MXNetPredictor(RealTimePredictor): """A RealTimePredictor for inference against MXNet Endpoints. - This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for MXNet - inference.""" + This is able to serialize Python lists, dictionaries, and numpy arrays to + multidimensional tensors for MXNet inference. + """ def __init__(self, endpoint_name, sagemaker_session=None): """Initialize an ``MXNetPredictor``. Args: - endpoint_name (str): The name of the endpoint to perform inference on. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + endpoint_name (str): The name of the endpoint to perform inference + on. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. """ super(MXNetPredictor, self).__init__( @@ -66,22 +70,32 @@ def __init__( """Initialize an MXNetModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5. - image (str): A Docker image URI (default: None). If not specified, a default image for MXNet will be used. - py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - framework_version (str): MXNet version you want to use for executing your model training code. - predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor - with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of - invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to model + hosting. This should be compatible with either Python 2.7 or + Python 3.5. + image (str): A Docker image URI (default: None). If not specified, a + default image for MXNet will be used. 
+ py_version (str): Python version you want to use for executing your + model training code (default: 'py2'). + framework_version (str): MXNet version you want to use for executing + your model training code. + predictor_cls (callable[str, sagemaker.session.Session]): A function + to call to create a predictor with an endpoint name and + SageMaker ``Session``. If specified, ``deploy()`` returns the + result of invoking this function on the created endpoint name. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + **kwargs: Keyword arguments passed to the ``FrameworkModel`` + initializer. """ super(MXNetModel, self).__init__( model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs @@ -95,15 +109,19 @@ def __init__( self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type, accelerator_type=None): - """Return a container definition with framework configuration set in model environment variables. + """Return a container definition with framework configuration set in + model environment variables. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Returns: - dict[str, str]: A container definition object usable with the CreateModel API. + dict[str, str]: A container definition object usable with the + CreateModel API. """ is_mms_version = parse_version(self.framework_version) >= parse_version( self._LOWEST_MMS_VERSION diff --git a/src/sagemaker/parameter.py b/src/sagemaker/parameter.py index 152455c107..e254809334 100644 --- a/src/sagemaker/parameter.py +++ b/src/sagemaker/parameter.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import json @@ -17,10 +18,9 @@ class ParameterRange(object): - """Base class for representing parameter ranges. This is used to define - what hyperparameters to tune for an Amazon SageMaker hyperparameter tuning job and to verify - hyperparameters for Marketplace Algorithms. - + """Base class for representing parameter ranges. This is used to define what + hyperparameters to tune for an Amazon SageMaker hyperparameter tuning job + and to verify hyperparameters for Marketplace Algorithms. """ __all_types__ = ("Continuous", "Categorical", "Integer") @@ -31,8 +31,9 @@ def __init__(self, min_value, max_value, scaling_type="Auto"): Args: min_value (float or int): The minimum value for the range. max_value (float or int): The maximum value for the range. - scaling_type (str): The scale used for searching the range during tuning (default: 'Auto'). - Valid values: 'Auto', 'Linear', 'Logarithmic' and 'ReverseLogarithmic'. + scaling_type (str): The scale used for searching the range during + tuning (default: 'Auto'). 
Valid values: 'Auto', 'Linear',
+                'Logarithmic' and 'ReverseLogarithmic'.
         """
         self.min_value = min_value
         self.max_value = max_value
@@ -51,17 +52,22 @@
     def is_valid(self, value):
 
     @classmethod
     def cast_to_type(cls, value):
+        """
+        Args:
+            value:
+        """
         return float(value)
 
     def as_tuning_range(self, name):
-        """Represent the parameter range as a dicionary suitable for a request to
-        create an Amazon SageMaker hyperparameter tuning job.
+        """Represent the parameter range as a dictionary suitable for a request
+        to create an Amazon SageMaker hyperparameter tuning job.
 
         Args:
             name (str): The name of the hyperparameter.
 
         Returns:
-            dict[str, str]: A dictionary that contains the name and values of the hyperparameter.
+            dict[str, str]: A dictionary that contains the name and values of
+                the hyperparameter.
         """
         return {
             "Name": name,
@@ -73,6 +79,7 @@
 class ContinuousParameter(ParameterRange):
     """A class for representing hyperparameters that have a continuous range of possible values.
+
     Args:
         min_value (float): The minimum value for the range.
         max_value (float): The maximum value for the range.
@@ -82,11 +89,16 @@
 
     @classmethod
     def cast_to_type(cls, value):
+        """
+        Args:
+            value:
+        """
         return float(value)
 
 
 class CategoricalParameter(ParameterRange):
-    """A class for representing hyperparameters that have a discrete list of possible values.
+    """A class for representing hyperparameters that have a discrete list of
+    possible values.
     """
 
     __name__ = "Categorical"
 
     def __init__(self, values):  # pylint: disable=super-init-not-called
         """Initialize a ``CategoricalParameter``.
 
         Args:
-            values (list or object): The possible values for the hyperparameter. This input will
-                be converted into a list of strings.
+            values (list or object): The possible values for the hyperparameter.
+                This input will be converted into a list of strings.
         """
         if isinstance(values, list):
             self.values = [to_str(v) for v in values]
@@ -104,22 +116,25 @@
             self.values = [to_str(values)]
 
     def as_tuning_range(self, name):
-        """Represent the parameter range as a dicionary suitable for a request to
-        create an Amazon SageMaker hyperparameter tuning job.
+        """Represent the parameter range as a dictionary suitable for a request
+        to create an Amazon SageMaker hyperparameter tuning job.
 
         Args:
             name (str): The name of the hyperparameter.
 
         Returns:
-            dict[str, list[str]]: A dictionary that contains the name and values of the hyperparameter.
+            dict[str, list[str]]: A dictionary that contains the name and values
+                of the hyperparameter.
         """
         return {"Name": name, "Values": self.values}
 
     def as_json_range(self, name):
-        """Represent the parameter range as a dictionary suitable for a request to
-        create an Amazon SageMaker hyperparameter tuning job using one of the deep learning frameworks.
+        """Represent the parameter range as a dictionary suitable for a request
+        to create an Amazon SageMaker hyperparameter tuning job using one of the
+        deep learning frameworks.
 
-        The deep learning framework images require that hyperparameters be serialized as JSON.
+        The deep learning framework images require that hyperparameters be
+        serialized as JSON.
 
         Args:
             name (str): The name of the hyperparameter.
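For quick reference, a minimal sketch of how these parameter classes fit
together (the hyperparameter names below are hypothetical; a dict like
``ranges`` is what ``HyperparameterTuner`` takes as its
``hyperparameter_ranges`` argument):

    >>> from sagemaker.parameter import (CategoricalParameter,
    ...                                  ContinuousParameter, IntegerParameter)
    >>> ranges = {
    ...     'learning_rate': ContinuousParameter(0.01, 0.2, scaling_type='Logarithmic'),
    ...     'num_layers': IntegerParameter(2, 10),
    ...     'optimizer': CategoricalParameter(['sgd', 'adam']),
    ... }
    >>> ranges['learning_rate'].as_tuning_range('learning_rate')
    {'Name': 'learning_rate', 'MinValue': '0.01', 'MaxValue': '0.2', 'ScalingType': 'Logarithmic'}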
@@ -131,10 +146,18 @@ def as_json_range(self, name): return {"Name": name, "Values": [json.dumps(v) for v in self.values]} def is_valid(self, value): + """ + Args: + value: + """ return value in self.values @classmethod def cast_to_type(cls, value): + """ + Args: + value: + """ return to_str(value) @@ -149,4 +172,8 @@ class IntegerParameter(ParameterRange): @classmethod def cast_to_type(cls, value): + """ + Args: + value: + """ return int(value) diff --git a/src/sagemaker/pipeline.py b/src/sagemaker/pipeline.py index c70b2b17ca..9e2a24ae41 100644 --- a/src/sagemaker/pipeline.py +++ b/src/sagemaker/pipeline.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import sagemaker @@ -19,30 +20,39 @@ class PipelineModel(object): - """A pipeline of SageMaker ``Model``s that can be deployed to an ``Endpoint``.""" + """A pipeline of SageMaker + ``Model``s that can be deployed to an ``Endpoint``. + """ def __init__( self, models, role, predictor_cls=None, name=None, vpc_config=None, sagemaker_session=None ): - """Initialize an SageMaker ``Model`` which can be used to build an Inference Pipeline comprising of multiple - model containers. + """Initialize an SageMaker ``Model`` which can be used to build an + Inference Pipeline comprising of multiple model containers. Args: - models (list[sagemaker.Model]): For using multiple containers to build an inference pipeline, - you can pass a list of ``sagemaker.Model`` objects in the order you want the inference to happen. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - predictor_cls (callable[string, sagemaker.session.Session]): A function to call to create - a predictor (default: None). If not None, ``deploy`` will return the result of invoking - this function on the created endpoint name. - name (str): The model name. If None, a default model name will be selected on each ``deploy``. - vpc_config (dict[str, list[str]]): The VpcConfig set on the model (default: None) + models (list[sagemaker.Model]): For using multiple containers to + build an inference pipeline, you can pass a list of ``sagemaker.Model`` objects + in the order you want the inference to happen. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + predictor_cls (callable[string, sagemaker.session.Session]): A + function to call to create a predictor (default: None). If not + None, ``deploy`` will return the result of invoking this + function on the created endpoint name. + name (str): The model name. If None, a default model name will be + selected on each ``deploy``. + vpc_config (dict[str, list[str]]): The VpcConfig set on the model + (default: None) * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. 
- sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker - interactions (default: None). If not specified, one is created using the default AWS configuration chain. + sagemaker_session (sagemaker.session.Session): A SageMaker Session + object, used for SageMaker interactions (default: None). If not + specified, one is created using the default AWS configuration + chain. """ self.models = models self.role = role @@ -54,18 +64,20 @@ def __init__( self.endpoint_name = None def pipeline_container_def(self, instance_type): - """Return a dict created by ``sagemaker.pipeline_container_def()`` for deploying this model to a specified - instance type. + """Return a dict created by ``sagemaker.pipeline_container_def()`` for + deploying this model to a specified instance type. - Subclasses can override this to provide custom container definitions for - deployment to a specific instance type. Called by ``deploy()``. + Subclasses can override this to provide custom container definitions + for deployment to a specific instance type. Called by ``deploy()``. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. Returns: - list[dict[str, str]]: A list of container definition objects usable with the CreateModel API in the scenario - of multiple containers (Inference Pipeline). + list[dict[str, str]]: A list of container definition objects usable + with the CreateModel API in the scenario of multiple containers + (Inference Pipeline). """ return sagemaker.pipeline_container_def(self.models, instance_type) @@ -73,29 +85,36 @@ def pipeline_container_def(self, instance_type): def deploy( self, initial_instance_count, instance_type, endpoint_name=None, tags=None, wait=True ): - """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``. + """Deploy this ``Model`` to an ``Endpoint`` and optionally return a + ``Predictor``. - Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``. - If ``self.predictor_cls`` is not None, this method returns a the result of invoking - ``self.predictor_cls`` on the created endpoint name. + Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an + ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None, + this method returns a the result of invoking ``self.predictor_cls`` on + the created endpoint name. - The name of the created model is accessible in the ``name`` field of this ``Model`` after deploy returns + The name of the created model is accessible in the ``name`` field of + this ``Model`` after deploy returns - The name of the created endpoint is accessible in the ``endpoint_name`` - field of this ``Model`` after deploy returns. + The name of the created endpoint is accessible in the + ``endpoint_name`` field of this ``Model`` after deploy returns. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - initial_instance_count (int): The initial number of instances to run in the - ``Endpoint`` created from this ``Model``. - endpoint_name (str): The name of the endpoint to create (default: None). - If not specified, a unique endpoint name will be created. - tags(List[dict[str, str]]): The list of tags to attach to this specific endpoint. - wait (bool): Whether the call should wait until the deployment of model completes (default: True). 
+ initial_instance_count (int): The initial number of instances to run + in the ``Endpoint`` created from this ``Model``. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + endpoint_name (str): The name of the endpoint to create (default: + None). If not specified, a unique endpoint name will be created. + tags (List[dict[str, str]]): The list of tags to attach to this + specific endpoint. + wait (bool): Whether the call should wait until the deployment of + model completes (default: True). Returns: callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on - the created endpoint name, if ``self.predictor_cls`` is not None. Otherwise, return None. + the created endpoint name, if ``self.predictor_cls`` is not + None. Otherwise, return None. """ if not self.sagemaker_session: self.sagemaker_session = Session() @@ -122,11 +141,9 @@ def _create_sagemaker_pipeline_model(self, instance_type): """Create a SageMaker Model Entity Args: - instance_type (str): The EC2 instance type that this Model will be used for, this is only - used to determine if the image needs GPU support or not. - accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading - and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator - will be attached to the endpoint. + instance_type (str): The EC2 instance type that this Model will be + used for, this is only used to determine if the image needs GPU + support or not. """ if not self.sagemaker_session: self.sagemaker_session = Session() @@ -157,22 +174,30 @@ def transformer( Args: instance_count (int): Number of EC2 instances to use. - instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - env (dict): Environment variables to be set for use during the transform job (default: None). - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. + strategy (str): The strategy used to decide how to batch records in + a single request (default: None). Valid values: 'MULTI_RECORD' + and 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). + Valid values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If + not specified, results are stored to a default bucket. + output_kms_key (str): Optional. 
KMS key ID for encrypting the + transform output (default: None). + accept (str): The content type accepted by the endpoint deployed + during the transform job. + env (dict): Environment variables to be set for use during the + transform job (default: None). + max_concurrent_transforms (int): The maximum number of HTTP requests + to be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP + request to the container in MB. + tags (list[dict]): List of tags for labeling a transform job. If + none specified, then the tags used for the training job are used + for the transform job. + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). """ self._create_sagemaker_pipeline_model(instance_type) @@ -195,9 +220,9 @@ def transformer( ) def delete_model(self): - """Delete the SageMaker model backing this pipeline model. This does not delete the list of SageMaker models used - in multiple containers to build the inference pipeline. - + """Delete the SageMaker model backing this pipeline model. This does not + delete the list of SageMaker models used in multiple containers to build + the inference pipeline. """ if self.name is None: diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index d1ddba38a6..3dd6c48e2a 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import print_function, absolute_import import codecs @@ -24,8 +25,7 @@ class RealTimePredictor(object): - """Make prediction requests to an Amazon SageMaker endpoint. - """ + """Make prediction requests to an Amazon SageMaker endpoint.""" def __init__( self, @@ -38,24 +38,33 @@ def __init__( ): """Initialize a ``RealTimePredictor``. - Behavior for serialization of input data and deserialization of result data - can be configured through initializer arguments. If not specified, a sequence - of bytes is expected and the API sends it in the request body without modifications. - In response, the API returns the sequence of bytes from the prediction result without any modifications. + Behavior for serialization of input data and deserialization of + result data can be configured through initializer arguments. If not + specified, a sequence of bytes is expected and the API sends it in the + request body without modifications. In response, the API returns the + sequence of bytes from the prediction result without any modifications. Args: - endpoint (str): Name of the Amazon SageMaker endpoint to which requests are sent. - sagemaker_session (sagemaker.session.Session): A SageMaker Session object, used for SageMaker - interactions (default: None). If not specified, one is created using the default AWS configuration chain. - serializer (callable): Accepts a single argument, the input data, and returns a sequence - of bytes. It may provide a ``content_type`` attribute that defines the endpoint request content type. - If not specified, a sequence of bytes is expected for the data. - deserializer (callable): Accepts two arguments, the result data and the response content type, - and returns a sequence of bytes. 
It may provide a ``content_type`` attribute that defines the endpoint - response's "Accept" content type. If not specified, a sequence of bytes is expected for the data. - content_type (str): The invocation's "ContentType", overriding any ``content_type`` from - the serializer (default: None). - accept (str): The invocation's "Accept", overriding any accept from the deserializer (default: None). + endpoint (str): Name of the Amazon SageMaker endpoint to which + requests are sent. + sagemaker_session (sagemaker.session.Session): A SageMaker Session + object, used for SageMaker interactions (default: None). If not + specified, one is created using the default AWS configuration + chain. + serializer (callable): Accepts a single argument, the input data, + and returns a sequence of bytes. It may provide a + ``content_type`` attribute that defines the endpoint request + content type. If not specified, a sequence of bytes is expected + for the data. + deserializer (callable): Accepts two arguments, the result data and + the response content type, and returns a sequence of bytes. It + may provide a ``content_type`` attribute that defines the + endpoint response's "Accept" content type. If not specified, a + sequence of bytes is expected for the data. + content_type (str): The invocation's "ContentType", overriding any + ``content_type`` from the serializer (default: None). + accept (str): The invocation's "Accept", overriding any accept from + the deserializer (default: None). """ self.endpoint = endpoint self.sagemaker_session = sagemaker_session or Session() @@ -70,17 +79,20 @@ def predict(self, data, initial_args=None): """Return the inference from the specified endpoint. Args: - data (object): Input data for which you want the model to provide inference. - If a serializer was specified when creating the RealTimePredictor, the result of the - serializer is sent as input data. Otherwise the data must be sequence of bytes, and - the predict method then sends the bytes in the request body as is. + data (object): Input data for which you want the model to provide + inference. If a serializer was specified when creating the + RealTimePredictor, the result of the serializer is sent as input + data. Otherwise the data must be sequence of bytes, and the + predict method then sends the bytes in the request body as is. initial_args (dict[str,str]): Optional. Default arguments for boto3 - ``invoke_endpoint`` call. Default is None (no default arguments). + ``invoke_endpoint`` call. Default is None (no default + arguments). Returns: object: Inference for the given input. If a deserializer was specified when creating - the RealTimePredictor, the result of the deserializer is returned. Otherwise the response - returns the sequence of bytes as is. + the RealTimePredictor, the result of the deserializer is + returned. Otherwise the response returns the sequence of bytes + as is. 
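+
+        Example, a minimal sketch (assumes an existing endpoint named
+        'my-endpoint' that accepts CSV input):
+
+            >>> from sagemaker.predictor import RealTimePredictor, csv_serializer
+            >>> predictor = RealTimePredictor('my-endpoint', serializer=csv_serializer)
+            >>> predictor.predict([1.0, 2.0, 3.0])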
""" request_args = self._create_request_args(data, initial_args) @@ -88,6 +100,10 @@ def predict(self, data, initial_args=None): return self._handle_response(response) def _handle_response(self, response): + """ + Args: + response: + """ response_body = response["Body"] if self.deserializer is not None: # It's the deserializer's responsibility to close the stream @@ -97,6 +113,11 @@ def _handle_response(self, response): return data def _create_request_args(self, data, initial_args=None): + """ + Args: + data: + initial_args: + """ args = dict(initial_args) if initial_args else {} if "EndpointName" not in args: @@ -115,20 +136,19 @@ def _create_request_args(self, data, initial_args=None): return args def _delete_endpoint_config(self): - """Delete the Amazon SageMaker endpoint configuration - - """ + """Delete the Amazon SageMaker endpoint configuration""" self.sagemaker_session.delete_endpoint_config(self._endpoint_config_name) def delete_endpoint(self, delete_endpoint_config=True): - """Delete the Amazon SageMaker endpoint backing this predictor. Also delete the endpoint configuration attached - to it if delete_endpoint_config is True. + """Delete the Amazon SageMaker endpoint backing this predictor. Also + delete the endpoint configuration attached to it if + delete_endpoint_config is True. Args: - delete_endpoint_config (bool, optional): Flag to indicate whether to delete endpoint configuration together - with endpoint. Defaults to True. If True, both endpoint and endpoint configuration will be deleted. If - False, only endpoint will be deleted. - + delete_endpoint_config (bool, optional): Flag to indicate whether to + delete endpoint configuration together with endpoint. Defaults + to True. If True, both endpoint and endpoint configuration will + be deleted. If False, only endpoint will be deleted. """ if delete_endpoint_config: self._delete_endpoint_config() @@ -136,9 +156,7 @@ def delete_endpoint(self, delete_endpoint_config=True): self.sagemaker_session.delete_endpoint(self.endpoint) def delete_model(self): - """Deletes the Amazon SageMaker models backing this predictor. 
- - """ + """Deletes the Amazon SageMaker models backing this predictor.""" request_failed = False failed_models = [] for model_name in self._model_names: @@ -155,6 +173,7 @@ def delete_model(self): ) def _get_endpoint_config_name(self): + """Placeholder docstring""" endpoint_desc = self.sagemaker_session.sagemaker_client.describe_endpoint( EndpointName=self.endpoint ) @@ -162,6 +181,7 @@ def _get_endpoint_config_name(self): return endpoint_config_name def _get_model_names(self): + """Placeholder docstring""" endpoint_config = self.sagemaker_session.sagemaker_client.describe_endpoint_config( EndpointConfigName=self._endpoint_config_name ) @@ -170,7 +190,10 @@ def _get_model_names(self): class _CsvSerializer(object): + """Placeholder docstring""" + def __init__(self): + """Placeholder docstring""" self.content_type = CONTENT_TYPE_CSV def __call__(self, data): @@ -190,6 +213,10 @@ def __call__(self, data): @staticmethod def _serialize_row(data): # Don't attempt to re-serialize a string + """ + Args: + data: + """ if isinstance(data, str): return data if isinstance(data, np.ndarray): @@ -207,14 +234,26 @@ def _serialize_row(data): def _csv_serialize_python_array(data): + """ + Args: + data: + """ return _csv_serialize_object(data) def _csv_serialize_from_buffer(buff): + """ + Args: + buff: + """ return buff.read() def _csv_serialize_object(data): + """ + Args: + data: + """ csv_buffer = StringIO() csv_writer = csv.writer(csv_buffer, delimiter=",") @@ -226,10 +265,18 @@ def _csv_serialize_object(data): def _is_mutable_sequence_like(obj): + """ + Args: + obj: + """ return _is_sequence_like(obj) and hasattr(obj, "__setitem__") def _is_sequence_like(obj): + """ + Args: + obj: + """ # Need to explicitly check on str since str lacks the iterable magic methods in Python 2 return ( # pylint: disable=consider-using-ternary hasattr(obj, "__iter__") and hasattr(obj, "__getitem__") @@ -237,17 +284,32 @@ def _is_sequence_like(obj): def _row_to_csv(obj): + """ + Args: + obj: + """ if isinstance(obj, str): return obj return ",".join(obj) class _CsvDeserializer(object): + """Placeholder docstring""" + def __init__(self, encoding="utf-8"): + """ + Args: + encoding: + """ self.accept = CONTENT_TYPE_CSV self.encoding = encoding def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ try: return list(csv.reader(stream.read().decode(self.encoding).splitlines())) finally: @@ -260,14 +322,23 @@ def __call__(self, stream, content_type): class BytesDeserializer(object): """Return the response as an undecoded array of bytes. - Args: - accept (str): The Accept header to send to the server (optional). + Args: + accept (str): The Accept header to send to the server (optional). """ def __init__(self, accept=None): + """ + Args: + accept: + """ self.accept = accept def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ try: return stream.read() finally: @@ -277,16 +348,26 @@ def __call__(self, stream, content_type): class StringDeserializer(object): """Return the response as a decoded string. - Args: - encoding (str): The string encoding to use (default=utf-8). - accept (str): The Accept header to send to the server (optional). + Args: + encoding (str): The string encoding to use (default=utf-8). + accept (str): The Accept header to send to the server (optional). 
""" def __init__(self, encoding="utf-8", accept=None): + """ + Args: + encoding: + accept: + """ self.encoding = encoding self.accept = accept def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ try: return stream.read().decode(self.encoding) finally: @@ -298,24 +379,37 @@ class StreamDeserializer(object): It is the receivers responsibility to close the stream when they're done reading the stream. - Args: - accept (str): The Accept header to send to the server (optional). + Args: + accept (str): The Accept header to send to the server (optional). """ def __init__(self, accept=None): + """ + Args: + accept: + """ self.accept = accept def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ return (stream, content_type) class _JsonSerializer(object): + """Placeholder docstring""" + def __init__(self): + """Placeholder docstring""" self.content_type = CONTENT_TYPE_JSON def __call__(self, data): - """Take data of various formats and serialize them into the expected request body. - This uses information about supported input formats for the deployed model. + """Take data of various formats and serialize them into the expected + request body. This uses information about supported input formats for + the deployed model. Args: data (object): Data to be serialized. @@ -338,15 +432,26 @@ def __call__(self, data): def _ndarray_to_list(data): + """ + Args: + data: + """ return data.tolist() if isinstance(data, np.ndarray) else data def _json_serialize_from_buffer(buff): + """ + Args: + buff: + """ return buff.read() class _JsonDeserializer(object): + """Placeholder docstring""" + def __init__(self): + """Placeholder docstring""" self.accept = CONTENT_TYPE_JSON def __call__(self, stream, content_type): @@ -369,7 +474,14 @@ def __call__(self, stream, content_type): class _NumpyDeserializer(object): + """Placeholder docstring""" + def __init__(self, accept=CONTENT_TYPE_NPY, dtype=None): + """ + Args: + accept: + dtype: + """ self.accept = accept self.dtype = dtype @@ -378,7 +490,8 @@ def __call__(self, stream, content_type=CONTENT_TYPE_NPY): Args: stream (stream): The response stream to be deserialized. - content_type (str): The content type of the response. Can accept CSV, JSON, or NPY data. + content_type (str): The content type of the response. Can accept + CSV, JSON, or NPY data. Returns: object: Body of the response deserialized into a Numpy array. @@ -405,14 +518,19 @@ def __call__(self, stream, content_type=CONTENT_TYPE_NPY): class _NPYSerializer(object): + """Placeholder docstring""" + def __init__(self): + """Placeholder docstring""" self.content_type = CONTENT_TYPE_NPY def __call__(self, data, dtype=None): """Serialize data into the request body in NPY format. Args: - data (object): Data to be serialized. Can be a numpy array, list, file, or buffer. + data (object): Data to be serialized. Can be a numpy array, list, + file, or buffer. + dtype: Returns: object: NPY serialized data used for the request. @@ -435,6 +553,10 @@ def __call__(self, data, dtype=None): def _npy_serialize(data): + """ + Args: + data: + """ buffer = BytesIO() np.save(buffer, data) return buffer.getvalue() diff --git a/src/sagemaker/pytorch/__init__.py b/src/sagemaker/pytorch/__init__.py index 2e8db2d49e..4f3212ba21 100644 --- a/src/sagemaker/pytorch/__init__.py +++ b/src/sagemaker/pytorch/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. 
See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.pytorch.estimator import PyTorch # noqa: F401 diff --git a/src/sagemaker/pytorch/defaults.py b/src/sagemaker/pytorch/defaults.py index 21629fa66f..f851a473cb 100644 --- a/src/sagemaker/pytorch/defaults.py +++ b/src/sagemaker/pytorch/defaults.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import PYTORCH_VERSION = "0.4" diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index f6a441d8e6..b23cf7a793 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -46,41 +47,52 @@ def __init__( image_name=None, **kwargs ): - """ - This ``Estimator`` executes an PyTorch script in a managed PyTorch execution environment, within a SageMaker - Training Job. The managed PyTorch environment is an Amazon-built Docker container that executes functions - defined in the supplied ``entry_point`` Python script. - - Training is started by calling :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator. - After training is complete, calling :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a - hosted SageMaker endpoint and returns an :class:`~sagemaker.amazon.pytorch.model.PyTorchPredictor` instance - that can be used to perform inference against the hosted model. - - Technical documentation on preparing PyTorch scripts for SageMaker training and using the PyTorch Estimator is - available on the project home-page: https://github.com/aws/sagemaker-python-sdk + """This ``Estimator`` executes a PyTorch script in a managed PyTorch + execution environment, within a SageMaker Training Job. The managed + PyTorch environment is an Amazon-built Docker container that executes + functions defined in the supplied ``entry_point`` Python script. + + Training is started by calling + :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator. + After training is complete, calling + :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a hosted + SageMaker endpoint and returns an + :class:`~sagemaker.amazon.pytorch.model.PyTorchPredictor` instance that + can be used to perform inference against the hosted model. + + Technical documentation on preparing PyTorch scripts for SageMaker + training and using the PyTorch Estimator is available on the project + home-page: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to training. 
- hyperparameters (dict): Hyperparameters that will be used for training (default: None). - The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. - For convenience, this accepts other types for keys and values, but ``str()`` will be called - to convert them before training. - py_version (str): Python version you want to use for executing your model training code (default: 'py3'). - One of 'py2' or 'py3'. - framework_version (str): PyTorch version you want to use for executing your model training code. - List of supported versions https://github.com/aws/sagemaker-python-sdk#pytorch-sagemaker-estimators. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to training. + This should be compatible with either Python 2.7 or Python 3.5. + source_dir (str): Path (absolute or relative) to a directory with + any other training source code dependencies aside from tne entry + point file (default: None). Structure within this directory are + preserved when training on Amazon SageMaker. + hyperparameters (dict): Hyperparameters that will be used for + training (default: None). The hyperparameters are made + accessible as a dict[str, str] to the training code on + SageMaker. For convenience, this accepts other types for keys + and values, but ``str()`` will be called to convert them before + training. + py_version (str): Python version you want to use for executing your + model training code (default: 'py3'). One of 'py2' or 'py3'. + framework_version (str): PyTorch version you want to use for + executing your model training code. List of supported versions + https://github.com/aws/sagemaker-python-sdk#pytorch-sagemaker-estimators. If not specified, this will default to 0.4. - image_name (str): If specified, the estimator will use this image for training and hosting, instead of - selecting the appropriate SageMaker official image based on framework_version and py_version. It can - be an ECR url or dockerhub image and tag. + image_name (str): If specified, the estimator will use this image + for training and hosting, instead of selecting the appropriate + SageMaker official image based on framework_version and + py_version. It can be an ECR url or dockerhub image and tag. Examples: - 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 - custom-image:latest. - **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 + custom-image:latest. + **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` + constructor. """ if framework_version is None: logger.warning(empty_framework_version_warning(PYTORCH_VERSION, PYTORCH_VERSION)) @@ -98,21 +110,24 @@ def __init__( def create_model( self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT ): - """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``. + """Create a SageMaker ``PyTorchModel`` object that can be deployed to an + ``Endpoint``. Args: - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. 
- Default: use subnets and security groups from this Estimator. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. Returns: - sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel`` object. - See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details. + sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel`` + object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details. """ role = role or self.role return PyTorchModel( @@ -135,15 +150,17 @@ def create_model( @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be downloaded. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded. Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = super(PyTorch, cls)._prepare_init_params_from_job_description( job_details, model_channel_name diff --git a/src/sagemaker/pytorch/model.py b/src/sagemaker/pytorch/model.py index 11d0caf04d..edea603501 100644 --- a/src/sagemaker/pytorch/model.py +++ b/src/sagemaker/pytorch/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -26,16 +27,19 @@ class PyTorchPredictor(RealTimePredictor): """A RealTimePredictor for inference against PyTorch Endpoints. - This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for PyTorch - inference.""" + This is able to serialize Python lists, dictionaries, and numpy arrays to + multidimensional tensors for PyTorch inference. + """ def __init__(self, endpoint_name, sagemaker_session=None): """Initialize an ``PyTorchPredictor``. Args: - endpoint_name (str): The name of the endpoint to perform inference on. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + endpoint_name (str): The name of the endpoint to perform inference + on. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. 
""" super(PyTorchPredictor, self).__init__( @@ -44,7 +48,9 @@ def __init__(self, endpoint_name, sagemaker_session=None): class PyTorchModel(FrameworkModel): - """An PyTorch SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``.""" + """An PyTorch SageMaker ``Model`` that can be deployed to a SageMaker + ``Endpoint``. + """ __framework_name__ = "pytorch" @@ -63,22 +69,32 @@ def __init__( """Initialize an PyTorchModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5. - image (str): A Docker image URI (default: None). If not specified, a default image for PyTorch will be used. - py_version (str): Python version you want to use for executing your model training code (default: 'py3'). - framework_version (str): PyTorch version you want to use for executing your model training code. - predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor - with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of - invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to model + hosting. This should be compatible with either Python 2.7 or + Python 3.5. + image (str): A Docker image URI (default: None). If not specified, a + default image for PyTorch will be used. + py_version (str): Python version you want to use for executing your + model training code (default: 'py3'). + framework_version (str): PyTorch version you want to use for + executing your model training code. + predictor_cls (callable[str, sagemaker.session.Session]): A function + to call to create a predictor with an endpoint name and + SageMaker ``Session``. If specified, ``deploy()`` returns the + result of invoking this function on the created endpoint name. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + **kwargs: Keyword arguments passed to the ``FrameworkModel`` + initializer. 
""" super(PyTorchModel, self).__init__( model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs @@ -92,15 +108,19 @@ def __init__( self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type, accelerator_type=None): - """Return a container definition with framework configuration set in model environment variables. + """Return a container definition with framework configuration set in + model environment variables. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Returns: - dict[str, str]: A container definition object usable with the CreateModel API. + dict[str, str]: A container definition object usable with the + CreateModel API. """ deploy_image = self.image if not deploy_image: diff --git a/src/sagemaker/rl/__init__.py b/src/sagemaker/rl/__init__.py index 5acabd42f4..a1a059714d 100644 --- a/src/sagemaker/rl/__init__.py +++ b/src/sagemaker/rl/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.rl.estimator import ( # noqa: F401 diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index aa2565d02a..dcbeb84fb5 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import enum @@ -45,11 +46,15 @@ class RLToolkit(enum.Enum): + """Placeholder docstring""" + COACH = "coach" RAY = "ray" class RLFramework(enum.Enum): + """Placeholder docstring""" + TENSORFLOW = "tensorflow" MXNET = "mxnet" @@ -74,50 +79,58 @@ def __init__( **kwargs ): """This Estimator executes an RLEstimator script in a managed - Reinforcement Learning (RL) execution environment within a SageMaker Training Job. - The managed RL environment is an Amazon-built Docker container that executes - functions defined in the supplied ``entry_point`` Python script. - - Training is started by calling :meth:`~sagemaker.amazon.estimator.Framework.fit` - on this Estimator. After training is complete, calling - :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a - hosted SageMaker endpoint and based on the specified framework returns - an :class:`~sagemaker.amazon.mxnet.model.MXNetPredictor` or - :class:`~sagemaker.amazon.tensorflow.serving.Predictor` instance - that can be used to perform inference against the hosted model. 
- - Technical documentation on preparing RLEstimator scripts for SageMaker training - and using the RLEstimator is available on the project homepage: - https://github.com/aws/sagemaker-python-sdk + Reinforcement Learning (RL) execution environment within a SageMaker + Training Job. The managed RL environment is an Amazon-built Docker + container that executes functions defined in the supplied + ``entry_point`` Python script. + + Training is started by calling + :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator. + After training is complete, calling + :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a hosted + SageMaker endpoint and based on the specified framework returns an + :class:`~sagemaker.amazon.mxnet.model.MXNetPredictor` or + :class:`~sagemaker.amazon.tensorflow.serving.Predictor` instance that + can be used to perform inference against the hosted model. + + Technical documentation on preparing RLEstimator scripts for + SageMaker training and using the RLEstimator is available on the project + homepage: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source file - which should be executed as the entry point to training. - This should be compatible with Python 3.5 for MXNet or Python 3.6 for TensorFlow. - toolkit (sagemaker.rl.RLToolkit): RL toolkit you want to use - for executing your model training code. - toolkit_version (str): RL toolkit version you want to be use - for executing your model training code. - framework (sagemaker.rl.RLFramework): Framework (MXNet or TensorFlow) - you want to be used as a toolkit backed for reinforcement learning training. - source_dir (str): Path (absolute or relative) to a directory with any other training - source code dependencies aside from the entry point file (default: None). - Structure within this directory is preserved when training on Amazon SageMaker. - hyperparameters (dict): Hyperparameters that will be used for training (default: None). - The hyperparameters are made accessible as a dict[str, str] - to the training code on SageMaker. For convenience, this accepts other types - for keys and values. - image_name (str): An ECR url. If specified, the estimator will use this image - for training and hosting, instead of selecting the appropriate SageMaker - official image based on framework_version and py_version. - Example: 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 - metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) - used to evaluate the training jobs. Each dictionary contains two keys: - 'Name' for the name of the metric, and 'Regex' for the regular expression used to - extract the metric from the logs. This should be defined only for jobs - that don't use an Amazon algorithm. - **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` - constructor. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to training. + This should be compatible with Python 3.5 for MXNet or Python + 3.6 for TensorFlow. + toolkit (sagemaker.rl.RLToolkit): RL toolkit you want to use for + executing your model training code. + toolkit_version (str): RL toolkit version you want to use for + executing your model training code. + framework (sagemaker.rl.RLFramework): Framework (MXNet or + TensorFlow) you want to be used as a toolkit backend for + reinforcement learning training. 
+ source_dir (str): Path (absolute or relative) to a directory with + any other training source code dependencies aside from the entry + point file (default: None). Structure within this directory is + preserved when training on Amazon SageMaker. + hyperparameters (dict): Hyperparameters that will be used for + training (default: None). The hyperparameters are made + accessible as a dict[str, str] to the training code on + SageMaker. For convenience, this accepts other types for keys + and values. + image_name (str): An ECR url. If specified, the estimator will use + this image for training and hosting, instead of selecting the + appropriate SageMaker official image based on framework_version + and py_version. Example: + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 + metric_definitions (list[dict]): A list of dictionaries that defines + the metric(s) used to evaluate the training jobs. Each + dictionary contains two keys: 'Name' for the name of the metric, + and 'Regex' for the regular expression used to extract the + metric from the logs. This should be defined only for jobs that + don't use an Amazon algorithm. + **kwargs: Additional kwargs passed to the + :class:`~sagemaker.estimator.Framework` constructor. """ self._validate_images_args(toolkit, toolkit_version, framework, image_name) @@ -151,28 +164,31 @@ def create_model( source_dir=None, dependencies=None, ): - """Create a SageMaker ``RLEstimatorModel`` object that can be deployed to an Endpoint. + """Create a SageMaker ``RLEstimatorModel`` object that can be deployed + to an Endpoint. Args: - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used - during transform jobs. If not specified, the role from the Estimator will be used. - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig - set on the model. Default: use subnets and security groups from this Estimator. - + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - - entry_point (str): Path (absolute or relative) to the Python source file - which should be executed as the entry point for MXNet hosting. - This should be compatible with Python 3.5 (default: self.entry_point) - source_dir (str): Path (absolute or relative) to a directory with any other training - source code dependencies aside from tne entry point file (default: self.source_dir). - Structure within this directory are preserved when hosting on Amazon SageMaker. - dependencies (list[str]): A list of paths to directories (absolute or relative) with - any additional libraries that will be exported to the container - (default: self.dependencies). The library folders will be copied to SageMaker - in the same folder where the entry_point is copied. If the ```source_dir``` points - to S3, code will be uploaded and the S3 location will be used instead. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point for MXNet + hosting. 
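A hedged construction sketch for the estimator documented above (the script name, role, instance settings, and the toolkit/framework version pairing are illustrative; the pairing must be one that _validate_images_args accepts):

    from sagemaker.rl import RLEstimator, RLFramework, RLToolkit

    estimator = RLEstimator(
        entry_point="train-coach.py",  # user-provided training script
        toolkit=RLToolkit.COACH,
        toolkit_version="0.11.0",  # illustrative; must be a supported combination
        framework=RLFramework.MXNET,
        role="arn:aws:iam::111122223333:role/SageMakerRole",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
    )
    estimator.fit()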
This should be compatible with Python 3.5 (default: + self.entry_point) + source_dir (str): Path (absolute or relative) to a directory with + any other training source code dependencies aside from the entry + point file (default: self.source_dir). Structure within this + directory is preserved when hosting on Amazon SageMaker. + dependencies (list[str]): A list of paths to directories (absolute + or relative) with any additional libraries that will be exported + to the container (default: self.dependencies). The library + folders will be copied to SageMaker in the same folder where the + entry_point is copied. If the ``source_dir`` points to S3, + code will be uploaded and the S3 location will be used instead. Returns: sagemaker.model.FrameworkModel: Depending on input parameters returns @@ -238,8 +254,9 @@ def create_model( def train_image(self): """Return the Docker image to use for training. - The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does the model training, - calls this method to find the image to use for model training. + The :meth:`~sagemaker.estimator.EstimatorBase.fit` method, which does + the model training, calls this method to find the image to use for model + training. Returns: str: The URI of the Docker image. @@ -256,15 +273,17 @@ def train_image(self): @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. - model_channel_name (str): Name of the channel where pre-trained model data will be - downloaded. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name (str): Name of the channel where pre-trained + model data will be downloaded. Returns: - dictionary: The transformed init_params + dictionary: The transformed init_params """ init_params = super(RLEstimator, cls)._prepare_init_params_from_job_description( job_details, model_channel_name @@ -296,7 +315,9 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na return init_params def hyperparameters(self): - """Return hyperparameters used by your custom TensorFlow code during model training.""" + """Return hyperparameters used by your custom TensorFlow code during + model training. 
+ """ hyperparameters = super(RLEstimator, self).hyperparameters() additional_hyperparameters = { @@ -310,6 +331,10 @@ def hyperparameters(self): @classmethod def _toolkit_and_version_from_tag(cls, image_tag): + """ + Args: + image_tag: + """ tag_pattern = re.compile( "^([A-Z]*|[a-z]*)(\d.*)-(cpu|gpu)-(py2|py3)$" # noqa: W605,E501 pylint: disable=anomalous-backslash-in-string ) @@ -320,6 +345,10 @@ def _toolkit_and_version_from_tag(cls, image_tag): @classmethod def _validate_framework_format(cls, framework): + """ + Args: + framework: + """ if framework and framework not in RLFramework: raise ValueError( "Invalid type: {}, valid RL frameworks types are: [{}]".format( @@ -329,6 +358,10 @@ def _validate_framework_format(cls, framework): @classmethod def _validate_toolkit_format(cls, toolkit): + """ + Args: + toolkit: + """ if toolkit and toolkit not in RLToolkit: raise ValueError( "Invalid type: {}, valid RL toolkits types are: [{}]".format( @@ -338,6 +371,13 @@ def _validate_toolkit_format(cls, toolkit): @classmethod def _validate_images_args(cls, toolkit, toolkit_version, framework, image_name): + """ + Args: + toolkit: + toolkit_version: + framework: + image_name: + """ cls._validate_toolkit_format(toolkit) cls._validate_framework_format(framework) @@ -372,6 +412,12 @@ def _validate_images_args(cls, toolkit, toolkit_version, framework, image_name): @classmethod def _is_combination_supported(cls, toolkit, toolkit_version, framework): + """ + Args: + toolkit: + toolkit_version: + framework: + """ supported_versions = TOOLKIT_FRAMEWORK_VERSION_MAP.get(toolkit, None) if supported_versions: supported_frameworks = supported_versions.get(toolkit_version, None) @@ -381,6 +427,12 @@ def _is_combination_supported(cls, toolkit, toolkit_version, framework): @classmethod def _validate_toolkit_support(cls, toolkit, toolkit_version, framework): + """ + Args: + toolkit: + toolkit_version: + framework: + """ if not cls._is_combination_supported(toolkit, toolkit_version, framework): raise AttributeError( "Provided `{}-{}` and `{}` combination is not supported.".format( @@ -389,9 +441,11 @@ def _validate_toolkit_support(cls, toolkit, toolkit_version, framework): ) def _image_version(self): + """Placeholder docstring""" return "{}{}".format(self.toolkit, self.toolkit_version) def _image_framework(self): + """Placeholder docstring""" return "rl-{}".format(self.framework) @classmethod @@ -399,7 +453,8 @@ def default_metric_definitions(cls, toolkit): """Provides default metric definitions based on provided toolkit. Args: - toolkit(sagemaker.rl.RLToolkit): RL Toolkit to be used for training. + toolkit (sagemaker.rl.RLToolkit): RL Toolkit to be used for + training. Returns: list: metric definitions diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 26b2e53adb..5b3b51e335 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
+"""Placeholder docstring""" from __future__ import absolute_import, print_function import json @@ -48,6 +49,8 @@ class LogState(object): + """Placeholder docstring""" + STARTING = 1 WAIT_IN_PROGRESS = 2 TAILING = 3 @@ -121,6 +124,7 @@ def _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client): @property def boto_region_name(self): + """Placeholder docstring""" return self._region_name def upload_data(self, path, bucket=None, key_prefix="data", extra_args=None): @@ -1607,6 +1611,7 @@ def __init__(self, model_data, image, env=None): def _create_model_request( name, role, container_def=None, tags=None ): # pylint: disable=redefined-outer-name + """Placeholder docstring""" request = {"ModelName": name, "ExecutionRoleArn": role} if isinstance(container_def, list): @@ -1621,6 +1626,7 @@ def _create_model_request( def _deployment_entity_exists(describe_fn): + """Placeholder docstring""" try: describe_fn() return True @@ -1635,6 +1641,7 @@ def _deployment_entity_exists(describe_fn): def _train_done(sagemaker_client, job_name, last_desc): + """Placeholder docstring""" in_progress_statuses = ["InProgress", "Created"] desc = sagemaker_client.describe_training_job(TrainingJobName=job_name) @@ -1655,6 +1662,7 @@ def _train_done(sagemaker_client, job_name, last_desc): def _compilation_job_status(sagemaker_client, job_name): + """Placeholder docstring""" compile_status_codes = { "Completed": "!", "InProgress": ".", @@ -1678,6 +1686,7 @@ def _compilation_job_status(sagemaker_client, job_name): def _tuning_job_status(sagemaker_client, job_name): + """Placeholder docstring""" tuning_status_codes = { "Completed": "!", "InProgress": ".", @@ -1703,6 +1712,7 @@ def _tuning_job_status(sagemaker_client, job_name): def _transform_job_status(sagemaker_client, job_name): + """Placeholder docstring""" transform_job_status_codes = { "Completed": "!", "InProgress": ".", @@ -1726,6 +1736,7 @@ def _transform_job_status(sagemaker_client, job_name): def _create_model_package_status(sagemaker_client, model_package_name): + """Placeholder docstring""" in_progress_statuses = ["InProgress", "Pending"] desc = sagemaker_client.describe_model_package(ModelPackageName=model_package_name) @@ -1741,6 +1752,7 @@ def _create_model_package_status(sagemaker_client, model_package_name): def _deploy_done(sagemaker_client, endpoint_name): + """Placeholder docstring""" hosting_status_codes = { "OutOfService": "x", "Creating": "-", @@ -1762,6 +1774,7 @@ def _deploy_done(sagemaker_client, endpoint_name): def _wait_until_training_done(callable_fn, desc, poll=5): + """Placeholder docstring""" job_desc, finished = callable_fn(desc) while not finished: time.sleep(poll) @@ -1770,6 +1783,7 @@ def _wait_until_training_done(callable_fn, desc, poll=5): def _wait_until(callable_fn, poll=5): + """Placeholder docstring""" result = callable_fn() while result is None: time.sleep(poll) @@ -1778,6 +1792,7 @@ def _wait_until(callable_fn, poll=5): def _expand_container_def(c_def): + """Placeholder docstring""" if isinstance(c_def, six.string_types): return container_def(c_def) return c_def @@ -1786,6 +1801,7 @@ def _expand_container_def(c_def): def _vpc_config_from_training_job( training_job_desc, vpc_config_override=vpc_utils.VPC_CONFIG_DEFAULT ): + """Placeholder docstring""" if vpc_config_override is vpc_utils.VPC_CONFIG_DEFAULT: return training_job_desc.get(vpc_utils.VPC_CONFIG_KEY) return vpc_utils.sanitize(vpc_config_override) diff --git a/src/sagemaker/sklearn/__init__.py b/src/sagemaker/sklearn/__init__.py index 
f9b7b55d53..1ed88c180e 100644 --- a/src/sagemaker/sklearn/__init__.py +++ b/src/sagemaker/sklearn/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.sklearn.estimator import SKLearn # noqa: F401 diff --git a/src/sagemaker/sklearn/defaults.py b/src/sagemaker/sklearn/defaults.py index 8bd416cc9a..3995e4b591 100644 --- a/src/sagemaker/sklearn/defaults.py +++ b/src/sagemaker/sklearn/defaults.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import SKLEARN_NAME = "scikit-learn" diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index 56cdcd1f51..15de26e0b3 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -43,40 +44,52 @@ def __init__( image_name=None, **kwargs ): - """ - This ``Estimator`` executes an Scikit-learn script in a managed Scikit-learn execution environment, within a - SageMaker Training Job. The managed Scikit-learn environment is an Amazon-built Docker container that executes - functions defined in the supplied ``entry_point`` Python script. - - Training is started by calling :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator. - After training is complete, calling :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a - hosted SageMaker endpoint and returns an :class:`~sagemaker.amazon.sklearn.model.SKLearnPredictor` instance - that can be used to perform inference against the hosted model. - - Technical documentation on preparing Scikit-learn scripts for SageMaker training and using the Scikit-learn - Estimator is available on the project home-page: https://github.com/aws/sagemaker-python-sdk + """This ``Estimator`` executes a Scikit-learn script in a managed + Scikit-learn execution environment, within a SageMaker Training Job. The + managed Scikit-learn environment is an Amazon-built Docker container + that executes functions defined in the supplied ``entry_point`` Python + script. + + Training is started by calling + :meth:`~sagemaker.amazon.estimator.Framework.fit` on this Estimator. + After training is complete, calling + :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a hosted + SageMaker endpoint and returns an + :class:`~sagemaker.amazon.sklearn.model.SKLearnPredictor` instance that + can be used to perform inference against the hosted model. + + Technical documentation on preparing Scikit-learn scripts for + SageMaker training and using the Scikit-learn Estimator is available on + the project home-page: https://github.com/aws/sagemaker-python-sdk Args: - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to training. 
This should be compatible with either Python 2.7 or Python 3.5. - source_dir (str): Path (absolute or relative) to a directory with any other training - source code dependencies aside from tne entry point file (default: None). Structure within this - directory are preserved when training on Amazon SageMaker. - hyperparameters (dict): Hyperparameters that will be used for training (default: None). - The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. - For convenience, this accepts other types for keys and values, but ``str()`` will be called - to convert them before training. - py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - One of 'py2' or 'py3'. - framework_version (str): Scikit-learn version you want to use for executing your model training code. - List of supported versions https://github.com/aws/sagemaker-python-sdk#sklearn-sagemaker-estimators - image_name (str): If specified, the estimator will use this image for training and hosting, instead of - selecting the appropriate SageMaker official image based on framework_version and py_version. It can - be an ECR url or dockerhub image and tag. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to training. + This should be compatible with either Python 2.7 or Python 3.5. + framework_version (str): Scikit-learn version you want to use for + executing your model training code. List of supported versions + https://github.com/aws/sagemaker-python-sdk#sklearn-sagemaker-estimators + source_dir (str): Path (absolute or relative) to a directory with + any other training source code dependencies aside from the entry + point file (default: None). Structure within this directory is + preserved when training on Amazon SageMaker. + hyperparameters (dict): Hyperparameters that will be used for + training (default: None). The hyperparameters are made + accessible as a dict[str, str] to the training code on + SageMaker. For convenience, this accepts other types for keys + and values, but ``str()`` will be called to convert them before + training. + py_version (str): Python version you want to use for executing your + model training code (default: 'py2'). One of 'py2' or 'py3'. + image_name (str): If specified, the estimator will use this image + for training and hosting, instead of selecting the appropriate + SageMaker official image based on framework_version and + py_version. It can be an ECR url or dockerhub image and tag. Examples: 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 custom-image:latest. - **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. + **kwargs: Additional kwargs passed to the + :class:`~sagemaker.estimator.Framework` constructor. """ # SciKit-Learn does not support distributed training or training on GPU instance types. Fail fast. train_instance_type = kwargs.get("train_instance_type") @@ -116,22 +129,25 @@ def __init__( def create_model( self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT, **kwargs ): - """Create a SageMaker ``SKLearnModel`` object that can be deployed to an ``Endpoint``. + """Create a SageMaker ``SKLearnModel`` object that can be deployed to an + ``Endpoint``. Args: - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. 
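A minimal usage sketch for this estimator (script name, role, version, and hyperparameters are illustrative; note the fail-fast check above rejects GPU instance types and distributed training):

    from sagemaker.sklearn import SKLearn

    estimator = SKLearn(
        entry_point="train.py",  # user-provided training script
        role="arn:aws:iam::111122223333:role/SageMakerRole",
        train_instance_count=1,  # Scikit-learn training is single-instance
        train_instance_type="ml.c4.xlarge",  # CPU only
        framework_version="0.20.0",  # illustrative supported version
        hyperparameters={"max_depth": 5},
    )
    estimator.fit({"train": "s3://my-bucket/sklearn/train"})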
- model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. **kwargs: Passed to initialization of ``SKLearnModel``. Returns: - sagemaker.sklearn.model.SKLearnModel: A SageMaker ``SKLearnModel`` object. - See :func:`~sagemaker.sklearn.model.SKLearnModel` for full details. + sagemaker.sklearn.model.SKLearnModel: A SageMaker ``SKLearnModel`` + object. See :func:`~sagemaker.sklearn.model.SKLearnModel` for full details. """ role = role or self.role return SKLearnModel( @@ -154,14 +170,16 @@ def create_model( @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): - """Convert the job description to init params that can be handled by the class constructor + """Convert the job description to init params that can be handled by the + class constructor Args: - job_details: the returned job details from a describe_training_job API call. + job_details: the returned job details from a describe_training_job + API call. + model_channel_name: Returns: - dictionary: The transformed init_params - + dictionary: The transformed init_params """ init_params = super(SKLearn, cls)._prepare_init_params_from_job_description(job_details) @@ -184,6 +202,10 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na def _validate_not_gpu_instance_type(training_instance_type): + """ + Args: + training_instance_type: + """ gpu_instance_types = [ "ml.p2.xlarge", "ml.p2.8xlarge", diff --git a/src/sagemaker/sklearn/model.py b/src/sagemaker/sklearn/model.py index 4ed9ee5e60..5d02044302 100644 --- a/src/sagemaker/sklearn/model.py +++ b/src/sagemaker/sklearn/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -27,16 +28,19 @@ class SKLearnPredictor(RealTimePredictor): """A RealTimePredictor for inference against Scikit-learn Endpoints. - This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for Scikit-learn - inference.""" + This is able to serialize Python lists, dictionaries, and numpy arrays to + multidimensional tensors for Scikit-learn inference. + """ def __init__(self, endpoint_name, sagemaker_session=None): """Initialize an ``SKLearnPredictor``. Args: - endpoint_name (str): The name of the endpoint to perform inference on. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. 
If not specified, the estimator creates one + endpoint_name (str): The name of the endpoint to perform inference + on. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. """ super(SKLearnPredictor, self).__init__( @@ -45,7 +49,9 @@ def __init__(self, endpoint_name, sagemaker_session=None): class SKLearnModel(FrameworkModel): - """An Scikit-learn SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``.""" + """A Scikit-learn SageMaker ``Model`` that can be deployed to a SageMaker + ``Endpoint``. + """ __framework_name__ = SKLEARN_NAME @@ -64,23 +70,32 @@ def __init__( """Initialize an SKLearnModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5. - image (str): A Docker image URI (default: None). If not specified, a default image for Scikit-learn - will be used. - py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - framework_version (str): Scikit-learn version you want to use for executing your model training code. - predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor - with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of - invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to model + hosting. This should be compatible with either Python 2.7 or + Python 3.5. + image (str): A Docker image URI (default: None). If not specified, a + default image for Scikit-learn will be used. + py_version (str): Python version you want to use for executing your + model training code (default: 'py2'). + framework_version (str): Scikit-learn version you want to use for + executing your model training code. + predictor_cls (callable[str, sagemaker.session.Session]): A function + to call to create a predictor with an endpoint name and + SageMaker ``Session``. If specified, ``deploy()`` returns the + result of invoking this function on the created endpoint name. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. 
If None, server will use one + worker per vCPU. + **kwargs: Keyword arguments passed to the ``FrameworkModel`` + initializer. """ super(SKLearnModel, self).__init__( model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs @@ -94,16 +109,20 @@ def __init__( self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type, accelerator_type=None): - """Return a container definition with framework configuration set in model environment variables. + """Return a container definition with framework configuration set in + model environment variables. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. Note: accelerator types are not - supported by SKLearnModel. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Note: accelerator types + are not supported by SKLearnModel. Returns: - dict[str, str]: A container definition object usable with the CreateModel API. + dict[str, str]: A container definition object usable with the + CreateModel API. """ if accelerator_type: raise ValueError("Accelerator types are not supported for Scikit-Learn.") diff --git a/src/sagemaker/sparkml/__init__.py b/src/sagemaker/sparkml/__init__.py index e724ac8c9b..1ce365d33a 100644 --- a/src/sagemaker/sparkml/__init__.py +++ b/src/sagemaker/sparkml/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker.sparkml.model import SparkMLModel, SparkMLPredictor # noqa: F401 diff --git a/src/sagemaker/sparkml/model.py b/src/sagemaker/sparkml/model.py index ab898de490..97d62674c3 100644 --- a/src/sagemaker/sparkml/model.py +++ b/src/sagemaker/sparkml/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from sagemaker import Model, RealTimePredictor, Session @@ -22,26 +23,28 @@ class SparkMLPredictor(RealTimePredictor): - """ - Performs predictions against an MLeap serialized SparkML model. + """Performs predictions against an MLeap serialized SparkML model. - The implementation of :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this - `RealTimePredictor` requires a json as input. The input should follow the json format - as documented. + The implementation of + :meth:`~sagemaker.predictor.RealTimePredictor.predict` in this + `RealTimePredictor` requires a json as input. The input should follow the + json format as documented. - ``predict()`` returns a csv output, comma separated if the output is a list. + ``predict()`` returns a csv output, comma separated if the output is a + list. 
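A hedged deploy-and-predict sketch for the SparkML classes documented here (model location, role, instance type, and payload are illustrative):

    from sagemaker.sparkml import SparkMLModel

    model = SparkMLModel(
        model_data="s3://my-bucket/sparkml/model.tar.gz",  # MLeap-serialized artifact
        role="arn:aws:iam::111122223333:role/SageMakerRole",
        spark_version="2.2",
    )
    # deploy() returns a SparkMLPredictor, which sends JSON and receives
    # the text/csv response described above.
    predictor = model.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
    print(predictor.predict({"data": [1.0, "free text", 3.0]}))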
""" def __init__(self, endpoint, sagemaker_session=None): - """ - Initializes a SparkMLPredictor which should be used with SparkMLModel to perform predictions against SparkML - models serialized via MLeap. The response is returned in text/csv format which is the default response format - for SparkML Serving container. + """Initializes a SparkMLPredictor which should be used with SparkMLModel + to perform predictions against SparkML models serialized via MLeap. The + response is returned in text/csv format which is the default response + format for SparkML Serving container. Args: endpoint (str): The name of the endpoint to perform inference on. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. """ sagemaker_session = sagemaker_session or Session() @@ -54,26 +57,33 @@ def __init__(self, endpoint, sagemaker_session=None): class SparkMLModel(Model): - """ - Model data and S3 location holder for MLeap serialized SparkML model. Calling - :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return - a Predictor to performs predictions against an MLeap serialized SparkML model . + """Model data and S3 location holder for MLeap serialized SparkML model. + Calling :meth:`~sagemaker.model.Model.deploy` creates an Endpoint and return + a Predictor to performs predictions against an MLeap serialized SparkML + model . """ def __init__(self, model_data, role=None, spark_version=2.2, sagemaker_session=None, **kwargs): """Initialize a SparkMLModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. For SparkML, this will be the - output that has been produced by the Spark job after serializing the Model via MLeap. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - spark_version (str): Spark version you want to use for executing the inference (default: '2.2'). - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one - using the default AWS configuration chain. For local mode, please do not pass this variable. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. For SparkML, this will be the output that has + been produced by the Spark job after serializing the Model via + MLeap. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + spark_version (str): Spark version you want to use for executing the + inference (default: '2.2'). + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. 
If not specified, the estimator creates one + using the default AWS configuration chain. For local mode, + please do not pass this variable. + **kwargs: """ # for local mode, sagemaker_session should be passed as None but we need a session to get boto_region_name region_name = (sagemaker_session or Session()).boto_region_name diff --git a/src/sagemaker/tensorflow/__init__.py b/src/sagemaker/tensorflow/__init__.py index c521df20fc..b32b2b5eea 100644 --- a/src/sagemaker/tensorflow/__init__.py +++ b/src/sagemaker/tensorflow/__init__.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import sys diff --git a/src/sagemaker/tensorflow/defaults.py b/src/sagemaker/tensorflow/defaults.py index 52c3bc5369..89c9f36c3c 100644 --- a/src/sagemaker/tensorflow/defaults.py +++ b/src/sagemaker/tensorflow/defaults.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import TF_VERSION = "1.11" diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index cdac66661f..c25f6691fc 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import contextlib @@ -52,6 +53,8 @@ class Tensorboard(threading.Thread): + """Placeholder docstring""" + def __init__(self, estimator, logdir=None): """Initialize ``Tensorboard`` instance. @@ -66,6 +69,7 @@ def __init__(self, estimator, logdir=None): @staticmethod def _cmd_exists(cmd): + """Placeholder docstring""" return any( os.access(os.path.join(path, cmd), os.X_OK) for path in os.environ["PATH"].split(os.pathsep) @@ -301,6 +305,7 @@ def _validate_args( requirements_file, checkpoint_path, ): + """Placeholder docstring""" if py_version == "py3" or script_mode: @@ -330,11 +335,13 @@ def _validate_args( self.script_mode = True def _only_script_mode_supported(self): + """Placeholder docstring""" return [ int(s) for s in self.framework_version.split(".") ] >= self._LOWEST_SCRIPT_MODE_ONLY_VERSION def _validate_requirements_file(self, requirements_file): + """Placeholder docstring""" if not requirements_file: return @@ -482,7 +489,7 @@ def create_model( Returns: sagemaker.tensorflow.model.TensorFlowModel: A SageMaker ``TensorFlowModel`` object. - See :func:`~sagemaker.tensorflow.model.TensorFlowModel` for full details. + See :func:`~sagemaker.tensorflow.model.TensorFlowModel` for full details. 
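For orientation, here is a minimal usage sketch (not part of this patch) of the SparkML hosting API whose docstrings are reflowed above. The S3 path, role ARN, and CSV payload are hypothetical placeholders; the flow simply follows the documented deploy-then-predict pattern:

from sagemaker.sparkml import SparkMLModel

# Hypothetical MLeap-serialized artifact produced by a Spark job.
sparkml_model = SparkMLModel(
    model_data="s3://my-bucket/sparkml/model.tar.gz",  # hypothetical path
    role="arn:aws:iam::111122223333:role/SageMakerRole",  # hypothetical role
)

# deploy() creates an Endpoint and returns a predictor that speaks text/csv,
# the default response format of the SparkML Serving container.
predictor = sparkml_model.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge")
print(predictor.predict("1.0,28.0,C,38.0,71.5,1.0"))  # hypothetical CSV record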
""" role = role or self.role @@ -496,6 +503,7 @@ def create_model( ) def _create_tfs_model(self, role=None, vpc_config_override=VPC_CONFIG_DEFAULT): + """Placeholder docstring""" return Model( model_data=self.model_data, role=role, @@ -508,6 +516,7 @@ def _create_tfs_model(self, role=None, vpc_config_override=VPC_CONFIG_DEFAULT): ) def _create_default_model(self, model_server_workers, role, vpc_config_override): + """Placeholder docstring""" return TensorFlowModel( self.model_data, role, @@ -569,6 +578,7 @@ def hyperparameters(self): return hyperparameters def _default_s3_path(self, directory, mpi=False): + """Placeholder docstring""" local_code = utils.get_config_value("local.local_code", self.sagemaker_session.config) if self.sagemaker_session.local_mode and local_code: return "/opt/ml/shared/{}".format(directory) @@ -579,9 +589,11 @@ def _default_s3_path(self, directory, mpi=False): return None def _script_mode_enabled(self): + """Placeholder docstring""" return self.py_version == "py3" or self.script_mode def train_image(self): + """Placeholder docstring""" if self.image_name: return self.image_name diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 73a22684e6..bb90c30e54 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -27,16 +28,19 @@ class TensorFlowPredictor(RealTimePredictor): """A ``RealTimePredictor`` for inference against TensorFlow endpoint. - This is able to serialize Python lists, dictionaries, and numpy arrays to multidimensional tensors for - inference""" + This is able to serialize Python lists, dictionaries, and numpy arrays to + multidimensional tensors for inference + """ def __init__(self, endpoint_name, sagemaker_session=None): """Initialize an ``TensorFlowPredictor``. Args: - endpoint_name (str): The name of the endpoint to perform inference on. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + endpoint_name (str): The name of the endpoint to perform inference + on. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. """ super(TensorFlowPredictor, self).__init__( @@ -45,6 +49,7 @@ def __init__(self, endpoint_name, sagemaker_session=None): class TensorFlowModel(FrameworkModel): + """Placeholder docstring""" __framework_name__ = "tensorflow" @@ -63,23 +68,32 @@ def __init__( """Initialize an TensorFlowModel. Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM role, - if it needs to access an AWS resource. - entry_point (str): Path (absolute or relative) to the Python source file which should be executed - as the entry point to model hosting. 
This should be compatible with either Python 2.7 or Python 3.5. - image (str): A Docker image URI (default: None). If not specified, a default image for - TensorFlow will be used. - py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - framework_version (str): TensorFlow version you want to use for executing your model training code. - predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor - with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of - invoking this function on the created endpoint name. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer. + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker training jobs and APIs that create Amazon SageMaker + endpoints use this role to access training data and model + artifacts. After the endpoint is created, the inference code + might use the IAM role, if it needs to access an AWS resource. + entry_point (str): Path (absolute or relative) to the Python source + file which should be executed as the entry point to model + hosting. This should be compatible with either Python 2.7 or + Python 3.5. + image (str): A Docker image URI (default: None). If not specified, a + default image for TensorFlow will be used. + py_version (str): Python version you want to use for executing your + model training code (default: 'py2'). + framework_version (str): TensorFlow version you want to use for + executing your model training code. + predictor_cls (callable[str, sagemaker.session.Session]): A function + to call to create a predictor with an endpoint name and + SageMaker ``Session``. If specified, ``deploy()`` returns the + result of invoking this function on the created endpoint name. + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + **kwargs: Keyword arguments passed to the ``FrameworkModel`` + initializer. """ super(TensorFlowModel, self).__init__( model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs @@ -93,17 +107,21 @@ def __init__( self.model_server_workers = model_server_workers def prepare_container_def(self, instance_type, accelerator_type=None): - """Return a container definition with framework configuration set in model environment variables. + """Return a container definition with framework configuration set in + model environment variables. This also uploads user-supplied code to S3. Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and - making inferences to the model. For example, 'ml.eia1.medium'. + instance_type (str): The EC2 instance type to deploy this Model to. + For example, 'ml.p2.xlarge'. + accelerator_type (str): The Elastic Inference accelerator type to + deploy to the instance for loading and making inferences to the + model. For example, 'ml.eia1.medium'. Returns: - dict[str, str]: A container definition object usable with the CreateModel API. + dict[str, str]: A container definition object usable with the + CreateModel API. 
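As a sketch of how ``prepare_container_def`` is typically used (illustrative only, and assuming a hypothetical artifact, role, and entry-point script; running it requires valid AWS credentials because user code is uploaded to S3 as a side effect):

from sagemaker.tensorflow.model import TensorFlowModel

model = TensorFlowModel(
    model_data="s3://my-bucket/tf/model.tar.gz",  # hypothetical artifact
    role="arn:aws:iam::111122223333:role/SageMakerRole",  # hypothetical role
    entry_point="inference.py",  # hypothetical entry-point script
)

# The result is a plain dict carrying the Image, ModelDataUrl, and Environment
# keys expected by the CreateModel API.
container_def = model.prepare_container_def(instance_type="ml.c4.xlarge")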
""" deploy_image = self.image if not deploy_image: diff --git a/src/sagemaker/tensorflow/predictor.py b/src/sagemaker/tensorflow/predictor.py index f9dd30d014..f47f6860ae 100644 --- a/src/sagemaker/tensorflow/predictor.py +++ b/src/sagemaker/tensorflow/predictor.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import json @@ -39,12 +40,18 @@ class _TFProtobufSerializer(object): + """Placeholder docstring""" + def __init__(self): self.content_type = CONTENT_TYPE_OCTET_STREAM def __call__(self, data): # isinstance does not work here because a same protobuf message can be imported from a different module. # for example sagemaker.tensorflow.tensorflow_serving.regression_pb2 and tensorflow_serving.apis.regression_pb2 + """ + Args: + data: + """ predict_type = data.__class__.__name__ available_requests = [ @@ -63,10 +70,18 @@ def __call__(self, data): class _TFProtobufDeserializer(object): + """Placeholder docstring""" + def __init__(self): + """Placeholder docstring""" self.accept = CONTENT_TYPE_OCTET_STREAM def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ try: data = stream.read() finally: @@ -88,10 +103,16 @@ def __call__(self, stream, content_type): class _TFJsonSerializer(object): + """Placeholder docstring""" + def __init__(self): self.content_type = CONTENT_TYPE_JSON def __call__(self, data): + """ + Args: + data: + """ if isinstance(data, tensor_pb2.TensorProto): return json_format.MessageToJson(data) return json_serializer(data) @@ -101,10 +122,17 @@ def __call__(self, data): class _TFJsonDeserializer(object): + """Placeholder docstring""" + def __init__(self): self.accept = CONTENT_TYPE_JSON def __call__(self, stream, content_type): + """ + Args: + stream: + content_type: + """ try: data = stream.read() finally: @@ -124,10 +152,16 @@ def __call__(self, stream, content_type): class _TFCsvSerializer(object): + """Placeholder docstring""" + def __init__(self): self.content_type = CONTENT_TYPE_CSV def __call__(self, data): + """ + Args: + data: + """ to_serialize = data if isinstance(data, tensor_pb2.TensorProto): to_serialize = tensor_util.MakeNdarray(data) diff --git a/src/sagemaker/tensorflow/serving.py b/src/sagemaker/tensorflow/serving.py index e54e58bae6..c18f496ab0 100644 --- a/src/sagemaker/tensorflow/serving.py +++ b/src/sagemaker/tensorflow/serving.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import logging @@ -23,7 +24,8 @@ class Predictor(sagemaker.RealTimePredictor): - """A ``RealTimePredictor`` implementation for inference against TensorFlow Serving endpoints. + """A ``RealTimePredictor`` implementation for inference against TensorFlow + Serving endpoints. """ def __init__( @@ -36,23 +38,29 @@ def __init__( model_name=None, model_version=None, ): - """Initialize a ``TFSPredictor``. See ``sagemaker.RealTimePredictor`` for - more info about parameters. + """Initialize a ``TFSPredictor``. See ``sagemaker.RealTimePredictor`` + for more info about parameters. Args: - endpoint_name (str): The name of the endpoint to perform inference on. 
- sagemaker_session (sagemaker.session.Session): Session object which manages interactions - with Amazon SageMaker APIs and any other AWS services needed. If not specified, - the estimator creates one using the default AWS configuration chain. - serializer (callable): Optional. Default serializes input data to json. Handles dicts, - lists, and numpy arrays. - deserializer (callable): Optional. Default parses the response using ``json.load(...)``. - content_type (str): Optional. The "ContentType" for invocation requests. If specified, - overrides the ``content_type`` from the serializer (default: None). - model_name (str): Optional. The name of the SavedModel model that should handle the - request. If not specified, the endpoint's default model will handle the request. - model_version (str): Optional. The version of the SavedModel model that should handle - the request. If not specified, the latest version of the model will be used. + endpoint_name (str): The name of the endpoint to perform inference + on. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one + using the default AWS configuration chain. + serializer (callable): Optional. Default serializes input data to + json. Handles dicts, lists, and numpy arrays. + deserializer (callable): Optional. Default parses the response using + ``json.load(...)``. + content_type (str): Optional. The "ContentType" for invocation + requests. If specified, overrides the ``content_type`` from the + serializer (default: None). + model_name (str): Optional. The name of the SavedModel model that + should handle the request. If not specified, the endpoint's + default model will handle the request. + model_version (str): Optional. The version of the SavedModel model + that should handle the request. If not specified, the latest + version of the model will be used. """ super(Predictor, self).__init__( endpoint_name, sagemaker_session, serializer, deserializer, content_type @@ -66,12 +74,25 @@ def __init__( self._model_attributes = ",".join(attributes) if attributes else None def classify(self, data): + """ + Args: + data: + """ return self._classify_or_regress(data, "classify") def regress(self, data): + """ + Args: + data: + """ return self._classify_or_regress(data, "regress") def _classify_or_regress(self, data, method): + """ + Args: + data: + method: + """ if method not in ["classify", "regress"]: raise ValueError("invalid TensorFlow Serving method: {}".format(method)) @@ -83,6 +104,11 @@ def _classify_or_regress(self, data, method): return self.predict(data, args) def predict(self, data, initial_args=None): + """ + Args: + data: + initial_args: + """ args = dict(initial_args) if initial_args else {} if self._model_attributes: if "CustomAttributes" in args: @@ -94,6 +120,8 @@ def predict(self, data, initial_args=None): class Model(sagemaker.model.FrameworkModel): + """Placeholder docstring""" + FRAMEWORK_NAME = "tensorflow-serving" LOG_LEVEL_PARAM_NAME = "SAGEMAKER_TFS_NGINX_LOGLEVEL" LOG_LEVEL_MAP = { @@ -117,20 +145,26 @@ def __init__( ): """Initialize a Model. - Args: - model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker APIs that - create Amazon SageMaker endpoints use this role to access model artifacts. - image (str): A Docker image URI (default: None). 
If not specified, a default image for - TensorFlow Serving will be used. - framework_version (str): Optional. TensorFlow Serving version you want to use. - container_log_level (int): Log level to use within the container (default: logging.ERROR). - Valid values are defined in the Python logging module. - predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a - predictor with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` - returns the result of invoking this function on the created endpoint name. - **kwargs: Keyword arguments passed to the ``Model`` initializer. - """ + Args: + model_data (str): The S3 location of a SageMaker model data + ``.tar.gz`` file. + role (str): An AWS IAM role (either name or full ARN). The Amazon + SageMaker APIs that create Amazon SageMaker endpoints use this + role to access model artifacts. + entry_point: + image (str): A Docker image URI (default: None). If not specified, a + default image for TensorFlow Serving will be used. + framework_version (str): Optional. TensorFlow Serving version you + want to use. + container_log_level (int): Log level to use within the container + (default: logging.ERROR). Valid values are defined in the Python + logging module. + predictor_cls (callable[str, sagemaker.session.Session]): A function + to call to create a predictor with an endpoint name and + SageMaker ``Session``. If specified, ``deploy()`` returns the + result of invoking this function on the created endpoint name. + **kwargs: Keyword arguments passed to the ``Model`` initializer. + """ super(Model, self).__init__( model_data=model_data, role=role, @@ -143,6 +177,11 @@ def __init__( self._container_log_level = container_log_level def prepare_container_def(self, instance_type, accelerator_type=None): + """ + Args: + instance_type: + accelerator_type: + """ image = self._get_image_uri(instance_type, accelerator_type) env = self._get_container_env() @@ -166,6 +205,7 @@ def prepare_container_def(self, instance_type, accelerator_type=None): return sagemaker.container_def(image, model_data, env) def _get_container_env(self): + """Placeholder docstring""" if not self._container_log_level: return self.env @@ -178,6 +218,11 @@ def _get_container_env(self): return env def _get_image_uri(self, instance_type, accelerator_type=None): + """ + Args: + instance_type: + accelerator_type: + """ if self.image: return self.image diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py index 4ea198433a..593caae67d 100644 --- a/src/sagemaker/transformer.py +++ b/src/sagemaker/transformer.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import from botocore import exceptions @@ -20,7 +21,8 @@ class Transformer(object): - """A class for handling creating and interacting with Amazon SageMaker transform jobs. + """A class for handling creating and interacting with Amazon SageMaker + transform jobs. """ def __init__( @@ -44,31 +46,42 @@ def __init__( """Initialize a ``Transformer``. Args: - model_name (str): Name of the SageMaker model being used for the transform job. + model_name (str): Name of the SageMaker model being used for the + transform job. instance_count (int): Number of EC2 instances to use. 
- instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MultiRecord' and 'SingleRecord'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - env (dict): Environment variables to be set for use during the transform job (default: None). - tags (list[dict]): List of tags for labeling a transform job (default: None). For more, see + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. + strategy (str): The strategy used to decide how to batch records in + a single request (default: None). Valid values: 'MultiRecord' + and 'SingleRecord'. + assemble_with (str): How the output is assembled (default: None). + Valid values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If + not specified, results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the + transform output (default: None). + accept (str): The content type accepted by the endpoint deployed + during the transform job. + max_concurrent_transforms (int): The maximum number of HTTP requests + to be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP + request to the container in MB. + tags (list[dict]): List of tags for labeling a transform job + (default: None). For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + env (dict): Environment variables to be set for use during the + transform job (default: None). base_transform_job_name (str): Prefix for the transform job when the - :meth:`~sagemaker.transformer.Transformer.transform` method launches. If not specified, a default prefix - will be generated based on the training image name that was used to train the model associated with - the transform job. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, the estimator creates one + :meth:`~sagemaker.transformer.Transformer.transform` method + launches. If not specified, a default prefix will be generated + based on the training image name that was used to train the + model associated with the transform job. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, the estimator creates one using the default AWS configuration chain. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). 
""" self.model_name = model_name self.strategy = strategy @@ -110,28 +123,34 @@ def transform( Args: data (str): Input data location in S3. - data_type (str): What the S3 location defines (default: 'S3Prefix'). Valid values: + data_type (str): What the S3 location defines (default: 'S3Prefix'). + Valid values: * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as - inputs for the transform job. - * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as - an input for the transform job. + inputs for the transform job. + * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as + an input for the transform job. content_type (str): MIME type of the input data (default: None). - compression_type (str): Compression type of the input data, if compressed (default: None). - Valid values: 'Gzip', None. - split_type (str): The record delimiter for the input object (default: 'None'). - Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'. - job_name (str): job name (default: None). If not specified, one will be generated. - input_filter (str): A JSONPath to select a portion of the input to pass to the algorithm container for - inference. If you omit the field, it gets the value '$', representing the entire input. + compression_type (str): Compression type of the input data, if + compressed (default: None). Valid values: 'Gzip', None. + split_type (str): The record delimiter for the input object + (default: 'None'). Valid values: 'None', 'Line', 'RecordIO', and + 'TFRecord'. + job_name (str): job name (default: None). If not specified, one will + be generated. + input_filter (str): A JSONPath to select a portion of the input to + pass to the algorithm container for inference. If you omit the + field, it gets the value '$', representing the entire input. Some examples: "$[1:]", "$.features"(default: None). - output_filter (str): A JSONPath to select a portion of the joined/original output to return as the output. - Some examples: "$[1:]", "$.prediction" (default: None). - join_source (str): The source of data to be joined to the transform output. It can be set to 'Input' - meaning the entire input record will be joined to the inference result. - You can use OutputFilter to select the useful portion before uploading to S3. (default: None). - Valid values: Input, None. + output_filter (str): A JSONPath to select a portion of the + joined/original output to return as the output. Some examples: + "$[1:]", "$.prediction" (default: None). + join_source (str): The source of data to be joined to the transform + output. It can be set to 'Input' meaning the entire input record + will be joined to the inference result. You can use OutputFilter + to select the useful portion before uploading to S3. (default: + None). Valid values: Input, None. """ local_mode = self.sagemaker_session.local_mode if not local_mode and not data.startswith("s3://"): @@ -166,12 +185,11 @@ def transform( ) def delete_model(self): - """Delete the corresponding SageMaker model for this Transformer. 
- - """ + """Delete the corresponding SageMaker model for this Transformer.""" self.sagemaker_session.delete_model(self.model_name) def _retrieve_base_name(self): + """Placeholder docstring""" image_name = self._retrieve_image_name() if image_name: @@ -180,6 +198,7 @@ def _retrieve_base_name(self): return self.model_name def _retrieve_image_name(self): + """Placeholder docstring""" try: model_desc = self.sagemaker_session.sagemaker_client.describe_model( ModelName=self.model_name @@ -203,10 +222,12 @@ def _retrieve_image_name(self): ) def wait(self): + """Placeholder docstring""" self._ensure_last_transform_job() self.latest_transform_job.wait() def _ensure_last_transform_job(self): + """Placeholder docstring""" if self.latest_transform_job is None: raise ValueError("No transform job available") @@ -216,13 +237,14 @@ def attach(cls, transform_job_name, sagemaker_session=None): Args: transform_job_name (str): Name for the transform job to be attached. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, one will be created - using the default AWS configuration chain. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, one will be created using + the default AWS configuration chain. Returns: - sagemaker.transformer.Transformer: The Transformer instance with the specified transform job attached. - + sagemaker.transformer.Transformer: The Transformer instance with the + specified transform job attached. """ sagemaker_session = sagemaker_session or Session() @@ -239,10 +261,12 @@ def attach(cls, transform_job_name, sagemaker_session=None): @classmethod def _prepare_init_params_from_job_description(cls, job_details): - """Convert the transform job description to init params that can be handled by the class constructor + """Convert the transform job description to init params that can be + handled by the class constructor Args: - job_details (dict): the returned job details from a describe_transform_job API call. + job_details (dict): the returned job details from a + describe_transform_job API call. 
Returns: dict: The transformed init_params @@ -266,6 +290,8 @@ def _prepare_init_params_from_job_description(cls, job_details): class _TransformJob(_Job): + """Placeholder docstring""" + @classmethod def start_new( cls, @@ -279,6 +305,18 @@ def start_new( output_filter, join_source, ): + """ + Args: + transformer: + data: + data_type: + content_type: + compression_type: + split_type: + input_filter: + output_filter: + join_source: + """ config = _TransformJob._load_config( data, data_type, content_type, compression_type, split_type, transformer ) @@ -307,6 +345,15 @@ def wait(self): @staticmethod def _load_config(data, data_type, content_type, compression_type, split_type, transformer): + """ + Args: + data: + data_type: + content_type: + compression_type: + split_type: + transformer: + """ input_config = _TransformJob._format_inputs_to_input_config( data, data_type, content_type, compression_type, split_type ) @@ -330,6 +377,14 @@ def _load_config(data, data_type, content_type, compression_type, split_type, tr @staticmethod def _format_inputs_to_input_config(data, data_type, content_type, compression_type, split_type): + """ + Args: + data: + data_type: + content_type: + compression_type: + split_type: + """ config = {"DataSource": {"S3DataSource": {"S3DataType": data_type, "S3Uri": data}}} if content_type is not None: @@ -345,6 +400,13 @@ def _format_inputs_to_input_config(data, data_type, content_type, compression_ty @staticmethod def _prepare_output_config(s3_path, kms_key_id, assemble_with, accept): + """ + Args: + s3_path: + kms_key_id: + assemble_with: + accept: + """ config = super(_TransformJob, _TransformJob)._prepare_output_config(s3_path, kms_key_id) if assemble_with is not None: @@ -357,6 +419,12 @@ def _prepare_output_config(s3_path, kms_key_id, assemble_with, accept): @staticmethod def _prepare_resource_config(instance_count, instance_type, volume_kms_key): + """ + Args: + instance_count: + instance_type: + volume_kms_key: + """ config = {"InstanceCount": instance_count, "InstanceType": instance_type} if volume_kms_key is not None: @@ -366,6 +434,12 @@ def _prepare_resource_config(instance_count, instance_type, volume_kms_key): @staticmethod def _prepare_data_processing(input_filter, output_filter, join_source): + """ + Args: + input_filter: + output_filter: + join_source: + """ config = {} if input_filter is not None: diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 16f6bf07d3..9531bd4524 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +"""Placeholder docstring""" from __future__ import absolute_import import importlib @@ -52,10 +53,11 @@ class WarmStartTypes(Enum): """Warm Start Configuration type. There can be two types of warm start jobs: - * IdenticalDataAndAlgorithm: Type of warm start that allows users to reuse training results from existing - tuning jobs that have the same algorithm code and datasets. - * TransferLearning: Type of warm start that allows users to reuse training results from existing tuning jobs - that have similar algorithm code and datasets. + * IdenticalDataAndAlgorithm: Type of warm start that allows users to reuse + training results from existing tuning jobs that have the same algorithm code + and datasets. 
* TransferLearning: Type of warm start that allows users to
+    reuse training results from existing tuning jobs that have similar algorithm
+    code and datasets.
     """

     IDENTICAL_DATA_AND_ALGORITHM = "IdenticalDataAndAlgorithm"
@@ -63,8 +65,8 @@ class WarmStartTypes(Enum):


 class WarmStartConfig(object):
-    """Warm Start Configuration which defines the nature of the warm start ``HyperparameterTuner``, with type and
-    parents for warm start.
+    """Warm Start Configuration which defines the nature of the warm start
+    ``HyperparameterTuner``, with type and parents for warm start.

     Examples:
         >>> warm_start_config = WarmStartConfig(type=WarmStartTypes.TransferLearning, parents={"p1","p2"})
@@ -75,12 +77,14 @@ class WarmStartConfig(object):
     """

     def __init__(self, warm_start_type, parents):
-        """Initializes the ``WarmStartConfig`` with the provided ``WarmStartTypes`` and parents.
+        """Initializes the ``WarmStartConfig`` with the provided
+        ``WarmStartTypes`` and parents.

         Args:
-            warm_start_type (sagemaker.tuner.WarmStartTypes): This should be one of the supported warm start types
-                in WarmStartType
-            parents (set{str}): Set of parent tuning jobs which will be used to warm start the new tuning job.
+            warm_start_type (sagemaker.tuner.WarmStartTypes): This should be one
+                of the supported warm start types in WarmStartType
+            parents (set{str}): Set of parent tuning jobs which will be used to
+                warm start the new tuning job.
         """

        if warm_start_type not in WarmStartTypes:
@@ -100,19 +104,8 @@ def __init__(self, warm_start_type, parents):

     @classmethod
     def from_job_desc(cls, warm_start_config):
-        """Creates an instance of ``WarmStartConfig`` class, from warm start configuration response from
-        DescribeTrainingJob.
-
-        Args:
-            warm_start_config (dict): The expected format of the ``warm_start_config`` contains two first-class
-                fields:
-                * "type": Type of warm start tuner, currently two supported types - "IdenticalDataAndAlgorithm" and
-                "TransferLearning".
-                * "parents": List of tuning job names from which the warm start should be done.
-
-        Returns:
-            sagemaker.tuner.WarmStartConfig: De-serialized instance of WarmStartConfig containing the type and parents
-            provided as part of ``warm_start_config``.
+        """Creates an instance of ``WarmStartConfig`` class, from warm start
+        configuration response from DescribeTrainingJob.

         Examples:
             >>> warm_start_config = WarmStartConfig.from_job_desc(warm_start_config={
@@ -126,6 +119,15 @@ def from_job_desc(cls, warm_start_config):
                 "TransferLearning"
             >>> warm_start_config.parents
             ["p1","p2"]
+
+        Args:
+            warm_start_config (dict): The expected format of the
+                ``warm_start_config`` contains two first-class fields:
+                "type", the type of warm start tuner ("IdenticalDataAndAlgorithm"
+                or "TransferLearning"), and "parents", the list of tuning job
+                names from which the warm start should be done.
+
+        Returns:
+            sagemaker.tuner.WarmStartConfig: De-serialized instance of
+            WarmStartConfig containing the type and parents provided as part of
+            ``warm_start_config``.
         """
         if (
             not warm_start_config
@@ -145,11 +147,11 @@ def from_job_desc(cls, warm_start_config):
     def to_input_req(self):
         """Converts the ``self`` instance to the desired input request format.

-        Returns:
-            dict: Containing the "WarmStartType" and "ParentHyperParameterTuningJobs" as the first class fields.
-
         Examples:
-            >>> warm_start_config = WarmStartConfig(warm_start_type=WarmStartTypes.TransferLearning,parents=["p1,p2"])
+            >>> warm_start_config = WarmStartConfig(
+            ...     warm_start_type=WarmStartTypes.TransferLearning,
+            ...     parents={"p1","p2"})
             >>> warm_start_config.to_input_req()
             {
                 "WarmStartType":"TransferLearning",
@@ -158,6 +160,10 @@ def to_input_req(self):
                     {'HyperParameterTuningJobName': "p2"},
                 ]
             }
+
+        Returns:
+            dict: Containing the "WarmStartType" and
+            "ParentHyperParameterTuningJobs" as the first class fields.
         """
         return {
             WARM_START_TYPE: self.type.value,
@@ -168,8 +174,8 @@ def to_input_req(self):


 class HyperparameterTuner(object):
-    """A class for creating and interacting with Amazon SageMaker hyperparameter tuning jobs, as well as
-    deploying the resulting model(s).
+    """A class for creating and interacting with Amazon SageMaker hyperparameter
+    tuning jobs, as well as deploying the resulting model(s).
     """

     TUNING_JOB_NAME_MAX_LENGTH = 32
@@ -195,37 +201,55 @@ def __init__(
         warm_start_config=None,
         early_stopping_type="Off",
     ):
-        """Initialize a ``HyperparameterTuner``. It takes an estimator to obtain configuration information
-        for training jobs that are created as the result of a hyperparameter tuning job.
+        """Initialize a ``HyperparameterTuner``. It takes an estimator to obtain
+        configuration information for training jobs that are created as the
+        result of a hyperparameter tuning job.

         Args:
-            estimator (sagemaker.estimator.EstimatorBase): An estimator object that has been initialized with
-                the desired configuration. There does not need to be a training job associated with this instance.
-            objective_metric_name (str): Name of the metric for evaluating training jobs.
-            hyperparameter_ranges (dict[str, sagemaker.parameter.ParameterRange]): Dictionary of parameter ranges.
-                These parameter ranges can be one of three types: Continuous, Integer, or Categorical. The keys of the
-                dictionary are the names of the hyperparameter, and the values are the appropriate parameter range class
-                to represent the range.
-            metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the
-                training jobs (default: None). Each dictionary contains two keys: 'Name' for the name of the metric, and
-                'Regex' for the regular expression used to extract the metric from the logs. This should be defined only
-                for hyperparameter tuning jobs that don't use an Amazon algorithm.
-            strategy (str): Strategy to be used for hyperparameter estimations (default: 'Bayesian').
-            objective_type (str): The type of the objective metric for evaluating training jobs. This value can be
-                either 'Minimize' or 'Maximize' (default: 'Maximize').
-            max_jobs (int): Maximum total number of training jobs to start for the hyperparameter tuning job
-                (default: 1).
-            max_parallel_jobs (int): Maximum number of parallel training jobs to start (default: 1).
-            tags (list[dict]): List of tags for labeling the tuning job (default: None). For more, see
+            estimator (sagemaker.estimator.EstimatorBase): An estimator object
+                that has been initialized with the desired configuration. There
+                does not need to be a training job associated with this
+                instance.
+            objective_metric_name (str): Name of the metric for evaluating
+                training jobs.
+            hyperparameter_ranges (dict[str, sagemaker.parameter.ParameterRange]): Dictionary of
+                parameter ranges. These parameter ranges can be one
+                of three types: Continuous, Integer, or Categorical.
The keys of + the dictionary are the names of the hyperparameter, and the + values are the appropriate parameter range class to represent + the range. + metric_definitions (list[dict]): A list of dictionaries that defines + the metric(s) used to evaluate the training jobs (default: + None). Each dictionary contains two keys: 'Name' for the name of + the metric, and 'Regex' for the regular expression used to + extract the metric from the logs. This should be defined only + for hyperparameter tuning jobs that don't use an Amazon + algorithm. + strategy (str): Strategy to be used for hyperparameter estimations + (default: 'Bayesian'). + objective_type (str): The type of the objective metric for + evaluating training jobs. This value can be either 'Minimize' or + 'Maximize' (default: 'Maximize'). + max_jobs (int): Maximum total number of training jobs to start for + the hyperparameter tuning job (default: 1). + max_parallel_jobs (int): Maximum number of parallel training jobs to + start (default: 1). + tags (list[dict]): List of tags for labeling the tuning job + (default: None). For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - base_tuning_job_name (str): Prefix for the hyperparameter tuning job name when the - :meth:`~sagemaker.tuner.HyperparameterTuner.fit` method launches. If not specified, - a default job name is generated, based on the training image name and current timestamp. - warm_start_config (sagemaker.tuner.WarmStartConfig): A ``WarmStartConfig`` object that has been initialized - with the configuration defining the nature of warm start tuning job. - early_stopping_type (str): Specifies whether early stopping is enabled for the job. - Can be either 'Auto' or 'Off' (default: 'Off'). If set to 'Off', early stopping will not be attempted. - If set to 'Auto', early stopping of some training jobs may happen, but is not guaranteed to. + base_tuning_job_name (str): Prefix for the hyperparameter tuning job + name when the :meth:`~sagemaker.tuner.HyperparameterTuner.fit` + method launches. If not specified, a default job name is + generated, based on the training image name and current + timestamp. + warm_start_config (sagemaker.tuner.WarmStartConfig): A + ``WarmStartConfig`` object that has been initialized with the + configuration defining the nature of warm start tuning job. + early_stopping_type (str): Specifies whether early stopping is + enabled for the job. Can be either 'Auto' or 'Off' (default: + 'Off'). If set to 'Off', early stopping will not be attempted. + If set to 'Auto', early stopping of some training jobs may + happen, but is not guaranteed to. """ self._hyperparameter_ranges = hyperparameter_ranges if self._hyperparameter_ranges is None or len(self._hyperparameter_ranges) == 0: @@ -250,6 +274,11 @@ def __init__( self.static_hyperparameters = None def _prepare_for_training(self, job_name=None, include_cls_metadata=False): + """ + Args: + job_name: + include_cls_metadata: + """ if job_name is not None: self._current_job_name = job_name else: @@ -280,33 +309,42 @@ def fit(self, inputs=None, job_name=None, include_cls_metadata=False, **kwargs): """Start a hyperparameter tuning job. Args: - inputs: Information about the training data. Please refer to the ``fit()`` method of - the associated estimator, as this can take any of the following forms: + inputs: Information about the training data. 
Please refer to the + ``fit()`` method of the associated estimator, as this can take + any of the following forms: * (str) - The S3 location where training data is saved. - * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names - to strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. + + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple + channels for training data, you can specify a dict mapping channel + names to strings or :func:`~sagemaker.session.s3_input` + objects. + + * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can + provide additional information about the training dataset. See + :func:`sagemaker.session.s3_input` for full details. + * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of - Amazon :class:~`Record` objects serialized and stored in S3. - For use with an estimator for an Amazon algorithm. + Amazon :class:~`Record` objects serialized and stored in + S3. For use with an estimator for an Amazon algorithm. + * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of - :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is - a different channel of training data. - - job_name (str): Tuning job name. If not specified, the tuner generates a default job name, - based on the training image name and current timestamp. - include_cls_metadata (bool): Whether or not the hyperparameter tuning job should include - information about the estimator class (default: False). This information is passed - as a hyperparameter, so if the algorithm you are using cannot handle - unknown hyperparameters (e.g. an Amazon SageMaker built-in algorithm that - does not have a custom estimator in the Python SDK), then set - ``include_cls_metadata`` to ``False``. - **kwargs: Other arguments needed for training. Please refer to the ``fit()`` method of the associated - estimator to see what other arguments are needed. + :class:~`sagemaker.amazon.amazon_estimator.RecordSet` + objects, where each instance is a different channel of + training data. + job_name (str): Tuning job name. If not specified, the tuner + generates a default job name, based on the training image name + and current timestamp. + include_cls_metadata (bool): Whether or not the hyperparameter + tuning job should include information about the estimator class + (default: False). This information is passed as a + hyperparameter, so if the algorithm you are using cannot handle + unknown hyperparameters (e.g. an Amazon SageMaker built-in + algorithm that does not have a custom estimator in the Python + SDK), then set ``include_cls_metadata`` to ``False``. + **kwargs: Other arguments needed for training. Please refer to the + ``fit()`` method of the associated estimator to see what other + arguments are needed. """ if isinstance(inputs, (list, RecordSet)): self.estimator._prepare_for_training(inputs, **kwargs) @@ -320,21 +358,10 @@ def fit(self, inputs=None, job_name=None, include_cls_metadata=False, **kwargs): def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estimator_cls=None): """Attach to an existing hyperparameter tuning job. 
- Create a HyperparameterTuner bound to an existing hyperparameter tuning job. After attaching, if there exists a - best training job (or any other completed training job), that can be deployed to create - an Amazon SageMaker Endpoint and return a ``Predictor``. - - Args: - tuning_job_name (str): The name of the hyperparameter tuning job to attach to. - sagemaker_session (sagemaker.session.Session): Session object which manages interactions with - Amazon SageMaker APIs and any other AWS services needed. If not specified, one is created - using the default AWS configuration chain. - job_details (dict): The response to a ``DescribeHyperParameterTuningJob`` call. If not specified, - the ``HyperparameterTuner`` will perform one such call with the provided hyperparameter tuning job name. - estimator_cls (str): The estimator class name associated with the training jobs, - e.g. 'sagemaker.estimator.Estimator'. If not specified, the ``HyperparameterTuner`` will try to derive - the correct estimator class from training job metadata, defaulting to - :class:~`sagemaker.estimator.Estimator` if it is unable to determine a more specific class. + Create a HyperparameterTuner bound to an existing hyperparameter + tuning job. After attaching, if there exists a best training job (or any + other completed training job), that can be deployed to create an Amazon + SageMaker Endpoint and return a ``Predictor``. Examples: >>> my_tuner.fit() @@ -343,9 +370,27 @@ def attach(cls, tuning_job_name, sagemaker_session=None, job_details=None, estim >>> attached_tuner = HyperparameterTuner.attach(job_name) >>> attached_tuner.deploy() + Args: + tuning_job_name (str): The name of the hyperparameter tuning job to + attach to. + sagemaker_session (sagemaker.session.Session): Session object which + manages interactions with Amazon SageMaker APIs and any other + AWS services needed. If not specified, one is created using the + default AWS configuration chain. + job_details (dict): The response to a + ``DescribeHyperParameterTuningJob`` call. If not specified, the + ``HyperparameterTuner`` will perform one such call with the + provided hyperparameter tuning job name. + estimator_cls (str): The estimator class name associated with the + training jobs, e.g. 'sagemaker.estimator.Estimator'. If not + specified, the ``HyperparameterTuner`` will try to derive the + correct estimator class from training job metadata, defaulting + to :class:~`sagemaker.estimator.Estimator` if it is unable to + determine a more specific class. + Returns: - sagemaker.tuner.HyperparameterTuner: A ``HyperparameterTuner`` instance with the attached hyperparameter - tuning job. + sagemaker.tuner.HyperparameterTuner: A ``HyperparameterTuner`` + instance with the attached hyperparameter tuning job. """ sagemaker_session = sagemaker_session or Session() @@ -379,30 +424,38 @@ def deploy( model_name=None, **kwargs ): - """Deploy the best trained or user specified model to an Amazon SageMaker endpoint and return a - ``sagemaker.RealTimePredictor`` object. + """Deploy the best trained or user specified model to an Amazon + SageMaker endpoint and return a ``sagemaker.RealTimePredictor`` object. - For more information: http://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-training.html + For more information: + http://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-training.html Args: - initial_instance_count (int): Minimum number of EC2 instances to deploy to an endpoint for prediction. 
- instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction, - for example, 'ml.c4.xlarge'. - accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading - and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator - will be attached to the endpoint. - For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html - endpoint_name (str): Name to use for creating an Amazon SageMaker endpoint. If not specified, - the name of the training job is used. - wait (bool): Whether the call should wait until the deployment of model completes (default: True). - model_name (str): Name to use for creating an Amazon SageMaker model. If not specified, the name of - the training job is used. - **kwargs: Other arguments needed for deployment. Please refer to the ``create_model()`` method of - the associated estimator to see what other arguments are needed. + initial_instance_count (int): Minimum number of EC2 instances to + deploy to an endpoint for prediction. + instance_type (str): Type of EC2 instance to deploy to an endpoint + for prediction, for example, 'ml.c4.xlarge'. + accelerator_type (str): Type of Elastic Inference accelerator to + attach to an endpoint for model loading and inference, for + example, 'ml.eia1.medium'. If not specified, no Elastic + Inference accelerator will be attached to the endpoint. For more + information: + https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + endpoint_name (str): Name to use for creating an Amazon SageMaker + endpoint. If not specified, the name of the training job is + used. + wait (bool): Whether the call should wait until the deployment of + model completes (default: True). + model_name (str): Name to use for creating an Amazon SageMaker + model. If not specified, the name of the training job is used. + **kwargs: Other arguments needed for deployment. Please refer to the + ``create_model()`` method of the associated estimator to see + what other arguments are needed. Returns: sagemaker.predictor.RealTimePredictor: A predictor that provides a ``predict()`` method, - which can be used to send requests to the Amazon SageMaker endpoint and obtain inferences. + which can be used to send requests to the Amazon SageMaker + endpoint and obtain inferences. """ endpoint_name = endpoint_name or self.best_training_job() best_estimator = self.estimator.attach( @@ -419,22 +472,22 @@ def deploy( ) def stop_tuning_job(self): - """Stop latest running hyperparameter tuning job. - """ + """Stop latest running hyperparameter tuning job.""" self._ensure_last_tuning_job() self.latest_tuning_job.stop() def wait(self): - """Wait for latest hyperparameter tuning job to finish. - """ + """Wait for latest hyperparameter tuning job to finish.""" self._ensure_last_tuning_job() self.latest_tuning_job.wait() def best_training_job(self): - """Return name of the best training job for the latest hyperparameter tuning job. + """Return name of the best training job for the latest hyperparameter + tuning job. Raises: - Exception: If there is no best training job available for the hyperparameter tuning job. + Exception: If there is no best training job available for the + hyperparameter tuning job. """ self._ensure_last_tuning_job() @@ -454,8 +507,8 @@ def best_training_job(self): def delete_endpoint(self, endpoint_name=None): """Delete an Amazon SageMaker endpoint. 
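Taken together, the tuner methods above support the following end-to-end flow. This is a sketch, not part of the patch: ``tuner`` is assumed to have been constructed with an estimator and hyperparameter ranges, and the channel name, bucket, and instance type are illustrative:

tuner.fit({"train": "s3://my-bucket/train"})  # hypothetical input channel
tuner.wait()

print(tuner.best_training_job())  # name of the winning training job

predictor = tuner.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge")
# ... send requests via predictor.predict(...) ...

tuner.delete_endpoint()  # defaults to the endpoint named after the best job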
- If an endpoint name is not specified, this defaults to looking for an endpoint that - shares a name with the best training job for deletion. + If an endpoint name is not specified, this defaults to looking for an + endpoint that shares a name with the best training job for deletion. Args: endpoint_name (str): Name of the endpoint to delete @@ -464,12 +517,18 @@ def delete_endpoint(self, endpoint_name=None): self.sagemaker_session.delete_endpoint(endpoint_name) def _ensure_last_tuning_job(self): + """Placeholder docstring""" if self.latest_tuning_job is None: raise ValueError("No tuning job available") @classmethod def _prepare_estimator_cls(cls, estimator_cls, training_details): # Check for customer-specified estimator first + """ + Args: + estimator_cls: + training_details: + """ if estimator_cls is not None: module, cls_name = estimator_cls.rsplit(".", 1) return getattr(importlib.import_module(module), cls_name) @@ -498,6 +557,12 @@ def _prepare_estimator_cls(cls, estimator_cls, training_details): @classmethod def _prepare_estimator_from_job_description(cls, estimator_cls, job_details, sagemaker_session): + """ + Args: + estimator_cls: + job_details: + sagemaker_session: + """ training_details = job_details["TrainingJobDefinition"] # Swap name for static hyperparameters to what an estimator would expect @@ -528,6 +593,10 @@ def _prepare_estimator_from_job_description(cls, estimator_cls, job_details, sag @classmethod def _prepare_init_params_from_job_description(cls, job_details): + """ + Args: + job_details: + """ tuning_config = job_details["HyperParameterTuningJobConfig"] return { "metric_definitions": job_details["TrainingJobDefinition"]["AlgorithmSpecification"][ @@ -551,6 +620,10 @@ def _prepare_init_params_from_job_description(cls, job_details): @classmethod def _prepare_parameter_ranges(cls, parameter_ranges): + """ + Args: + parameter_ranges: + """ ranges = {} for parameter in parameter_ranges["CategoricalParameterRanges"]: @@ -570,6 +643,10 @@ def _prepare_parameter_ranges(cls, parameter_ranges): @classmethod def _extract_hyperparameters_from_parameter_ranges(cls, parameter_ranges): + """ + Args: + parameter_ranges: + """ hyperparameters = {} for parameter in parameter_ranges["CategoricalParameterRanges"]: @@ -584,8 +661,8 @@ def _extract_hyperparameters_from_parameter_ranges(cls, parameter_ranges): return hyperparameters def hyperparameter_ranges(self): - """Return the hyperparameter ranges in a dictionary to be used as part of a request for creating a - hyperparameter tuning job. + """Return the hyperparameter ranges in a dictionary to be used as part + of a request for creating a hyperparameter tuning job. """ hyperparameter_ranges = dict() for range_type in ParameterRange.__all_types__: @@ -605,18 +682,21 @@ def hyperparameter_ranges(self): @property def sagemaker_session(self): - """Convenience method for accessing the :class:`~sagemaker.session.Session` object associated - with the estimator for the ``HyperparameterTuner``. + """Convenience method for accessing the + :class:`~sagemaker.session.Session` object associated with the estimator + for the ``HyperparameterTuner``. """ return self.estimator.sagemaker_session def analytics(self): - """An instance of HyperparameterTuningJobAnalytics for this latest tuning job of this tuner. - Analytics olbject gives you access to tuning results summarized into a pandas dataframe. + """An instance of HyperparameterTuningJobAnalytics for this latest + tuning job of this tuner. 
+        Analytics object gives you access to tuning results summarized into
+        a pandas dataframe.
         """
         return HyperparameterTuningJobAnalytics(self.latest_tuning_job.name, self.sagemaker_session)

     def _validate_parameter_ranges(self):
+        """Placeholder docstring"""
         for kls in inspect.getmro(self.estimator.__class__)[::-1]:
             for _, value in kls.__dict__.items():
                 if isinstance(value, hp):
@@ -632,6 +712,11 @@ def _validate_parameter_ranges(self):
                 pass

     def _validate_parameter_range(self, value_hp, parameter_range):
+        """
+        Args:
+            value_hp:
+            parameter_range:
+        """
         for (parameter_range_key, parameter_range_value) in parameter_range.__dict__.items():
             if parameter_range_key == "scaling_type":
                 continue
@@ -645,26 +730,30 @@ def _validate_parameter_range(self, value_hp, parameter_range):
             value_hp.validate(parameter_range_value)

     def transfer_learning_tuner(self, additional_parents=None, estimator=None):
-        """Creates a new ``HyperparameterTuner`` by copying the request fields from the provided parent to the new
-        instance of ``HyperparameterTuner``. Followed by addition of warm start configuration with the type as
-        "TransferLearning" and parents as the union of provided list of ``additional_parents`` and the ``self``.
-        Also, training image in the new tuner's estimator is updated with the provided ``training_image``.
-
-        Args:
-            additional_parents (set{str}): Set of additional parents along with the self to be used in warm starting
-                the transfer learning tuner.
-            estimator (sagemaker.estimator.EstimatorBase): An estimator object that has been initialized with
-                the desired configuration. There does not need to be a training job associated with this instance.
-
-        Returns:
-            sagemaker.tuner.HyperparameterTuner: ``HyperparameterTuner`` instance which can be used to launch transfer
-                learning tuning job.
+        """Creates a new ``HyperparameterTuner`` by copying the request fields
+        from the provided parent to the new instance of ``HyperparameterTuner``.
+        Followed by addition of warm start configuration with the type as
+        "TransferLearning" and parents as the union of provided list of
+        ``additional_parents`` and the ``self``. Also, training image in the new
+        tuner's estimator is updated with the provided ``training_image``.

         Examples:
             >>> parent_tuner = HyperparameterTuner.attach(tuning_job_name="parent-job-1")
             >>> transfer_learning_tuner = parent_tuner.transfer_learning_tuner(additional_parents={"parent-job-2"})
             Later On:
             >>> transfer_learning_tuner.fit(inputs={})
+
+        Args:
+            additional_parents (set{str}): Set of additional parents along with
+                the self to be used in warm starting the transfer learning
+                tuner.
+            estimator (sagemaker.estimator.EstimatorBase): An estimator object
+                that has been initialized with the desired configuration. There
+                does not need to be a training job associated with this
+                instance.
+
+        Returns:
+            sagemaker.tuner.HyperparameterTuner: ``HyperparameterTuner``
+                instance which can be used to launch transfer learning tuning job.
         """

         return self._create_warm_start_tuner(
@@ -674,17 +763,11 @@ def transfer_learning_tuner(self, additional_parents=None, estimator=None):
         )

     def identical_dataset_and_algorithm_tuner(self, additional_parents=None):
-        """Creates a new ``HyperparameterTuner`` by copying the request fields from the provided parent to the new
-        instance of ``HyperparameterTuner``. Followed by addition of warm start configuration with the type as
-        "IdenticalDataAndAlgorithm" and parents as the union of provided list of ``additional_parents`` and the ``self``
-
-        Args:
-            additional_parents (set{str}): Set of additional parents along with the self to be used in warm starting
-                the identical dataset and algorithm tuner.
-
-        Returns:
-            sagemaker.tuner.HyperparameterTuner: HyperparameterTuner instance which can be used to launch identical
-                dataset and algorithm tuning job.
+        """Creates a new ``HyperparameterTuner`` by copying the request fields
+        from the provided parent to the new instance of ``HyperparameterTuner``.
+        Followed by addition of warm start configuration with the type as
+        "IdenticalDataAndAlgorithm" and parents as the union of provided list of
+        ``additional_parents`` and the ``self``.

         Examples:
             >>> parent_tuner = HyperparameterTuner.attach(tuning_job_name="parent-job-1")
             >>> identical_dataset_algo_tuner = parent_tuner.identical_dataset_and_algorithm_tuner(
             >>>     additional_parents={"parent-job-2"})
             Later On:
             >>> identical_dataset_algo_tuner.fit(inputs={})
+
+        Args:
+            additional_parents (set{str}): Set of additional parents along with
+                the self to be used in warm starting the identical dataset and
+                algorithm tuner.
+
+        Returns:
+            sagemaker.tuner.HyperparameterTuner: HyperparameterTuner instance
+                which can be used to launch identical dataset and algorithm tuning
+                job.
         """

         return self._create_warm_start_tuner(
@@ -700,16 +792,20 @@ def identical_dataset_and_algorithm_tuner(self, additional_parents=None):
         )

     def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimator=None):
-        """Creates a new ``HyperparameterTuner`` with ``WarmStartConfig``, where type will be equal to
-        ``warm_start_type`` and``parents`` would be equal to union of ``additional_parents`` and self.
+        """Creates a new ``HyperparameterTuner`` with ``WarmStartConfig``, where
+        type will be equal to ``warm_start_type`` and ``parents`` would be equal
+        to union of ``additional_parents`` and self.

         Args:
-            additional_parents (set{str}): Additional parents along with self, to be used for warm starting.
-            warm_start_type (sagemaker.tuner.WarmStartTypes): Type of warm start job.
+            additional_parents (set{str}): Additional parents along with self,
+                to be used for warm starting.
+            warm_start_type (sagemaker.tuner.WarmStartTypes): Type of warm start
+                job.
+            estimator:

         Returns:
-            sagemaker.tuner.HyperparameterTuner: Instance with the request fields copied from self along with the
-                warm start configuration
+            sagemaker.tuner.HyperparameterTuner: Instance with the request
+                fields copied from self along with the warm start configuration
         """
         all_parents = {self.latest_tuning_job.name}
         if additional_parents:
@@ -727,16 +823,22 @@ def _create_warm_start_tuner(self, additional_parents, warm_start_type, estimato


 class _TuningJob(_Job):
+    """Placeholder docstring"""
+
     @classmethod
     def start_new(cls, tuner, inputs):
-        """Create a new Amazon SageMaker hyperparameter tuning job from the HyperparameterTuner.
+        """Create a new Amazon SageMaker hyperparameter tuning job from the
+        HyperparameterTuner.

         Args:
-            tuner (sagemaker.tuner.HyperparameterTuner): HyperparameterTuner object created by the user.
-            inputs (str): Parameters used when called :meth:`~sagemaker.estimator.EstimatorBase.fit`.
+            tuner (sagemaker.tuner.HyperparameterTuner): HyperparameterTuner
+                object created by the user.
+            inputs (str): Parameters used when called
+                :meth:`~sagemaker.estimator.EstimatorBase.fit`.
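+
+        Example (an illustrative sketch; users normally trigger this
+        indirectly through :meth:`~sagemaker.tuner.HyperparameterTuner.fit`,
+        shown here with a hypothetical S3 bucket):
+
+            >>> tuner.fit({"train": "s3://my-bucket/train"})
+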
        Returns:
-            sagemaker.tuner._TuningJob: Constructed object that captures all information about the started job.
+            sagemaker.tuner._TuningJob: Constructed object that captures all
+                information about the started job.
         """

         config = _Job._load_config(inputs, tuner.estimator)
@@ -783,9 +885,11 @@ def start_new(cls, tuner, inputs):
         return cls(tuner.sagemaker_session, tuner._current_job_name)

     def stop(self):
+        """Placeholder docstring"""
         self.sagemaker_session.stop_tuning_job(name=self.name)

     def wait(self):
+        """Placeholder docstring"""
         self.sagemaker_session.wait_for_tuning_job(self.name)


@@ -793,21 +897,24 @@ def create_identical_dataset_and_algorithm_tuner(
     parent, additional_parents=None, sagemaker_session=None
 ):
     """Creates a new tuner by copying the request fields from the provided parent to the new instance of
-    ``HyperparameterTuner`` followed by addition of warm start configuration with the type as
-    "IdenticalDataAndAlgorithm" and ``parents`` as the union of provided list of ``additional_parents`` and the
-    ``parent``.
+    ``HyperparameterTuner`` followed by addition of warm start configuration
+    with the type as "IdenticalDataAndAlgorithm" and ``parents`` as the
+    union of provided list of ``additional_parents`` and the ``parent``.

     Args:
-        parent (str): Primary parent tuning job's name from which the Tuner and Estimator configuration has to be copied
-        additional_parents (set{str}): Set of additional parent tuning job's names along with the primary parent tuning
-            job name to be used in warm starting the transfer learning tuner.
-        sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
-            Amazon SageMaker APIs and any other AWS services needed. If not specified, one is created
-            using the default AWS configuration chain.
+        parent (str): Primary parent tuning job's name from which the Tuner and
+            Estimator configuration has to be copied
+        additional_parents (set{str}): Set of additional parent tuning job's
+            names along with the primary parent tuning job name to be used in
+            warm starting the identical dataset and algorithm tuner.
+        sagemaker_session (sagemaker.session.Session): Session object which
+            manages interactions with Amazon SageMaker APIs and any other AWS
+            services needed. If not specified, one is created using the default
+            AWS configuration chain.

     Returns:
-        sagemaker.tuner.HyperparameterTuner: a new ``HyperparameterTuner`` object for the warm-started
-        hyperparameter tuning job
+        sagemaker.tuner.HyperparameterTuner: a new ``HyperparameterTuner``
+        object for the warm-started hyperparameter tuning job
     """

     parent_tuner = HyperparameterTuner.attach(
@@ -819,22 +926,29 @@ def create_identical_dataset_and_algorithm_tuner(


 def create_transfer_learning_tuner(
     parent, additional_parents=None, estimator=None, sagemaker_session=None
 ):
-    """Creates a new ``HyperParameterTuner`` by copying the request fields from the provided parent to the new instance
-    of ``HyperparameterTuner`` followed by addition of warm start configuration with the type as "TransferLearning"
-    and ``parents`` as the union of provided list of ``additional_parents`` and the ``parent``.
+    """Creates a new ``HyperParameterTuner`` by copying the request fields
+    from the provided parent to the new instance of ``HyperparameterTuner``
+    followed by addition of warm start configuration with the type as
+    "TransferLearning" and ``parents`` as the union of provided list of
+    ``additional_parents`` and the ``parent``.
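+
+    Example (a minimal sketch; the job names and ``my_estimator`` are
+    hypothetical):
+
+        >>> from sagemaker.tuner import create_transfer_learning_tuner
+        >>> tuner = create_transfer_learning_tuner(
+        ...     parent="parent-tuning-job",
+        ...     additional_parents={"parent-tuning-job-2"},
+        ...     estimator=my_estimator)
+        >>> tuner.fit(inputs={})
+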
    Args:
-        parent (str): Primary parent tuning job's name from which the Tuner and Estimator configuration has to be copied
-        additional_parents (set{str}): Set of additional parent tuning job's names along with the primary parent tuning
-            job name to be used in warm starting the identical dataset and algorithm tuner.
-        estimator (sagemaker.estimator.EstimatorBase): An estimator object that has been initialized with
-            the desired configuration. There does not need to be a training job associated with this instance.
-        sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
-            Amazon SageMaker APIs and any other AWS services needed. If not specified, one is created
-            using the default AWS configuration chain.
+        parent (str): Primary parent tuning job's name from which the Tuner and
+            Estimator configuration has to be copied
+        additional_parents (set{str}): Set of additional parent tuning job's
+            names along with the primary parent tuning job name to be used in
+            warm starting the transfer learning tuner.
+        estimator (sagemaker.estimator.EstimatorBase): An estimator object that
+            has been initialized with the desired configuration. There does not
+            need to be a training job associated with this instance.
+        sagemaker_session (sagemaker.session.Session): Session object which
+            manages interactions with Amazon SageMaker APIs and any other AWS
+            services needed. If not specified, one is created using the default
+            AWS configuration chain.

     Returns:
-        sagemaker.tuner.HyperparameterTuner: New instance of warm started HyperparameterTuner
+        sagemaker.tuner.HyperparameterTuner: New instance of warm started
+        HyperparameterTuner
     """

     parent_tuner = HyperparameterTuner.attach(
diff --git a/src/sagemaker/user_agent.py b/src/sagemaker/user_agent.py
index 2420266572..bedf21b5ff 100644
--- a/src/sagemaker/user_agent.py
+++ b/src/sagemaker/user_agent.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 import platform
@@ -28,6 +29,7 @@


 def determine_prefix():
+    """Placeholder docstring"""
     prefix = "AWS-SageMaker-Python-SDK/{} Python/{} {}/{} Boto3/{} Botocore/{}".format(
         SDK_VERSION, PYTHON_VERSION, OS_NAME, OS_VERSION, boto3.__version__, botocore.__version__
     )
@@ -45,6 +47,10 @@ def determine_prefix():


 def prepend_user_agent(client):
+    """
+    Args:
+        client:
+    """
     prefix = determine_prefix()

     if client._client_config.user_agent is None:
diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py
index 3fa07fb678..8e44955d1a 100644
--- a/src/sagemaker/utils.py
+++ b/src/sagemaker/utils.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import absolute_import

 import contextlib
@@ -40,7 +41,8 @@ def name_from_image(image):
         image (str): Image name.

     Returns:
-        str: Training job name using the algorithm from the image name and a timestamp.
+        str: Training job name using the algorithm from the image name and a
+        timestamp.
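+
+    Example (illustrative only; the timestamp suffix differs on every call):
+
+        >>> name_from_image("123456789012.dkr.ecr.us-west-2.amazonaws.com/my-algo:latest")
+        'my-algo-2019-07-10-17-17-06-123'
+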
""" return name_from_base(base_name_from_image(image)) @@ -48,8 +50,9 @@ def name_from_image(image): def name_from_base(base, max_length=63, short=False): """Append a timestamp to the provided string. - This function assures that the total length of the resulting string is not - longer than the specified max length, trimming the input parameter if necessary. + This function assures that the total length of the resulting string is + not longer than the specified max length, trimming the input parameter if + necessary. Args: base (str): String used as prefix to generate the unique name. @@ -65,6 +68,11 @@ def name_from_base(base, max_length=63, short=False): def unique_name_from_base(base, max_length=63): + """ + Args: + base: + max_length: + """ unique = "%04x" % random.randrange(16 ** 4) # 4-digit hex ts = str(int(time.time())) available_length = max_length - 2 - len(ts) - len(unique) @@ -73,7 +81,8 @@ def unique_name_from_base(base, max_length=63): def base_name_from_image(image): - """Extract the base name of the image to use as the 'algorithm name' for the job. + """Extract the base name of the image to use as the 'algorithm name' for the + job. Args: image (str): Image name. @@ -99,7 +108,11 @@ def sagemaker_short_timestamp(): def debug(func): - """Print the function name and arguments for debugging.""" + """Print the function name and arguments for debugging. + + Args: + func: + """ @wraps(func) def wrapper(*args, **kwargs): @@ -110,6 +123,11 @@ def wrapper(*args, **kwargs): def get_config_value(key_path, config): + """ + Args: + key_path: + config: + """ if config is None: return None @@ -138,15 +156,16 @@ def get_short_version(framework_version): def to_str(value): """Convert the input to a string, unless it is a unicode string in Python 2. - Unicode strings are supported as native strings in Python 3, but ``str()`` cannot be - invoked on unicode strings in Python 2, so we need to check for that case when - converting user-specified values to strings. + Unicode strings are supported as native strings in Python 3, but + ``str()`` cannot be invoked on unicode strings in Python 2, so we need to + check for that case when converting user-specified values to strings. Args: value: The value to convert to a string. Returns: - str or unicode: The string representation of the value or the unicode string itself. + str or unicode: The string representation of the value or the unicode + string itself. """ if sys.version_info.major < 3 and isinstance(value, six.string_types): return value @@ -154,8 +173,11 @@ def to_str(value): def extract_name_from_job_arn(arn): - """Returns the name used in the API given a full ARN for a training job - or hyperparameter tuning job. + """Returns the name used in the API given a full ARN for a training job or + hyperparameter tuning job. + + Args: + arn: """ slash_pos = arn.find("/") if slash_pos == -1: @@ -167,12 +189,12 @@ def secondary_training_status_changed(current_job_description, prev_job_descript """Returns true if training job's secondary status message has changed. Args: - current_job_desc: Current job description, returned from DescribeTrainingJob call. - prev_job_desc: Previous job description, returned from DescribeTrainingJob call. + current_job_description: Current job description, returned from DescribeTrainingJob call. + prev_job_description: Previous job description, returned from DescribeTrainingJob call. Returns: - boolean: Whether the secondary status message of a training job changed or not. 
-
+        boolean: Whether the secondary status message of a training job changed
+        or not.
     """
     current_secondary_status_transitions = current_job_description.get("SecondaryStatusTransitions")
     if (
@@ -200,7 +222,8 @@ def secondary_training_status_changed(current_job_description, prev_job_descript


 def secondary_training_status_message(job_description, prev_description):
-    """Returns a string contains last modified time and the secondary training job status message.
+    """Returns a string that contains the last modified time and the secondary
+    training job status message.

     Args:
         job_description: Returned response from DescribeTrainingJob call
@@ -208,7 +231,6 @@ def secondary_training_status_message(job_description, prev_description):

     Returns:
         str: Job status string to be printed.
-
     """

     if (
@@ -253,9 +275,11 @@ def download_folder(bucket_name, prefix, target, sagemaker_session):

     Args:
         bucket_name (str): S3 bucket name
-        prefix (str): S3 prefix within the bucket that will be downloaded. Can be a single file.
+        prefix (str): S3 prefix within the bucket that will be downloaded. Can
+            be a single file.
         target (str): destination path where the downloaded items will be placed
-        sagemaker_session (:class:`sagemaker.session.Session`): a sagemaker session to interact with S3.
+        sagemaker_session (sagemaker.session.Session): a sagemaker session to
+            interact with S3.
     """

     boto_session = sagemaker_session.boto_session
@@ -293,10 +317,13 @@ def download_folder(bucket_name, prefix, target, sagemaker_session):

 def create_tar_file(source_files, target=None):
     """Create a tar file containing all the source_files

+    Args:
+        source_files (List[str]): List of file paths that will be contained in the tar file
+        target:
+
     Returns:
-        (str): path to created tar file
+        (str): path to created tar file
     """
     if target:
         filename = target
@@ -312,17 +339,17 @@ def create_tar_file(source_files, target=None):

 @contextlib.contextmanager
 def _tmpdir(suffix="", prefix="tmp"):
-    """Create a temporary directory with a context manager. The file is deleted when the context exits.
+    """Create a temporary directory with a context manager. The directory is
+    deleted when the context exits.

     The prefix, suffix, and dir arguments are the same as for mkstemp().

     Args:
-        suffix (str): If suffix is specified, the file name will end with that suffix, otherwise there will be no
-            suffix.
-        prefix (str): If prefix is specified, the file name will begin with that prefix; otherwise,
-            a default prefix is used.
-        dir (str): If dir is specified, the file will be created in that directory; otherwise, a default directory is
-            used.
+        suffix (str): If suffix is specified, the file name will end with that
+            suffix, otherwise there will be no suffix.
+        prefix (str): If prefix is specified, the file name will begin with that
+            prefix; otherwise, a default prefix is used.

     Returns:
         str: path to the directory
     """
@@ -339,35 +366,40 @@ def repack_model(
     repacked_model_uri,
     sagemaker_session,
 ):
-    """Unpack model tarball and creates a new model tarball with the provided code script.
+    """Unpack a model tarball and create a new model tarball with the provided
+    code script.
-    This function does the following:
-    - uncompresses model tarball from S3 or local system into a temp folder
-    - replaces the inference code from the model with the new code provided
-    - compresses the new model tarball and saves it in S3 or local file system
+    This function does the following:
+
+    - uncompresses model tarball from S3 or local system into a temp folder
+    - replaces the inference code from the model with the new code provided
+    - compresses the new model tarball and saves it in S3 or local file system

     Args:
-        inference_script (str): path or basename of the inference script that will be packed into the model
-        source_directory (str): path including all the files that will be packed into the model
-        dependencies (list[str]): A list of paths to directories (absolute or relative) with
-            any additional libraries that will be exported to the container (default: []).
-            The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
-            Example:
-
-                The following call
-                >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
-                results in the following inside the container:
-
-                >>> $ ls
-
-                >>> opt/ml/code
-                >>> |------ train.py
-                >>> |------ common
-                >>> |------ virtual-env
-
-        repacked_model_uri (str): path or file system location where the new model will be saved
+        inference_script (str): path or basename of the inference script that
+            will be packed into the model
+        source_directory (str): path including all the files that will be packed
+            into the model
+        dependencies (list[str]): A list of paths to directories (absolute or
+            relative) with any additional libraries that will be exported to the
+            container (default: []). The library folders will be copied to
+            SageMaker in the same folder where the entrypoint is copied.
+            Example:
+
+                The following call
+                >>> Estimator(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
+                results in the following inside the container:
+
+                >>> $ ls
+
+                >>> opt/ml/code
+                >>> |------ train.py
+                >>> |------ common
+                >>> |------ virtual-env
         model_uri (str): S3 or file system location of the original model tar
-        sagemaker_session (:class:`sagemaker.session.Session`): a sagemaker session to interact with S3.
+        repacked_model_uri (str): path or file system location where the new
+            model will be saved
+        sagemaker_session (sagemaker.session.Session): a sagemaker session to
+            interact with S3.
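+
+    Example (an illustrative sketch; the bucket and paths are hypothetical,
+    and ``session`` is assumed to be an existing
+    ``sagemaker.session.Session``):
+
+        >>> repacked = repack_model(inference_script="inference.py",
+        ...                         source_directory="src",
+        ...                         dependencies=[],
+        ...                         model_uri="s3://my-bucket/model.tar.gz",
+        ...                         repacked_model_uri="s3://my-bucket/repacked/model.tar.gz",
+        ...                         sagemaker_session=session)
+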
Returns: str: path to the new packed model @@ -389,6 +421,12 @@ def repack_model( def _save_model(repacked_model_uri, tmp_model_path, sagemaker_session): + """ + Args: + repacked_model_uri: + tmp_model_path: + sagemaker_session: + """ if repacked_model_uri.lower().startswith("s3://"): url = parse.urlparse(repacked_model_uri) bucket, key = url.netloc, url.path.lstrip("/") @@ -404,6 +442,15 @@ def _save_model(repacked_model_uri, tmp_model_path, sagemaker_session): def _create_or_update_code_dir( model_dir, inference_script, source_directory, dependencies, sagemaker_session, tmp ): + """ + Args: + model_dir: + inference_script: + source_directory: + dependencies: + sagemaker_session: + tmp: + """ code_dir = os.path.join(model_dir, "code") if os.path.exists(code_dir): shutil.rmtree(code_dir, ignore_errors=True) @@ -428,6 +475,12 @@ def _create_or_update_code_dir( def _extract_model(model_uri, sagemaker_session, tmp): + """ + Args: + model_uri: + sagemaker_session: + tmp: + """ tmp_model_dir = os.path.join(tmp, "model") os.mkdir(tmp_model_dir) if model_uri.lower().startswith("s3://"): @@ -441,6 +494,12 @@ def _extract_model(model_uri, sagemaker_session, tmp): def download_file_from_url(url, dst, sagemaker_session): + """ + Args: + url: + dst: + sagemaker_session: + """ url = parse.urlparse(url) bucket, key = url.netloc, url.path.lstrip("/") @@ -454,7 +513,8 @@ def download_file(bucket_name, path, target, sagemaker_session): bucket_name (str): S3 bucket name path (str): file path within the bucket target (str): destination directory for the downloaded file. - sagemaker_session (:class:`sagemaker.session.Session`): a sagemaker session to interact with S3. + sagemaker_session (sagemaker.session.Session): a sagemaker session to + interact with S3. """ path = path.lstrip("/") boto_session = sagemaker_session.boto_session @@ -479,10 +539,10 @@ def get_ecr_image_uri_prefix(account, region): class DeferredError(object): - """Stores an exception and raises it at a later time if this - object is accessed in any way. Useful to allow soft-dependencies on imports, - so that the ImportError can be raised again later if code actually - relies on the missing library. + """Stores an exception and raises it at a later time if this object is + accessed in any way. Useful to allow soft-dependencies on imports, so that + the ImportError can be raised again later if code actually relies on the + missing library. Example:: @@ -494,11 +554,18 @@ class DeferredError(object): """ def __init__(self, exception): + """ + Args: + exception: + """ self.exc = exception def __getattr__(self, name): - """Called by Python interpreter before using any method or property - on the object. So this will short-circuit essentially any access to this + """Called by Python interpreter before using any method or property on + the object. So this will short-circuit essentially any access to this object. + + Args: + name: """ raise self.exc diff --git a/src/sagemaker/vpc_utils.py b/src/sagemaker/vpc_utils.py index 5bf1fd687b..c569e20c1c 100644 --- a/src/sagemaker/vpc_utils.py +++ b/src/sagemaker/vpc_utils.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
+"""Placeholder docstring""" from __future__ import absolute_import SUBNETS_KEY = "Subnets" @@ -22,18 +23,19 @@ def to_dict(subnets, security_group_ids): - """ - Prepares a VpcConfig dict containing keys 'Subnets' and 'SecurityGroupIds' - This is the dict format expected by SageMaker CreateTrainingJob and CreateModel APIs - See https://docs.aws.amazon.com/sagemaker/latest/dg/API_VpcConfig.html + """Prepares a VpcConfig dict containing keys 'Subnets' and + 'SecurityGroupIds' This is the dict format expected by SageMaker + CreateTrainingJob and CreateModel APIs See + https://docs.aws.amazon.com/sagemaker/latest/dg/API_VpcConfig.html Args: subnets (list): list of subnet IDs to use in VpcConfig - security_group_ids (list): list of security group IDs to use in VpcConfig + security_group_ids (list): list of security group IDs to use in + VpcConfig Returns: - A VpcConfig dict containing keys 'Subnets' and 'SecurityGroupIds' - If either or both parameters are None, returns None + A VpcConfig dict containing keys 'Subnets' and 'SecurityGroupIds' If + either or both parameters are None, returns None """ if subnets is None or security_group_ids is None: return None @@ -41,20 +43,22 @@ def to_dict(subnets, security_group_ids): def from_dict(vpc_config, do_sanitize=False): - """ - Extracts subnets and security group ids as lists from a VpcConfig dict + """Extracts subnets and security group ids as lists from a VpcConfig dict Args: - vpc_config (dict): a VpcConfig dict containing 'Subnets' and 'SecurityGroupIds' - do_sanitize (bool): whether to sanitize the VpcConfig dict before extracting values + vpc_config (dict): a VpcConfig dict containing 'Subnets' and + 'SecurityGroupIds' + do_sanitize (bool): whether to sanitize the VpcConfig dict before + extracting values Returns: - Tuple of lists as (subnets, security_group_ids) - If vpc_config parameter is None, returns (None, None) + Tuple of lists as (subnets, security_group_ids) If vpc_config parameter + is None, returns (None, None) Raises: - ValueError if sanitize enabled and vpc_config is invalid - KeyError if sanitize disabled and vpc_config is missing key(s) + * ValueError if sanitize enabled and vpc_config is invalid + + * KeyError if sanitize disabled and vpc_config is missing key(s) """ if do_sanitize: vpc_config = sanitize(vpc_config) @@ -64,16 +68,18 @@ def from_dict(vpc_config, do_sanitize=False): def sanitize(vpc_config): - """ - Checks that an instance of VpcConfig has the expected keys and values, removes unexpected keys, - and raises ValueErrors if any expectations are violated + """Checks that an instance of VpcConfig has the expected keys and values, + removes unexpected keys, and raises ValueErrors if any expectations are + violated Args: - vpc_config (dict): a VpcConfig dict containing 'Subnets' and 'SecurityGroupIds' + vpc_config (dict): a VpcConfig dict containing 'Subnets' and + 'SecurityGroupIds' Returns: - A valid VpcConfig dict containing only 'Subnets' and 'SecurityGroupIds' from the vpc_config parameter - If vpc_config parameter is None, returns None + A valid VpcConfig dict containing only 'Subnets' and 'SecurityGroupIds' + from the vpc_config parameter If vpc_config parameter is None, returns + None Raises: ValueError if any expectations are violated: diff --git a/src/sagemaker/workflow/airflow.py b/src/sagemaker/workflow/airflow.py index 92de8b22b2..21173723a8 100644 --- a/src/sagemaker/workflow/airflow.py +++ b/src/sagemaker/workflow/airflow.py @@ -10,6 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+"""Placeholder docstring"""
 from __future__ import print_function, absolute_import

 import os
@@ -21,12 +22,14 @@


 def prepare_framework(estimator, s3_operations):
-    """Prepare S3 operations (specify where to upload `source_dir`) and environment variables
-    related to framework.
+    """Prepare S3 operations (specify where to upload `source_dir`) and
+    environment variables related to framework.

     Args:
-        estimator (sagemaker.estimator.Estimator): The framework estimator to get information from and update.
-        s3_operations (dict): The dict to specify s3 operations (upload `source_dir`).
+        estimator (sagemaker.estimator.Estimator): The framework estimator to
+            get information from and update.
+        s3_operations (dict): The dict to specify s3 operations (upload
+            `source_dir`).
     """
     if estimator.code_location is not None:
         bucket, key = fw_utils.parse_s3_url(estimator.code_location)
@@ -59,18 +62,22 @@ def prepare_framework(estimator, s3_operations):


 def prepare_amazon_algorithm_estimator(estimator, inputs, mini_batch_size=None):
-    """ Set up amazon algorithm estimator, adding the required `feature_dim` hyperparameter from training data.
+    """Set up amazon algorithm estimator, adding the required `feature_dim`
+    hyperparameter from training data.

     Args:
-        estimator (sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase):
-            An estimator for a built-in Amazon algorithm to get information from and update.
+        estimator (sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase): An estimator
+            for a built-in Amazon algorithm to get information from and update.
         inputs: The training data.
-            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
-                Amazon :class:~`Record` objects serialized and stored in S3.
-                For use with an estimator for an Amazon algorithm.
+            * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
+                Amazon :class:~`Record` objects serialized and stored in S3. For
+                use with an estimator for an Amazon algorithm.
+
             * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
-                :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is
-                a different channel of training data.
+                :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects,
+                where each instance is a different channel of training data.
+        mini_batch_size:
     """
     if isinstance(inputs, list):
         for record in inputs:
@@ -88,32 +95,39 @@ def training_base_config(estimator, inputs=None, job_name=None, mini_batch_size=
     """Export Airflow base training config from an estimator

     Args:
-        estimator (sagemaker.estimator.EstimatorBase):
-            The estimator to export training config from. Can be a BYO estimator,
-            Framework estimator or Amazon algorithm estimator.
-        inputs: Information about the training data. Please refer to the ``fit()`` method of
-            the associated estimator, as this can take any of the following forms:
+        estimator (sagemaker.estimator.EstimatorBase): The estimator to export
+            training config from. Can be a BYO estimator, Framework estimator or
+            Amazon algorithm estimator.
+        inputs: Information about the training data. Please refer to the
+            ``fit()`` method of the associated estimator, as this can take any
+            of the following forms:

             * (str) - The S3 location where training data is saved.
- * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names - to strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. + + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple + channels for training data, you can specify a dict mapping channel names to + strings or :func:`~sagemaker.session.s3_input` objects. + + * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can + provide additional information about the training dataset. See + :func:`sagemaker.session.s3_input` for full details. + * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of - Amazon :class:~`Record` objects serialized and stored in S3. - For use with an estimator for an Amazon algorithm. - * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of - :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is - a different channel of training data. + Amazon :class:~`Record` objects serialized and stored in S3. + For use with an estimator for an Amazon algorithm. + * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of + :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, + where each instance is a different channel of training data. job_name (str): Specify a training job name if needed. - mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an - Amazon algorithm. For other estimators, batch size should be specified in the estimator. + mini_batch_size (int): Specify this argument only when estimator is a + built-in estimator of an Amazon algorithm. For other estimators, + batch size should be specified in the estimator. Returns: - dict: Training config that can be directly used by SageMakerTrainingOperator in Airflow. + dict: Training config that can be directly used by + SageMakerTrainingOperator in Airflow. """ default_bucket = estimator.sagemaker_session.default_bucket() s3_operations = {} @@ -167,32 +181,36 @@ def training_config(estimator, inputs=None, job_name=None, mini_batch_size=None) """Export Airflow training config from an estimator Args: - estimator (sagemaker.estimator.EstimatorBase): - The estimator to export training config from. Can be a BYO estimator, - Framework estimator or Amazon algorithm estimator. - inputs: Information about the training data. Please refer to the ``fit()`` method of - the associated estimator, as this can take any of the following forms: - + estimator (sagemaker.estimator.EstimatorBase): The estimator to export + training config from. Can be a BYO estimator, Framework estimator or + Amazon algorithm estimator. + inputs: Information about the training data. Please refer to the ``fit()`` + method of the associated estimator, as this can take any of the following forms: * (str) - The S3 location where training data is saved. + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names - to strings or :func:`~sagemaker.session.s3_input` objects. + training data, you can specify a dict mapping channel names to + strings or :func:`~sagemaker.session.s3_input` objects. 
+ * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. + additional information about the training dataset. See + :func:`sagemaker.session.s3_input` for full details. + * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of - Amazon :class:~`Record` objects serialized and stored in S3. - For use with an estimator for an Amazon algorithm. - * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of - :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is - a different channel of training data. + Amazon :class:~`Record` objects serialized and stored in S3. + For use with an estimator for an Amazon algorithm. + * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of + :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, + where each instance is a different channel of training data. job_name (str): Specify a training job name if needed. - mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an - Amazon algorithm. For other estimators, batch size should be specified in the estimator. + mini_batch_size (int): Specify this argument only when estimator is a + built-in estimator of an Amazon algorithm. For other estimators, + batch size should be specified in the estimator. Returns: - dict: Training config that can be directly used by SageMakerTrainingOperator in Airflow. + dict: Training config that can be directly used by + SageMakerTrainingOperator in Airflow. """ train_config = training_base_config(estimator, inputs, job_name, mini_batch_size) @@ -209,24 +227,29 @@ def tuning_config(tuner, inputs, job_name=None): """Export Airflow tuning config from an estimator Args: - tuner (sagemaker.tuner.HyperparameterTuner): The tuner to export tuning config from. - inputs: Information about the training data. Please refer to the ``fit()`` method of - the associated estimator in the tuner, as this can take any of the following forms: + tuner (sagemaker.tuner.HyperparameterTuner): The tuner to export tuning + config from. + inputs: Information about the training data. Please refer to the ``fit()`` + method of the associated estimator in the tuner, as this can take any of the + following forms: * (str) - The S3 location where training data is saved. + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names - to strings or :func:`~sagemaker.session.s3_input` objects. + training data, you can specify a dict mapping channel names to + strings or :func:`~sagemaker.session.s3_input` objects. + * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide - additional information about the training dataset. See :func:`sagemaker.session.s3_input` - for full details. + additional information about the training dataset. See + :func:`sagemaker.session.s3_input` for full details. + * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of - Amazon :class:~`Record` objects serialized and stored in S3. - For use with an estimator for an Amazon algorithm. - * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of - :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is - a different channel of training data. + Amazon :class:~`Record` objects serialized and stored in S3. 
+ For use with an estimator for an Amazon algorithm. + * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of + :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects, + where each instance is a different channel of training data. job_name (str): Specify a tuning job name if needed. Returns: @@ -291,7 +314,8 @@ def update_submit_s3_uri(estimator, job_name): """Updated the S3 URI of the framework source directory in given estimator. Args: - estimator (sagemaker.estimator.Framework): The Framework estimator to update. + estimator (sagemaker.estimator.Framework): The Framework estimator to + update. job_name (str): The new job name included in the submit S3 URI Returns: @@ -315,10 +339,13 @@ def update_estimator_from_task(estimator, task_id, task_type): Args: estimator (sagemaker.estimator.EstimatorBase): The estimator to update - task_id (str): The task id of any airflow.contrib.operators.SageMakerTrainingOperator or - airflow.contrib.operators.SageMakerTuningOperator that generates training jobs in the DAG. - task_type (str): Whether the task is from SageMakerTrainingOperator or SageMakerTuningOperator. Values can be - 'training', 'tuning' or None (which means training job is not from any task). + task_id (str): The task id of any + airflow.contrib.operators.SageMakerTrainingOperator or + airflow.contrib.operators.SageMakerTuningOperator that generates + training jobs in the DAG. + task_type (str): Whether the task is from SageMakerTrainingOperator or + SageMakerTuningOperator. Values can be 'training', 'tuning' or None + (which means training job is not from any task). """ if task_type is None: return @@ -343,13 +370,15 @@ def update_estimator_from_task(estimator, task_id, task_type): def prepare_framework_container_def(model, instance_type, s3_operations): - """Prepare the framework model container information. Specify related S3 operations for Airflow to perform. - (Upload `source_dir`) + """Prepare the framework model container information. Specify related S3 + operations for Airflow to perform. (Upload `source_dir` ) Args: model (sagemaker.model.FrameworkModel): The framework model - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - s3_operations (dict): The dict to specify S3 operations (upload `source_dir`). + instance_type (str): The EC2 instance type to deploy this Model to. For + example, 'ml.p2.xlarge'. + s3_operations (dict): The dict to specify S3 operations (upload + `source_dir` ). Returns: dict: The container information of this framework model. @@ -401,14 +430,17 @@ def model_config(instance_type, model, role=None, image=None): """Export Airflow model config from a SageMaker model Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge' - model (sagemaker.model.FrameworkModel): The SageMaker model to export Airflow config from + instance_type (str): The EC2 instance type to deploy this Model to. For + example, 'ml.p2.xlarge' + model (sagemaker.model.FrameworkModel): The SageMaker model to export + Airflow config from role (str): The ``ExecutionRoleArn`` IAM Role ARN for the model image (str): An container image to use for deploying the model Returns: - dict: Model config that can be directly used by SageMakerModelOperator in Airflow. It can also be part - of the config used by SageMakerEndpointOperator and SageMakerTransformOperator in Airflow. + dict: Model config that can be directly used by SageMakerModelOperator + in Airflow. 
It can also be part of the config used by + SageMakerEndpointOperator and SageMakerTransformOperator in Airflow. """ s3_operations = {} model.image = image or model.image @@ -451,22 +483,27 @@ def model_config_from_estimator( """Export Airflow model config from a SageMaker estimator Args: - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge' - estimator (sagemaker.model.EstimatorBase): The SageMaker estimator to export Airflow config from. - It has to be an estimator associated with a training job. - task_id (str): The task id of any airflow.contrib.operators.SageMakerTrainingOperator or - airflow.contrib.operators.SageMakerTuningOperator that generates training jobs in the DAG. The model config - is built based on the training job generated in this operator. - task_type (str): Whether the task is from SageMakerTrainingOperator or SageMakerTuningOperator. Values can be - 'training', 'tuning' or None (which means training job is not from any task). + instance_type (str): The EC2 instance type to deploy this Model to. For + example, 'ml.p2.xlarge' + estimator (sagemaker.model.EstimatorBase): The SageMaker estimator to + export Airflow config from. It has to be an estimator associated + with a training job. + task_id (str): The task id of any + airflow.contrib.operators.SageMakerTrainingOperator or + airflow.contrib.operators.SageMakerTuningOperator that generates + training jobs in the DAG. The model config is built based on the + training job generated in this operator. + task_type (str): Whether the task is from SageMakerTrainingOperator or + SageMakerTuningOperator. Values can be 'training', 'tuning' or None + (which means training job is not from any task). role (str): The ``ExecutionRoleArn`` IAM Role ARN for the model image (str): An container image to use for deploying the model name (str): Name of the model - model_server_workers (int): The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. Only effective when estimator is a - SageMaker framework. - vpc_config_override (dict[str, list[str]]): Override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + model_server_workers (int): The number of worker processes used by the + inference server. If None, server will use one worker per vCPU. Only + effective when estimator is a SageMaker framework. + vpc_config_override (dict[str, list[str]]): Override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. @@ -509,25 +546,28 @@ def transform_config( """Export Airflow transform config from a SageMaker transformer Args: - transformer (sagemaker.transformer.Transformer): The SageMaker transformer to export Airflow - config from. + transformer (sagemaker.transformer.Transformer): The SageMaker + transformer to export Airflow config from. data (str): Input data location in S3. - data_type (str): What the S3 location defines (default: 'S3Prefix'). Valid values: + data_type (str): What the S3 location defines (default: 'S3Prefix'). + Valid values: * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as - inputs for the transform job. - * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as - an input for the transform job. + inputs for the transform job. 
+ * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as + an input for the transform job. content_type (str): MIME type of the input data (default: None). - compression_type (str): Compression type of the input data, if compressed (default: None). - Valid values: 'Gzip', None. - split_type (str): The record delimiter for the input object (default: 'None'). - Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'. - job_name (str): job name (default: None). If not specified, one will be generated. + compression_type (str): Compression type of the input data, if + compressed (default: None). Valid values: 'Gzip', None. + split_type (str): The record delimiter for the input object (default: + 'None'). Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'. + job_name (str): job name (default: None). If not specified, one will be + generated. Returns: - dict: Transform config that can be directly used by SageMakerTransformOperator in Airflow. + dict: Transform config that can be directly used by + SageMakerTransformOperator in Airflow. """ if job_name is not None: transformer._current_job_name = job_name @@ -603,57 +643,75 @@ def transform_config_from_estimator( """Export Airflow transform config from a SageMaker estimator Args: - estimator (sagemaker.model.EstimatorBase): The SageMaker estimator to export Airflow config from. - It has to be an estimator associated with a training job. - task_id (str): The task id of any airflow.contrib.operators.SageMakerTrainingOperator or - airflow.contrib.operators.SageMakerTuningOperator that generates training jobs in the DAG. The transform - config is built based on the training job generated in this operator. - task_type (str): Whether the task is from SageMakerTrainingOperator or SageMakerTuningOperator. Values can be - 'training', 'tuning' or None (which means training job is not from any task). + estimator (sagemaker.model.EstimatorBase): The SageMaker estimator to + export Airflow config from. It has to be an estimator associated + with a training job. + task_id (str): The task id of any + airflow.contrib.operators.SageMakerTrainingOperator or + airflow.contrib.operators.SageMakerTuningOperator that generates + training jobs in the DAG. The transform config is built based on the + training job generated in this operator. + task_type (str): Whether the task is from SageMakerTrainingOperator or + SageMakerTuningOperator. Values can be 'training', 'tuning' or None + (which means training job is not from any task). instance_count (int): Number of EC2 instances to use. - instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. + instance_type (str): Type of EC2 instance to use, for example, + 'ml.c4.xlarge'. data (str): Input data location in S3. - data_type (str): What the S3 location defines (default: 'S3Prefix'). Valid values: + data_type (str): What the S3 location defines (default: 'S3Prefix'). + Valid values: * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as - inputs for the transform job. - * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as - an input for the transform job. + inputs for the transform job. + * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as + an input for the transform job. content_type (str): MIME type of the input data (default: None). 
- compression_type (str): Compression type of the input data, if compressed (default: None). - Valid values: 'Gzip', None. - split_type (str): The record delimiter for the input object (default: 'None'). - Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'. - job_name (str): transform job name (default: None). If not specified, one will be generated. - model_name (str): model name (default: None). If not specified, one will be generated. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. - env (dict): Environment variables to be set for use during the transform job (default: None). - max_concurrent_transforms (int): The maximum number of HTTP requests to be made to - each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML - compute instance (default: None). - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. + compression_type (str): Compression type of the input data, if + compressed (default: None). Valid values: 'Gzip', None. + split_type (str): The record delimiter for the input object (default: + 'None'). Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'. + job_name (str): transform job name (default: None). If not specified, + one will be generated. + model_name (str): model name (default: None). If not specified, one will + be generated. + strategy (str): The strategy used to decide how to batch records in a + single request (default: None). Valid values: 'MULTI_RECORD' and + 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). Valid + values: 'Line' or 'None'. + output_path (str): S3 location for saving the transform result. If not + specified, results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the transform + output (default: None). + accept (str): The content type accepted by the endpoint deployed during + the transform job. + env (dict): Environment variables to be set for use during the transform + job (default: None). + max_concurrent_transforms (int): The maximum number of HTTP requests to + be made to each individual transform container at one time. + max_payload (int): Maximum size of the payload in a single HTTP request + to the container in MB. + tags (list[dict]): List of tags for labeling a transform job. If none + specified, then the tags used for the training job are used for the + transform job. 
+ role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the role + from the Estimator will be used. + volume_kms_key (str): Optional. KMS key ID for encrypting the volume + attached to the ML compute instance (default: None). + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one worker + per vCPU. image (str): An container image to use for deploying the model - vpc_config_override (dict[str, list[str]]): Override for VpcConfig set on the model. - Default: use subnets and security groups from this Estimator. + vpc_config_override (dict[str, list[str]]): Override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. Returns: - dict: Transform config that can be directly used by SageMakerTransformOperator in Airflow. + dict: Transform config that can be directly used by + SageMakerTransformOperator in Airflow. """ model_base_config = model_config_from_estimator( instance_type=instance_type, @@ -715,18 +773,20 @@ def deploy_config(model, initial_instance_count, instance_type, endpoint_name=No """Export Airflow deploy config from a SageMaker model Args: - model (sagemaker.model.Model): The SageMaker model to export the Airflow config from. - instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. - initial_instance_count (int): The initial number of instances to run in the - ``Endpoint`` created from this ``Model``. + model (sagemaker.model.Model): The SageMaker model to export the Airflow + config from. + initial_instance_count (int): The initial number of instances to run in + the ``Endpoint`` created from this ``Model``. + instance_type (str): The EC2 instance type to deploy this Model to. For + example, 'ml.p2.xlarge'. endpoint_name (str): The name of the endpoint to create (default: None). If not specified, a unique endpoint name will be created. - tags (list[dict]): List of tags for labeling a training job. For more, see - https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + tags (list[dict]): List of tags for labeling a training job. For more, + see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. Returns: - dict: Deploy config that can be directly used by SageMakerEndpointOperator in Airflow. - + dict: Deploy config that can be directly used by + SageMakerEndpointOperator in Airflow. """ model_base_config = model_config(instance_type, model) @@ -769,27 +829,34 @@ def deploy_config_from_estimator( """Export Airflow deploy config from a SageMaker estimator Args: - estimator (sagemaker.model.EstimatorBase): The SageMaker estimator to export Airflow config from. - It has to be an estimator associated with a training job. - task_id (str): The task id of any airflow.contrib.operators.SageMakerTrainingOperator or - airflow.contrib.operators.SageMakerTuningOperator that generates training jobs in the DAG. The endpoint - config is built based on the training job generated in this operator. - task_type (str): Whether the task is from SageMakerTrainingOperator or SageMakerTuningOperator. Values can be - 'training', 'tuning' or None (which means training job is not from any task). - initial_instance_count (int): Minimum number of EC2 instances to deploy to an endpoint for prediction. 
- instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction, - for example, 'ml.c4.xlarge'. - model_name (str): Name to use for creating an Amazon SageMaker model. If not specified, one will be generated. - endpoint_name (str): Name to use for creating an Amazon SageMaker endpoint. If not specified, the name of - the SageMaker model is used. - tags (list[dict]): List of tags for labeling a training job. For more, see - https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - **kwargs: Passed to invocation of ``create_model()``. Implementations may customize - ``create_model()`` to accept ``**kwargs`` to customize model creation during deploy. - For more, see the implementation docs. + estimator (sagemaker.model.EstimatorBase): The SageMaker estimator to + export Airflow config from. It has to be an estimator associated + with a training job. + task_id (str): The task id of any + airflow.contrib.operators.SageMakerTrainingOperator or + airflow.contrib.operators.SageMakerTuningOperator that generates + training jobs in the DAG. The endpoint config is built based on the + training job generated in this operator. + task_type (str): Whether the task is from SageMakerTrainingOperator or + SageMakerTuningOperator. Values can be 'training', 'tuning' or None + (which means training job is not from any task). + initial_instance_count (int): Minimum number of EC2 instances to deploy + to an endpoint for prediction. + instance_type (str): Type of EC2 instance to deploy to an endpoint for + prediction, for example, 'ml.c4.xlarge'. + model_name (str): Name to use for creating an Amazon SageMaker model. If + not specified, one will be generated. + endpoint_name (str): Name to use for creating an Amazon SageMaker + endpoint. If not specified, the name of the SageMaker model is used. + tags (list[dict]): List of tags for labeling a training job. For more, + see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + **kwargs: Passed to invocation of ``create_model()``. Implementations + may customize ``create_model()`` to accept ``**kwargs`` to customize + model creation during deploy. For more, see the implementation docs. Returns: - dict: Deploy config that can be directly used by SageMakerEndpointOperator in Airflow. + dict: Deploy config that can be directly used by + SageMakerEndpointOperator in Airflow. 
""" update_estimator_from_task(estimator, task_id, task_type) model = estimator.create_model(**kwargs) From 2dfe7d3660068da6b217b2f6475d223dfdb88286 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Fri, 19 Jul 2019 09:48:30 -0700 Subject: [PATCH 26/35] change: allow serving script to be defined for deploy() and transformer() with frameworks (#944) --- src/sagemaker/chainer/estimator.py | 25 +++++-- src/sagemaker/estimator.py | 13 +++- src/sagemaker/mxnet/estimator.py | 25 +++++-- src/sagemaker/pytorch/estimator.py | 25 +++++-- src/sagemaker/tensorflow/estimator.py | 70 ++++++++++++++++--- .../inference.py | 18 +---- .../training.py | 28 ++++++++ tests/integ/test_tf_script_mode.py | 8 ++- tests/unit/test_chainer.py | 9 ++- tests/unit/test_estimator.py | 12 ++-- tests/unit/test_mxnet.py | 9 ++- tests/unit/test_pytorch.py | 9 ++- tests/unit/test_tf_estimator.py | 18 ++++- 13 files changed, 204 insertions(+), 65 deletions(-) create mode 100644 tests/data/tfs/tfs-test-entrypoint-with-handler/training.py diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index d01c495174..6ae6fd6992 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -155,7 +155,13 @@ def hyperparameters(self): return hyperparameters def create_model( - self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT + self, + model_server_workers=None, + role=None, + vpc_config_override=VPC_CONFIG_DEFAULT, + entry_point=None, + source_dir=None, + dependencies=None, ): """Create a SageMaker ``ChainerModel`` object that can be deployed to an ``Endpoint``. @@ -171,17 +177,24 @@ def create_model( the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. + entry_point (str): Path (absolute or relative) to the local Python source file which should be executed + as the entry point to training. If not specified, the training entry point is used. + source_dir (str): Path (absolute or relative) to a directory with any other serving + source code dependencies aside from the entry point file. + If not specified, the model source directory from training is used. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container. + If not specified, the dependencies from training are used. Returns: sagemaker.chainer.model.ChainerModel: A SageMaker ``ChainerModel`` object. See :func:`~sagemaker.chainer.model.ChainerModel` for full details. """ - role = role or self.role return ChainerModel( self.model_data, - role, - self.entry_point, - source_dir=self._model_source_dir(), + role or self.role, + entry_point or self.entry_point, + source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, container_log_level=self.container_log_level, @@ -192,7 +205,7 @@ def create_model( image=self.image_name, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), - dependencies=self.dependencies, + dependencies=(dependencies or self.dependencies), ) @classmethod diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 83c72b27e7..bbe06a77e8 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -1,4 +1,4 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -1524,6 +1524,7 @@ def transformer( role=None, model_server_workers=None, volume_kms_key=None, + entry_point=None, ): """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the SageMaker Session and base job name used by @@ -1561,11 +1562,19 @@ def transformer( worker per vCPU. volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML compute instance (default: None). + entry_point (str): Path (absolute or relative) to the local Python source file which should be executed + as the entry point to training. If not specified, the training entry point is used. + + Returns: + sagemaker.transformer.Transformer: a ``Transformer`` object that can be used to start a + SageMaker Batch Transform job. """ role = role or self.role if self.latest_training_job is not None: - model = self.create_model(role=role, model_server_workers=model_server_workers) + model = self.create_model( + role=role, model_server_workers=model_server_workers, entry_point=entry_point + ) container_def = model.prepare_container_def(instance_type) model_name = model.name or name_from_image(container_def["Image"]) diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index c3713aa354..e74a1cffa2 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -135,7 +135,13 @@ def _configure_distribution(self, distributions): self._hyperparameters[self.LAUNCH_PS_ENV_NAME] = enabled def create_model( - self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT + self, + model_server_workers=None, + role=None, + vpc_config_override=VPC_CONFIG_DEFAULT, + entry_point=None, + source_dir=None, + dependencies=None, ): """Create a SageMaker ``MXNetModel`` object that can be deployed to an ``Endpoint``. @@ -151,17 +157,24 @@ def create_model( the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. + entry_point (str): Path (absolute or relative) to the local Python source file which should be executed + as the entry point to training. If not specified, the training entry point is used. + source_dir (str): Path (absolute or relative) to a directory with any other serving + source code dependencies aside from the entry point file. + If not specified, the model source directory from training is used. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container. + If not specified, the dependencies from training are used. Returns: sagemaker.mxnet.model.MXNetModel: A SageMaker ``MXNetModel`` object. See :func:`~sagemaker.mxnet.model.MXNetModel` for full details. 
""" - role = role or self.role return MXNetModel( self.model_data, - role, - self.entry_point, - source_dir=self._model_source_dir(), + role or self.role, + entry_point or self.entry_point, + source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, container_log_level=self.container_log_level, @@ -172,7 +185,7 @@ def create_model( model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), - dependencies=self.dependencies, + dependencies=(dependencies or self.dependencies), ) @classmethod diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index b23cf7a793..9c0ebf1263 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -108,7 +108,13 @@ def __init__( self.py_version = py_version def create_model( - self, model_server_workers=None, role=None, vpc_config_override=VPC_CONFIG_DEFAULT + self, + model_server_workers=None, + role=None, + vpc_config_override=VPC_CONFIG_DEFAULT, + entry_point=None, + source_dir=None, + dependencies=None, ): """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``. @@ -124,17 +130,24 @@ def create_model( the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. + entry_point (str): Path (absolute or relative) to the local Python source file which should be executed + as the entry point to training. If not specified, the training entry point is used. + source_dir (str): Path (absolute or relative) to a directory with any other serving + source code dependencies aside from the entry point file. + If not specified, the model source directory from training is used. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container. + If not specified, the dependencies from training are used. Returns: sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel`` object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details. """ - role = role or self.role return PyTorchModel( self.model_data, - role, - self.entry_point, - source_dir=self._model_source_dir(), + role or self.role, + entry_point or self.entry_point, + source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, container_log_level=self.container_log_level, @@ -145,7 +158,7 @@ def create_model( model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), - dependencies=self.dependencies, + dependencies=(dependencies or self.dependencies), ) @classmethod diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index c25f6691fc..d07dc63961 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -470,8 +470,12 @@ def create_model( role=None, vpc_config_override=VPC_CONFIG_DEFAULT, endpoint_type=None, + entry_point=None, + source_dir=None, + dependencies=None, ): - """Create a SageMaker ``TensorFlowModel`` object that can be deployed to an ``Endpoint``. 
+ """Create a ``Model`` object that can be used for creating SageMaker model entities, + deploying to a SageMaker endpoint, or starting SageMaker Batch Transform jobs. Args: role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during @@ -482,27 +486,55 @@ def create_model( Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - endpoint_type: Optional. Selects the software stack used by the inference server. + endpoint_type (str): Optional. Selects the software stack used by the inference server. If not specified, the model will be configured to use the default SageMaker model server. If 'tensorflow-serving', the model will be configured to use the SageMaker Tensorflow Serving container. + entry_point (str): Path (absolute or relative) to the local Python source file which should be executed + as the entry point to training. If not specified and ``endpoint_type`` is 'tensorflow-serving', + no entry point is used. If ``endpoint_type`` is also ``None``, then the training entry point is used. + source_dir (str): Path (absolute or relative) to a directory with any other serving + source code dependencies aside from the entry point file. If not specified and + ``endpoint_type`` is 'tensorflow-serving', no source_dir is used. If ``endpoint_type`` is also ``None``, + then the model source directory from training is used. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container. + If not specified and ``endpoint_type`` is 'tensorflow-serving', ``dependencies`` is set to ``None``. + If ``endpoint_type`` is also ``None``, then the dependencies from training are used. Returns: - sagemaker.tensorflow.model.TensorFlowModel: A SageMaker ``TensorFlowModel`` object. - See :func:`~sagemaker.tensorflow.model.TensorFlowModel` for full details. + sagemaker.tensorflow.model.TensorFlowModel or sagemaker.tensorflow.serving.Model: A ``Model`` object. + See :class:`~sagemaker.tensorflow.serving.Model` or :class:`~sagemaker.tensorflow.model.TensorFlowModel` + for full details. 
""" - role = role or self.role + if endpoint_type == "tensorflow-serving" or self._script_mode_enabled(): - return self._create_tfs_model(role=role, vpc_config_override=vpc_config_override) + return self._create_tfs_model( + role=role, + vpc_config_override=vpc_config_override, + entry_point=entry_point, + source_dir=source_dir, + dependencies=dependencies, + ) return self._create_default_model( model_server_workers=model_server_workers, role=role, vpc_config_override=vpc_config_override, + entry_point=entry_point, + source_dir=source_dir, + dependencies=dependencies, ) - def _create_tfs_model(self, role=None, vpc_config_override=VPC_CONFIG_DEFAULT): + def _create_tfs_model( + self, + role=None, + vpc_config_override=VPC_CONFIG_DEFAULT, + entry_point=None, + source_dir=None, + dependencies=None, + ): """Placeholder docstring""" return Model( model_data=self.model_data, @@ -513,15 +545,26 @@ def _create_tfs_model(self, role=None, vpc_config_override=VPC_CONFIG_DEFAULT): framework_version=utils.get_short_version(self.framework_version), sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), + entry_point=entry_point, + source_dir=source_dir, + dependencies=dependencies, ) - def _create_default_model(self, model_server_workers, role, vpc_config_override): + def _create_default_model( + self, + model_server_workers, + role, + vpc_config_override, + entry_point=None, + source_dir=None, + dependencies=None, + ): """Placeholder docstring""" return TensorFlowModel( self.model_data, role, - self.entry_point, - source_dir=self._model_source_dir(), + entry_point or self.entry_point, + source_dir=source_dir or self._model_source_dir(), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, env={"SAGEMAKER_REQUIREMENTS": self.requirements_file}, image=self.image_name, @@ -533,7 +576,7 @@ def _create_default_model(self, model_server_workers, role, vpc_config_override) model_server_workers=model_server_workers, sagemaker_session=self.sagemaker_session, vpc_config=self.get_vpc_config(vpc_config_override), - dependencies=self.dependencies, + dependencies=dependencies or self.dependencies, ) def hyperparameters(self): @@ -625,6 +668,7 @@ def transformer( model_server_workers=None, volume_kms_key=None, endpoint_type=None, + entry_point=None, ): """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the SageMaker Session and base job name used by the Estimator. @@ -656,6 +700,9 @@ def transformer( SageMaker model server. If 'tensorflow-serving', the model will be configured to use the SageMaker Tensorflow Serving container. + entry_point (str): Path (absolute or relative) to the local Python source file which should be executed + as the entry point to training. If not specified and ``endpoint_type`` is 'tensorflow-serving', + no entry point is used. If ``endpoint_type`` is also ``None``, then the training entry point is used. """ role = role or self.role @@ -664,6 +711,7 @@ def transformer( role=role, vpc_config_override=VPC_CONFIG_DEFAULT, endpoint_type=endpoint_type, + entry_point=entry_point, ) return model.transformer( instance_count, diff --git a/tests/data/tfs/tfs-test-entrypoint-with-handler/inference.py b/tests/data/tfs/tfs-test-entrypoint-with-handler/inference.py index 495c0dde24..aa1a5cdd20 100644 --- a/tests/data/tfs/tfs-test-entrypoint-with-handler/inference.py +++ b/tests/data/tfs/tfs-test-entrypoint-with-handler/inference.py @@ -1,4 +1,4 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -10,19 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. - -"""Exports a toy TensorFlow model. -Exports a TensorFlow model to /opt/ml/model/ -This graph calculates, - y = a*x + b -where a and b are variables with a=0.5 and b=2. -""" import json -import shutil - - -def save_model(): - shutil.copytree("/opt/ml/code/123", "/opt/ml/model/123") def input_handler(data, context): @@ -36,7 +24,3 @@ def output_handler(data, context): response_content_type = context.accept_header prediction = data.content return prediction, response_content_type - - -if __name__ == "__main__": - save_model() diff --git a/tests/data/tfs/tfs-test-entrypoint-with-handler/training.py b/tests/data/tfs/tfs-test-entrypoint-with-handler/training.py new file mode 100644 index 0000000000..3a6c1b8ee1 --- /dev/null +++ b/tests/data/tfs/tfs-test-entrypoint-with-handler/training.py @@ -0,0 +1,28 @@ +# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + +"""Exports a toy TensorFlow model. +Exports a TensorFlow model to /opt/ml/model/ +This graph calculates, + y = a*x + b +where a and b are variables with a=0.5 and b=2. +""" +import shutil + + +def save_model(): + shutil.copytree("/opt/ml/code/123", "/opt/ml/model/123") + + +if __name__ == "__main__": + save_model() diff --git a/tests/integ/test_tf_script_mode.py b/tests/integ/test_tf_script_mode.py index 685dbae36b..5892c2736a 100644 --- a/tests/integ/test_tf_script_mode.py +++ b/tests/integ/test_tf_script_mode.py @@ -185,7 +185,7 @@ def test_mnist_async(sagemaker_session): def test_deploy_with_input_handlers(sagemaker_session, instance_type): estimator = TensorFlow( - entry_point="inference.py", + entry_point="training.py", source_dir=TFS_RESOURCE_PATH, role=ROLE, train_instance_count=1, @@ -202,9 +202,11 @@ def test_deploy_with_input_handlers(sagemaker_session, instance_type): endpoint_name = estimator.latest_training_job.name with timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): - predictor = estimator.deploy( - initial_instance_count=1, instance_type=instance_type, endpoint_name=endpoint_name + initial_instance_count=1, + instance_type=instance_type, + endpoint_name=endpoint_name, + entry_point=os.path.join(TFS_RESOURCE_PATH, "inference.py"), ) input_data = {"instances": [1.0, 2.0, 5.0]} diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index 8357f4c6b4..bc0c141476 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -1,4 +1,4 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -29,6 +29,7 @@ DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") SCRIPT_PATH = os.path.join(DATA_DIR, "dummy_script.py") +SERVING_SCRIPT_FILE = "another_dummy_script.py" MODEL_DATA = "s3://some/data.tar.gz" TIMESTAMP = "2017-11-06-14:14:15.672" TIME = 1507167947 @@ -314,12 +315,16 @@ def test_create_model_with_optional_params(sagemaker_session): model_server_workers = 2 vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]} model = chainer.create_model( - role=new_role, model_server_workers=model_server_workers, vpc_config_override=vpc_config + role=new_role, + model_server_workers=model_server_workers, + vpc_config_override=vpc_config, + entry_point=SERVING_SCRIPT_FILE, ) assert model.role == new_role assert model.model_server_workers == model_server_workers assert model.vpc_config == vpc_config + assert model.entry_point == SERVING_SCRIPT_FILE def test_create_model_with_custom_image(sagemaker_session): diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index d8f95240b2..766729c341 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -1,4 +1,4 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -112,8 +112,10 @@ class DummyFramework(Framework): def train_image(self): return IMAGE_NAME - def create_model(self, role=None, model_server_workers=None): - return DummyFrameworkModel(self.sagemaker_session, vpc_config=self.get_vpc_config()) + def create_model(self, role=None, model_server_workers=None, entry_point=None): + return DummyFrameworkModel( + self.sagemaker_session, vpc_config=self.get_vpc_config(), entry_point=entry_point + ) @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): @@ -125,13 +127,13 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na class DummyFrameworkModel(FrameworkModel): - def __init__(self, sagemaker_session, **kwargs): + def __init__(self, sagemaker_session, entry_point=None, **kwargs): super(DummyFrameworkModel, self).__init__( MODEL_DATA, MODEL_IMAGE, INSTANCE_TYPE, ROLE, - ENTRY_POINT, + entry_point or ENTRY_POINT, sagemaker_session=sagemaker_session, **kwargs ) diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 3a3a82b494..bfbb4387d7 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -1,4 +1,4 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. 
A copy of @@ -28,6 +28,7 @@ DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") SCRIPT_PATH = os.path.join(DATA_DIR, "dummy_script.py") +SERVING_SCRIPT_FILE = "another_dummy_script.py" MODEL_DATA = "s3://mybucket/model" TIMESTAMP = "2017-11-06-14:14:15.672" TIME = 1507167947 @@ -213,12 +214,16 @@ def test_create_model_with_optional_params(sagemaker_session): model_server_workers = 2 vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]} model = mx.create_model( - role=new_role, model_server_workers=model_server_workers, vpc_config_override=vpc_config + role=new_role, + model_server_workers=model_server_workers, + vpc_config_override=vpc_config, + entry_point=SERVING_SCRIPT_FILE, ) assert model.role == new_role assert model.model_server_workers == model_server_workers assert model.vpc_config == vpc_config + assert model.entry_point == SERVING_SCRIPT_FILE def test_create_model_with_custom_image(sagemaker_session): diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index 7eeadaea27..14079b78e4 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -1,4 +1,4 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of @@ -27,6 +27,7 @@ DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") SCRIPT_PATH = os.path.join(DATA_DIR, "dummy_script.py") +SERVING_SCRIPT_FILE = "another_dummy_script.py" MODEL_DATA = "s3://some/data.tar.gz" TIMESTAMP = "2017-11-06-14:14:15.672" TIME = 1507167947 @@ -195,12 +196,16 @@ def test_create_model_with_optional_params(sagemaker_session): model_server_workers = 2 vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]} model = pytorch.create_model( - role=new_role, model_server_workers=model_server_workers, vpc_config_override=vpc_config + role=new_role, + model_server_workers=model_server_workers, + vpc_config_override=vpc_config, + entry_point=SERVING_SCRIPT_FILE, ) assert model.role == new_role assert model.model_server_workers == model_server_workers assert model.vpc_config == vpc_config + assert model.entry_point == SERVING_SCRIPT_FILE def test_create_model_with_custom_image(sagemaker_session): diff --git a/tests/unit/test_tf_estimator.py b/tests/unit/test_tf_estimator.py index 93040ba6e3..bfaf1a29e7 100644 --- a/tests/unit/test_tf_estimator.py +++ b/tests/unit/test_tf_estimator.py @@ -1,4 +1,4 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. 
A copy of @@ -29,6 +29,7 @@ DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") SCRIPT_FILE = "dummy_script.py" SCRIPT_PATH = os.path.join(DATA_DIR, SCRIPT_FILE) +SERVING_SCRIPT_FILE = "another_dummy_script.py" MODEL_DATA = "s3://some/data.tar.gz" REQUIREMENTS_FILE = "dummy_requirements.txt" TIMESTAMP = "2017-11-06-14:14:15.673" @@ -298,12 +299,16 @@ def test_create_model_with_optional_params(sagemaker_session): model_server_workers = 2 vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]} model = tf.create_model( - role=new_role, model_server_workers=model_server_workers, vpc_config_override=vpc_config + role=new_role, + model_server_workers=model_server_workers, + vpc_config_override=vpc_config, + entry_point=SERVING_SCRIPT_FILE, ) assert model.role == new_role assert model.model_server_workers == model_server_workers assert model.vpc_config == vpc_config + assert model.entry_point == SERVING_SCRIPT_FILE @patch("sagemaker.tensorflow.estimator.TensorFlow.create_model") @@ -319,13 +324,19 @@ def test_transformer_creation_with_endpoint_type(create_model, sagemaker_session train_instance_type=INSTANCE_TYPE, ) - tf.transformer(INSTANCE_COUNT, INSTANCE_TYPE, endpoint_type="tensorflow-serving") + tf.transformer( + INSTANCE_COUNT, + INSTANCE_TYPE, + endpoint_type="tensorflow-serving", + entry_point=SERVING_SCRIPT_FILE, + ) create_model.assert_called_with( endpoint_type="tensorflow-serving", model_server_workers=None, role=ROLE, vpc_config_override="VPC_CONFIG_DEFAULT", + entry_point=SERVING_SCRIPT_FILE, ) model.transformer.assert_called_with( INSTANCE_COUNT, @@ -362,6 +373,7 @@ def test_transformer_creation_without_endpoint_type(create_model, sagemaker_sess model_server_workers=None, role=ROLE, vpc_config_override="VPC_CONFIG_DEFAULT", + entry_point=None, ) model.transformer.assert_called_with( INSTANCE_COUNT, From 910d9f86ca32ee8d987470b46e7688f4e90cb724 Mon Sep 17 00:00:00 2001 From: Chuyang Date: Fri, 19 Jul 2019 10:55:51 -0700 Subject: [PATCH 27/35] change: update PyTorch version (#947) --- README.rst | 2 +- src/sagemaker/pytorch/README.rst | 72 +++++++++++++++--------------- src/sagemaker/pytorch/defaults.py | 2 +- src/sagemaker/pytorch/estimator.py | 2 +- tests/data/pytorch_mnist/mnist.py | 2 + tests/integ/test_git.py | 3 +- 6 files changed, 43 insertions(+), 40 deletions(-) diff --git a/README.rst b/README.rst index bacf953a20..eff60c8941 100644 --- a/README.rst +++ b/README.rst @@ -221,7 +221,7 @@ PyTorch SageMaker Estimators With PyTorch SageMaker Estimators, you can train and host PyTorch models on Amazon SageMaker. -Supported versions of PyTorch: ``0.4.0``, ``1.0.0``. +Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``. We recommend that you use the latest supported version, because that's where we focus most of our development efforts. diff --git a/src/sagemaker/pytorch/README.rst b/src/sagemaker/pytorch/README.rst index b02775f3ab..cb8dc780f3 100644 --- a/src/sagemaker/pytorch/README.rst +++ b/src/sagemaker/pytorch/README.rst @@ -4,7 +4,7 @@ SageMaker PyTorch Estimators and Models With PyTorch Estimators and Models, you can train and host PyTorch models on Amazon SageMaker. -Supported versions of PyTorch: ``0.4.0``, ``1.0.0``. +Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``. We recommend that you use the latest supported version, because that's where we focus most of our development efforts. 
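Because this patch raises ``LATEST_VERSION`` to 1.1 while the default ``PYTORCH_VERSION`` stays at 0.4 (see the ``defaults.py`` hunk below), existing jobs keep their old behavior and users opt in to the new version explicitly. A minimal sketch of pinning the new version; the script name, role, and data path are placeholders:

.. code:: python

    from sagemaker.pytorch import PyTorch

    estimator = PyTorch(entry_point='train.py',           # hypothetical training script
                        role='SageMakerRole',             # hypothetical IAM role
                        framework_version='1.1.0',        # pin the newly supported version
                        py_version='py3',
                        train_instance_count=1,
                        train_instance_type='ml.p3.2xlarge')
    estimator.fit('s3://my-bucket/my-training-data')      # hypothetical S3 prefix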
@@ -37,41 +37,41 @@ This Python version applies to both the Training Job, created by fit, and the En The PyTorch Docker images have the following dependencies installed: -+-----------------------------+---------------+-------------------+ -| Dependencies | pytorch 0.4.0 | pytorch 1.0.0 | -+-----------------------------+---------------+-------------------+ -| boto3 | >=1.7.35 | >=1.9.11 | -+-----------------------------+---------------+-------------------+ -| botocore | >=1.10.35 | >=1.12.11 | -+-----------------------------+---------------+-------------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | -+-----------------------------+---------------+-------------------+ -| numpy | >=1.14.3 | >=1.15.2 | -+-----------------------------+---------------+-------------------+ -| Pillow | >=5.1.0 | >=5.2.0 | -+-----------------------------+---------------+-------------------+ -| pip | >=10.0.1 | >=18.0 | -+-----------------------------+---------------+-------------------+ -| python-dateutil | >=2.7.3 | >=2.7.3 | -+-----------------------------+---------------+-------------------+ -| retrying | >=1.3.3 | >=1.3.3 | -+-----------------------------+---------------+-------------------+ -| s3transfer | >=0.1.13 | >=0.1.13 | -+-----------------------------+---------------+-------------------+ -| sagemaker-containers | >=2.1.0 | >=2.1.0 | -+-----------------------------+---------------+-------------------+ -| sagemaker-pytorch-container | 1.0 | 1.0 | -+-----------------------------+---------------+-------------------+ -| setuptools | >=39.2.0 | >=40.4.3 | -+-----------------------------+---------------+-------------------+ -| six | >=1.11.0 | >=1.11.0 | -+-----------------------------+---------------+-------------------+ -| torch | 0.4.0 | 1.0.0 | -+-----------------------------+---------------+-------------------+ -| torchvision | 0.2.1 | 0.2.1 | -+-----------------------------+---------------+-------------------+ -| Python | 2.7 or 3.5 | 2.7 or 3.6 | -+-----------------------------+---------------+-------------------+ ++-----------------------------+---------------+-------------------+-------------------+ +| Dependencies | pytorch 0.4.0 | pytorch 1.0.0 | pytorch 1.1.0 | ++-----------------------------+---------------+-------------------+-------------------+ +| boto3 | >=1.7.35 | >=1.9.11 | 1.9.82 | ++-----------------------------+---------------+-------------------+-------------------+ +| botocore | >=1.10.35 | >=1.12.11 | >= 1.12.11 | ++-----------------------------+---------------+-------------------+-------------------+ +| CUDA (GPU image only) | 9.0 | 9.0 | 10.1 | ++-----------------------------+---------------+-------------------+-------------------+ +| numpy | >=1.14.3 | >=1.15.2 | 1.16.4 | ++-----------------------------+---------------+-------------------+-------------------+ +| Pillow | >=5.1.0 | >=5.2.0 | 6.0.0 | ++-----------------------------+---------------+-------------------+-------------------+ +| pip | >=10.0.1 | >=18.0 | >=18.0 | ++-----------------------------+---------------+-------------------+-------------------+ +| python-dateutil | >=2.7.3 | >=2.7.3 | >=2.7.3 | ++-----------------------------+---------------+-------------------+-------------------+ +| retrying | >=1.3.3 | >=1.3.3 | 1.3.3 | ++-----------------------------+---------------+-------------------+-------------------+ +| s3transfer | >=0.1.13 | >=0.1.13 | >=0.1.13 | ++-----------------------------+---------------+-------------------+-------------------+ +| sagemaker-containers | >=2.1.0 | >=2.1.0 | 2.4.10.post0 | 
++-----------------------------+---------------+-------------------+-------------------+ +| sagemaker-pytorch-container | 1.0 | 1.1 | 1.2 | ++-----------------------------+---------------+-------------------+-------------------+ +| setuptools | >=39.2.0 | >=40.4.3 | >=40.4.3 | ++-----------------------------+---------------+-------------------+-------------------+ +| six | >=1.11.0 | >=1.11.0 | 1.12.0 | ++-----------------------------+---------------+-------------------+-------------------+ +| torch | 0.4.0 | 1.0.0 | 1.1.0 | ++-----------------------------+---------------+-------------------+-------------------+ +| torchvision | 0.2.1 | 0.2.1 | 0.3.0 | ++-----------------------------+---------------+-------------------+-------------------+ +| Python | 2.7 or 3.5 | 2.7 or 3.6 | 2.7 or 3.6 | ++-----------------------------+---------------+-------------------+-------------------+ The Docker images extend Ubuntu 16.04. diff --git a/src/sagemaker/pytorch/defaults.py b/src/sagemaker/pytorch/defaults.py index f851a473cb..372424b6bd 100644 --- a/src/sagemaker/pytorch/defaults.py +++ b/src/sagemaker/pytorch/defaults.py @@ -15,7 +15,7 @@ PYTORCH_VERSION = "0.4" """Default PyTorch version for when the framework version is not specified. -This is no longer updated so as to not break existing workflows. +The latest PyTorch version is 1.1.0, but the default version is no longer updated so as to not break existing workflows. """ PYTHON_VERSION = "py3" diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index 9c0ebf1263..74c45b8c6e 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -34,7 +34,7 @@ class PyTorch(Framework): __framework_name__ = "pytorch" - LATEST_VERSION = "1.0" + LATEST_VERSION = "1.1" """The latest version of PyTorch included in the SageMaker pre-built Docker images.""" def __init__( diff --git a/tests/data/pytorch_mnist/mnist.py b/tests/data/pytorch_mnist/mnist.py index 6eb2d43228..a7d542064e 100644 --- a/tests/data/pytorch_mnist/mnist.py +++ b/tests/data/pytorch_mnist/mnist.py @@ -47,6 +47,7 @@ def _get_train_data_loader(training_dir, is_distributed, batch_size, **kwargs): transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] ), + download=True, ) train_sampler = ( torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None @@ -70,6 +71,7 @@ def _get_test_data_loader(training_dir, **kwargs): transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] ), + download=True, ), batch_size=1000, shuffle=True, diff --git a/tests/integ/test_git.py b/tests/integ/test_git.py index cd0edd6ad0..be3c85e3f2 100644 --- a/tests/integ/test_git.py +++ b/tests/integ/test_git.py @@ -21,6 +21,7 @@ from tests.integ import lock as lock from sagemaker.mxnet.estimator import MXNet +from sagemaker.pytorch.defaults import PYTORCH_VERSION from sagemaker.pytorch.estimator import PyTorch from sagemaker.sklearn.estimator import SKLearn from sagemaker.mxnet.model import MXNetModel @@ -60,7 +61,7 @@ def test_git_support_with_pytorch(sagemaker_local_session): entry_point=script_path, role="SageMakerRole", source_dir="pytorch", - framework_version=PyTorch.LATEST_VERSION, + framework_version=PYTORCH_VERSION, py_version=PYTHON_VERSION, train_instance_count=1, train_instance_type="local", From 946adeb3205efb44d7ad99fa7d6bfb71616e69d1 Mon Sep 17 00:00:00 2001 From: knaresh Date: Fri, 19 Jul 2019 14:20:04 -0700 Subject: [PATCH 28/35] change: 
improve documentation of some functions (#864) [pr-827][followups] Improve documentation of some functions. Also some unit test fixes. See comments from marcio in https://github.com/aws/sagemaker-python-sdk/pull/827 --- src/sagemaker/session.py | 4 ++-- src/sagemaker/transformer.py | 8 +++++--- tests/integ/test_transformer.py | 4 +++- tests/unit/test_session.py | 5 ++++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 5b3b51e335..470297cbbd 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -581,9 +581,9 @@ def transform( input_config (dict): A dictionary describing the input data (and its location) for the job. output_config (dict): A dictionary describing the output location for the job. resource_config (dict): A dictionary describing the resources to complete the job. - tags (list[dict]): List of tags for labeling a transform job. + tags (list[dict]): List of tags for labeling a transform job. For more information, + see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. data_processing(dict): A dictionary describing config for combining the input data and transformed data. - For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. """ transform_request = { "TransformJobName": job_name, diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py index 593caae67d..a0f570ecc3 100644 --- a/src/sagemaker/transformer.py +++ b/src/sagemaker/transformer.py @@ -142,10 +142,12 @@ def transform( input_filter (str): A JSONPath to select a portion of the input to pass to the algorithm container for inference. If you omit the field, it gets the value '$', representing the entire input. - Some examples: "$[1:]", "$.features"(default: None). + For more information, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html. + Some examples: "$[1:]", "$.features" (default: None). output_filter (str): A JSONPath to select a portion of the - joined/original output to return as the output. Some examples: - "$[1:]", "$.prediction" (default: None). + joined/original output to return as the output. + For more information, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_CreateTransformJob.html. + Some examples: "$[1:]", "$.prediction" (default: None). join_source (str): The source of data to be joined to the transform output. It can be set to 'Input' meaning the entire input record will be joined to the inference result. 
You can use OutputFilter diff --git a/tests/integ/test_transformer.py b/tests/integ/test_transformer.py index c519cb6786..ad3fd65c2d 100644 --- a/tests/integ/test_transformer.py +++ b/tests/integ/test_transformer.py @@ -67,12 +67,13 @@ def test_transform_mxnet(sagemaker_session, mxnet_full_version): kms_key_arn = get_or_create_kms_key(sagemaker_session) output_filter = "$" + input_filter = "$" transformer = _create_transformer_and_transform_job( mx, transform_input, kms_key_arn, - input_filter=None, + input_filter=input_filter, output_filter=output_filter, join_source=None, ) @@ -86,6 +87,7 @@ def test_transform_mxnet(sagemaker_session, mxnet_full_version): ) assert kms_key_arn == job_desc["TransformResources"]["VolumeKmsKeyId"] assert output_filter == job_desc["DataProcessing"]["OutputFilter"] + assert input_filter == job_desc["DataProcessing"]["InputFilter"] @pytest.mark.canary_quick diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index e401622353..714da580c8 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -676,12 +676,15 @@ def test_transform_pack_to_request(sagemaker_session): resource_config = {"InstanceCount": INSTANCE_COUNT, "InstanceType": INSTANCE_TYPE} + data_processing = {"OutputFilter": "$", "InputFilter": "$", "JoinSource": "Input"} + expected_args = { "TransformJobName": JOB_NAME, "ModelName": model_name, "TransformInput": in_config, "TransformOutput": out_config, "TransformResources": resource_config, + "DataProcessing": data_processing, } sagemaker_session.transform( @@ -695,7 +698,7 @@ def test_transform_pack_to_request(sagemaker_session): output_config=out_config, resource_config=resource_config, tags=None, - data_processing=None, + data_processing=data_processing, ) _, _, actual_args = sagemaker_session.sagemaker_client.method_calls[0] From af4b3e3f271d886f1125fab6f6c78c8b4d71c7b3 Mon Sep 17 00:00:00 2001 From: Eric Slesar <34587362+eslesar-aws@users.noreply.github.com> Date: Fri, 19 Jul 2019 16:34:42 -0700 Subject: [PATCH 29/35] doc: update using_tensorflow topic (#946) --- doc/conf.py | 4 + doc/overview.rst | 53 -- doc/using_chainer.rst | 1396 ++++++++++++++++++------------------- doc/using_mxnet.rst | 8 +- doc/using_pytorch.rst | 1460 +++++++++++++++++++-------------------- doc/using_rl.rst | 642 ++++++++--------- doc/using_sklearn.rst | 1276 +++++++++++++++++----------------- doc/using_tf.rst | 1486 ++++++++++++++++++++++++++-------------- doc/using_workflow.rst | 330 ++++----- 9 files changed, 3545 insertions(+), 3110 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index df38101665..c0ff0e0c51 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -60,6 +60,7 @@ def __getattr__(cls, name): "sphinx.ext.coverage", "sphinx.ext.autosummary", "sphinx.ext.napoleon", + "sphinx.ext.autosectionlabel", ] # Add any paths that contain templates here, relative to this directory. @@ -95,3 +96,6 @@ def __getattr__(cls, name): # autosummary autosummary_generate = True + +# autosectionlabel +autosectionlabel_prefix_document = True diff --git a/doc/overview.rst b/doc/overview.rst index 69992d9f1f..cf525cd4a4 100644 --- a/doc/overview.rst +++ b/doc/overview.rst @@ -826,59 +826,6 @@ A new training job channel, named ``code``, will be added with that S3 URI. Bef Once the training job begins, the training container will look at the offline input ``code`` channel to install dependencies and run the entry script. This isolates the training container, so no inbound or outbound network calls can be made. 
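The ``input_filter``, ``output_filter``, and ``join_source`` arguments whose documentation this patch series improves can be exercised together in a batch transform job. A minimal sketch, assuming an existing ``transformer`` object and a placeholder S3 input prefix:

.. code:: python

    # Keep only the 'features' field of each input record, join each
    # prediction back onto the record it came from, and return only the
    # 'prediction' field of the joined result.
    transformer.transform('s3://my-bucket/batch-input',   # hypothetical input prefix
                          content_type='application/json',
                          split_type='Line',
                          input_filter='$.features',
                          output_filter='$.prediction',
                          join_source='Input')
    transformer.wait()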
-*********
-BYO Model
-*********
-
-You can also create an endpoint from an existing model rather than training one.
-That is, you can bring your own model:
-
-First, package the files for the trained model into a ``.tar.gz`` file, and upload the archive to S3.
-
-Next, create a ``Model`` object that corresponds to the framework that you are using: `MXNetModel `__ or `TensorFlowModel `__.
-
-Example code using ``MXNetModel``:
-
-.. code:: python
-
-   from sagemaker.mxnet.model import MXNetModel
-
-   sagemaker_model = MXNetModel(model_data='s3://path/to/model.tar.gz',
-                                role='arn:aws:iam::accid:sagemaker-role',
-                                entry_point='entry_point.py')
-
-After that, invoke the ``deploy()`` method on the ``Model``:
-
-.. code:: python
-
-   predictor = sagemaker_model.deploy(initial_instance_count=1,
-                                      instance_type='ml.m4.xlarge')
-
-This returns a predictor the same way an ``Estimator`` does when ``deploy()`` is called. You can now get inferences just like with any other model deployed on Amazon SageMaker.
-
-Git support is also available when you bring your own model, through which you can use inference scripts stored in your
-Git repositories. The process is similar to using Git support for training jobs. You can simply provide ``git_config``
-when creating the ``Model`` object, and let ``entry_point``, ``source_dir`` and ``dependencies`` (if needed) be relative
-paths inside the Git repository:
-
-.. code:: python
-
-   git_config = {'repo': 'https://github.com/username/repo-with-training-scripts.git',
-                 'branch': 'branch1',
-                 'commit': '4893e528afa4a790331e1b5286954f073b0f14a2'}
-
-   sagemaker_model = MXNetModel(model_data='s3://path/to/model.tar.gz',
-                                role='arn:aws:iam::accid:sagemaker-role',
-                                entry_point='inference.py',
-                                source_dir='mxnet',
-                                git_config=git_config)
-
-A full example is available in the `Amazon SageMaker examples repository `__.
-
-You can also find this notebook in the **Advanced Functionality** section of the **SageMaker Examples** section in a notebook instance.
-For information about using sample notebooks in a SageMaker notebook instance, see `Use Example Notebooks `__
-in the AWS documentation.
-
*******************
Inference Pipelines
*******************

diff --git a/doc/using_chainer.rst b/doc/using_chainer.rst
index ddee676e73..067e83c1dd 100644
--- a/doc/using_chainer.rst
+++ b/doc/using_chainer.rst
@@ -1,699 +1,699 @@
-===========================================
-Using Chainer with the SageMaker Python SDK
-===========================================
-
-.. contents::
-
-With Chainer Estimators, you can train and host Chainer models on Amazon SageMaker.
-
-Supported versions of Chainer: ``4.0.0``, ``4.1.0``, ``5.0.0``
-
-You can visit the Chainer repository at https://github.com/chainer/chainer.
-
-
-Training with Chainer
-~~~~~~~~~~~~~~~~~~~~~
-
-Training Chainer models using ``Chainer`` Estimators is a two-step process:
-
-1. Prepare a Chainer script to run on SageMaker.
-2. Run this script on SageMaker via a ``Chainer`` Estimator.
-
-
-That is, you first prepare your training script, and then you run it on SageMaker via a ``Chainer`` Estimator.
-You should prepare your script in a separate source file from the notebook, terminal session, or source file you're
-using to submit the script to SageMaker via a ``Chainer`` Estimator.
-
-Suppose that you already have a Chainer training script called
-``chainer-train.py``. You can run this script in SageMaker as follows:
-
-.. code:: python
-
-    from sagemaker.chainer import Chainer
-    chainer_estimator = Chainer(entry_point='chainer-train.py',
-                                role='SageMakerRole',
-                                train_instance_type='ml.p3.2xlarge',
-                                train_instance_count=1,
-                                framework_version='5.0.0')
-    chainer_estimator.fit('s3://bucket/path/to/training/data')
-
-Where the S3 URL is a path to your training data within Amazon S3. The constructor keyword arguments define how
-SageMaker runs your training script and are discussed in detail in a later section.
-
-In the following sections, we'll discuss how to prepare a training script for execution on SageMaker,
-then how to run that script on SageMaker using a ``Chainer`` Estimator.
-
-Preparing the Chainer training script
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Your Chainer training script must be a Python 2.7 or 3.5 compatible source file.
-
-The training script is very similar to a training script you might run outside of SageMaker, but you
-can access useful properties about the training environment through various environment variables, such as:
-
-* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to.
-  These artifacts are uploaded to S3 for model hosting.
-* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host.
-* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may
-  include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed
-  and uploaded to S3 to the same S3 prefix as the model artifacts.
-
-Supposing two input channels, 'train' and 'test', were used in the call to the Chainer estimator's ``fit()`` method,
-the following will be set, following the format "SM_CHANNEL_[channel_name]":
-
-* ``SM_CHANNEL_TRAIN``: A string representing the path to the directory containing data in the 'train' channel
-* ``SM_CHANNEL_TEST``: Same as above, but for the 'test' channel.
-
-A typical training script loads data from the input channels, configures training with hyperparameters, trains a model,
-and saves a model to ``model_dir`` so that it can be hosted later. Hyperparameters are passed to your script as arguments
-and can be retrieved with an ``argparse.ArgumentParser`` instance. For example, a training script might start
-with the following:
-
-.. code:: python
-
-    import argparse
-    import os
-
-    if __name__ == '__main__':
-
-        parser = argparse.ArgumentParser()
-
-        # hyperparameters sent by the client are passed as command-line arguments to the script.
-        parser.add_argument('--epochs', type=int, default=50)
-        parser.add_argument('--batch-size', type=int, default=64)
-        parser.add_argument('--learning-rate', type=float, default=0.05)
-
-        # Data, model, and output directories
-        parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
-        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
-        parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
-        parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
-
-        args, _ = parser.parse_known_args()
-
-        # ... load from args.train and args.test, train a model, write model to args.model_dir.
-
-Because SageMaker imports your training script, you should put your training code in a main guard
-(``if __name__ == '__main__':``) if you are using the same script to host your model, so that SageMaker does not
-inadvertently run your training code at the wrong point in execution.
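To make the main-guard advice concrete, here is one possible skeleton (the body of ``train`` is elided); everything with side effects stays behind the guard, so the model server can import the file safely:

.. code:: python

    import argparse
    import os

    def train(args):
        # ... build datasets from args.train / args.test, fit the model,
        # and save it under args.model_dir ...
        pass

    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
        parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
        parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
        args, _ = parser.parse_known_args()
        train(args)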
-
-For more on training environment variables, please visit https://github.com/aws/sagemaker-containers.
-
-Using third-party libraries
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When running your training script on SageMaker, it will have access to some pre-installed third-party libraries including ``chainer``, ``numpy``, and ``cupy``.
-For more information on the runtime environment, including specific package versions, see `SageMaker Chainer Docker containers <#sagemaker-chainer-docker-containers>`__.
-
-If there are other packages you want to use with your script, you can include a ``requirements.txt`` file in the same directory as your training script to install other dependencies at runtime.
-A ``requirements.txt`` file is a text file that contains a list of items that are installed by using ``pip install``. You can also specify the version of an item to install.
-For information about the format of a ``requirements.txt`` file, see `Requirements Files `__ in the pip documentation.
-
-Running a Chainer training script in SageMaker
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-You run Chainer training scripts on SageMaker by creating ``Chainer`` Estimators.
-SageMaker training of your script is invoked when you call ``fit`` on a ``Chainer`` Estimator.
-The following code sample shows how you train a custom Chainer script "chainer-train.py", passing
-in three hyperparameters ('epochs', 'batch-size', and 'learning-rate'), and using two input channel
-directories ('train' and 'test').
-
-.. code:: python
-
-    chainer_estimator = Chainer('chainer-train.py',
-                                train_instance_type='ml.p3.2xlarge',
-                                train_instance_count=1,
-                                framework_version='5.0.0',
-                                hyperparameters={'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
-    chainer_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
-                           'test': 's3://my-data-bucket/path/to/my/test/data'})
-
-
-Chainer Estimators
-^^^^^^^^^^^^^^^^^^
-
-The ``Chainer`` constructor takes both required and optional arguments.
-
-Required arguments
-''''''''''''''''''
-
-The following are required arguments to the ``Chainer`` constructor. When you create a Chainer object, you must include
-these in the constructor, either positionally or as keyword arguments.
-
-- ``entry_point`` Path (absolute or relative) to the Python file which
-  should be executed as the entry point to training.
-- ``role`` An AWS IAM role (either name or full ARN). The Amazon
-  SageMaker training jobs and APIs that create Amazon SageMaker
-  endpoints use this role to access training data and model artifacts.
-  After the endpoint is created, the inference code might use the IAM
-  role, if accessing AWS resources.
-- ``train_instance_count`` Number of Amazon EC2 instances to use for
-  training.
-- ``train_instance_type`` Type of EC2 instance to use for training, for
-  example, 'ml.m4.xlarge'.
-
-Optional arguments
-''''''''''''''''''
-
-The following are optional arguments. When you create a ``Chainer`` object, you can specify these as keyword arguments.
-
-- ``source_dir`` Path (absolute or relative) to a directory with any
-  other training source code dependencies including the entry point
-  file. Structure within this directory will be preserved when training
-  on SageMaker.
-- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with
-  any additional libraries that will be exported to the container (default: []).
-  The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
-  If the ``source_dir`` points to S3, code will be uploaded and the S3 location will be used
-  instead. Example:
-
-      The following call
-      >>> Chainer(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
-      results in the following inside the container:
-
-      >>> $ ls
-
-      >>> opt/ml/code
-      >>> ├── train.py
-      >>> ├── common
-      >>> └── virtual-env
-
-- ``hyperparameters`` Hyperparameters that will be used for training.
-  Will be made accessible as a dict[str, str] to the training code on
-  SageMaker. For convenience, accepts other types besides str, but
-  str() will be called on keys and values to convert them before
-  training.
-- ``py_version`` Python version you want to use for executing your
-  model training code.
-- ``train_volume_size`` Size in GB of the EBS volume to use for storing
-  input data during training. Must be large enough to store training
-  data if input_mode='File' is used (which is the default).
-- ``train_max_run`` Timeout in seconds for training, after which Amazon
-  SageMaker terminates the job regardless of its current status.
-- ``input_mode`` The input mode that the algorithm supports. Valid
-  modes: 'File' - Amazon SageMaker copies the training dataset from the
-  S3 location to a directory in the Docker container. 'Pipe' - Amazon
-  SageMaker streams data directly from S3 to the container via a Unix
-  named pipe.
-- ``output_path`` S3 location where you want the training result (model
-  artifacts and optional output files) saved. If not specified, results
-  are stored to a default bucket. If the bucket with the specific name
-  does not exist, the estimator creates the bucket during the fit()
-  method execution.
-- ``output_kms_key`` Optional KMS key ID to encrypt training
-  output with.
-- ``job_name`` Name to assign for the training job that the fit()
-  method launches. If not specified, the estimator generates a default
-  job name, based on the training image name and current timestamp.
-- ``image_name`` An alternative docker image to use for training and
-  serving. If specified, the estimator will use this image for training and
-  hosting, instead of selecting the appropriate SageMaker official image based on
-  framework_version and py_version. Refer to `SageMaker Chainer Docker Containers
-  <#sagemaker-chainer-docker-containers>`__ for details on what the official images support
-  and where to find the source code to build your custom image.
-
-
-Distributed Chainer Training
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-
-Chainer allows you to train a model on multiple nodes using ChainerMN_, which distributes training with MPI.
-
-.. _ChainerMN: https://github.com/chainer/chainermn
-
-In order to run distributed Chainer training on SageMaker, your training script should use a ``chainermn`` Communicator
-object to coordinate training between multiple hosts; a sketch of this wiring follows the list of options below.
-
-SageMaker runs your script with ``mpirun`` if ``train_instance_count`` is greater than one.
-The following optional arguments modify how MPI runs your distributed training script.
-
-- ``use_mpi`` Boolean that overrides whether to run your training script with MPI.
-- ``num_processes`` Integer that determines how many total processes to run with MPI. By default, this is equal to ``process_slots_per_host`` times the number of nodes.
-- ``process_slots_per_host`` Integer that determines how many processes can be run on each host. By default, this is equal to one process per host on CPU instances, or one process per GPU on GPU instances.
-- ``additional_mpi_options`` String of additional options to pass to the ``mpirun`` command.
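A minimal sketch of the Communicator wiring described above, assuming a hypothetical ``MLP`` chain and a hypothetical ``load_dataset`` helper:

.. code:: python

    import chainer
    import chainermn

    comm = chainermn.create_communicator()

    model = chainer.links.Classifier(MLP(1000, 10))  # MLP is a placeholder chainer.Chain subclass

    # Wrap a regular optimizer so that gradients are exchanged over MPI.
    optimizer = chainermn.create_multi_node_optimizer(chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Load the dataset on rank 0 only, then scatter shards to every process.
    train = load_dataset() if comm.rank == 0 else None  # load_dataset is a placeholder
    train = chainermn.scatter_dataset(train, comm)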
-- ``additional_mpi_options`` String of additional options to pass to the ``mpirun`` command. - - -Calling fit -^^^^^^^^^^^ - -You start your training script by calling ``fit`` on a ``Chainer`` Estimator. ``fit`` takes both required and optional -arguments. - -Required arguments -'''''''''''''''''' - -- ``inputs``: This can take one of the following forms: A string - s3 URI, for example ``s3://my-bucket/my-training-data``. In this - case, the s3 objects rooted at the ``my-training-data`` prefix will - be available in the default ``train`` channel. A dict from - string channel names to s3 URIs. In this case, the objects rooted at - each s3 prefix will available as files in each channel directory. - -For example: - -.. code:: python - - {'train':'s3://my-bucket/my-training-data', - 'eval':'s3://my-bucket/my-evaluation-data'} - -.. optional-arguments-1: - -Optional arguments -'''''''''''''''''' - -- ``wait``: Defaults to True, whether to block and wait for the - training script to complete before returning. -- ``logs``: Defaults to True, whether to show logs produced by training - job in the Python session. Only meaningful when wait is True. - - -Saving models -~~~~~~~~~~~~~ - -In order to save your trained Chainer model for deployment on SageMaker, your training script should save your model -to a certain filesystem path called `model_dir`. This value is accessible through the environment variable -``SM_MODEL_DIR``. The following code demonstrates how to save a trained Chainer model named ``model`` as -``model.npz`` at the end of training: - -.. code:: python - - import chainer - import argparse - import os - - if __name__=='__main__': - # default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable. - parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR']) - args, _ = parser.parse_known_args() - - # ... train `model`, then save it to `model_dir` as file 'model.npz' - chainer.serializers.save_npz(os.path.join(args.model_dir, 'model.npz'), model) - -After your training job is complete, SageMaker will compress and upload the serialized model to S3, and your model data -will available in the s3 ``output_path`` you specified when you created the Chainer Estimator. - -Deploying Chainer models -~~~~~~~~~~~~~~~~~~~~~~~~ - -After an Chainer Estimator has been fit, you can host the newly created model in SageMaker. - -After calling ``fit``, you can call ``deploy`` on a ``Chainer`` Estimator to create a SageMaker Endpoint. -The Endpoint runs a SageMaker-provided Chainer model server and hosts the model produced by your training script, -which was run when you called ``fit``. This was the model you saved to ``model_dir``. - -``deploy`` returns a ``Predictor`` object, which you can use to do inference on the Endpoint hosting your Chainer model. -Each ``Predictor`` provides a ``predict`` method which can do inference with numpy arrays or Python lists. -Inference arrays or lists are serialized and sent to the Chainer model server by an ``InvokeEndpoint`` SageMaker -operation. - -``predict`` returns the result of inference against your model. By default, the inference result a NumPy array. - -.. 
code:: python - - # Train my estimator - chainer_estimator = Chainer(entry_point='train_and_deploy.py', - train_instance_type='ml.p3.2xlarge', - train_instance_count=1, - framework_version='5.0.0') - chainer_estimator.fit('s3://my_bucket/my_training_data/') - - # Deploy my estimator to a SageMaker Endpoint and get a Predictor - predictor = chainer_estimator.deploy(instance_type='ml.m4.xlarge', - initial_instance_count=1) - - # `data` is a NumPy array or a Python list. - # `response` is a NumPy array. - response = predictor.predict(data) - -You use the SageMaker Chainer model server to host your Chainer model when you call ``deploy`` on an ``Chainer`` -Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates. -You can access the name of the Endpoint by the ``name`` property on the returned ``Predictor``. - - -The SageMaker Chainer Model Server -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Chainer Endpoint you create with ``deploy`` runs a SageMaker Chainer model server. -The model server loads the model that was saved by your training script and performs inference on the model in response -to SageMaker InvokeEndpoint API calls. - -You can configure two components of the SageMaker Chainer model server: Model loading and model serving. -Model loading is the process of deserializing your saved model back into an Chainer model. -Serving is the process of translating InvokeEndpoint requests to inference calls on the loaded model. - -You configure the Chainer model server by defining functions in the Python source file you passed to the Chainer constructor. - -Model loading -^^^^^^^^^^^^^ - -Before a model can be served, it must be loaded. The SageMaker Chainer model server loads your model by invoking a -``model_fn`` function that you must provide in your script. The ``model_fn`` should have the following signature: - -.. code:: python - - def model_fn(model_dir) - -SageMaker will inject the directory where your model files and sub-directories, saved by ``save``, have been mounted. -Your model function should return a model object that can be used for model serving. - -SageMaker provides automated serving functions that work with Gluon API ``net`` objects and Module API ``Module`` objects. If you return either of these types of objects, then you will be able to use the default serving request handling functions. - -The following code-snippet shows an example ``model_fn`` implementation. -This loads returns a Chainer Classifier from a multi-layer perceptron class ``MLP`` that extends ``chainer.Chain``. -It loads the model parameters from a ``model.npz`` file in the SageMaker model directory ``model_dir``. - -.. code:: python - - import chainer - import os - - def model_fn(model_dir): - chainer.config.train = False - model = chainer.links.Classifier(MLP(1000, 10)) - chainer.serializers.load_npz(os.path.join(model_dir, 'model.npz'), model) - return model.predictor - -Model serving -^^^^^^^^^^^^^ - -After the SageMaker model server has loaded your model by calling ``model_fn``, SageMaker will serve your model. -Model serving is the process of responding to inference requests, received by SageMaker InvokeEndpoint API calls. -The SageMaker Chainer model server breaks request handling into three steps: - - -- input processing, -- prediction, and -- output processing. - -In a similar way to model loading, you configure these steps by defining functions in your Python source file. 
- -Each step involves invoking a python function, with information about the request and the return-value from the previous -function in the chain. Inside the SageMaker Chainer model server, the process looks like: - -.. code:: python - - # Deserialize the Invoke request body into an object we can perform prediction on - input_object = input_fn(request_body, request_content_type) - - # Perform prediction on the deserialized object, with the loaded model - prediction = predict_fn(input_object, model) - - # Serialize the prediction result into the desired response content type - output = output_fn(prediction, response_content_type) - -The above code-sample shows the three function definitions: - -- ``input_fn``: Takes request data and deserializes the data into an - object for prediction. -- ``predict_fn``: Takes the deserialized request object and performs - inference against the loaded model. -- ``output_fn``: Takes the result of prediction and serializes this - according to the response content type. - -The SageMaker Chainer model server provides default implementations of these functions. -You can provide your own implementations for these functions in your hosting script. -If you omit any definition then the SageMaker Chainer model server will use its default implementation for that -function. - -The ``RealTimePredictor`` used by Chainer in the SageMaker Python SDK serializes NumPy arrays to the `NPY `_ format -by default, with Content-Type ``application/x-npy``. The SageMaker Chainer model server can deserialize NPY-formatted -data (along with JSON and CSV data). - -If you rely solely on the SageMaker Chainer model server defaults, you get the following functionality: - -- Prediction on models that implement the ``__call__`` method -- Serialization and deserialization of NumPy arrays. - -The default ``input_fn`` and ``output_fn`` are meant to make it easy to predict on NumPy arrays. If your model expects -a NumPy array and returns a NumPy array, then these functions do not have to be overridden when sending NPY-formatted -data. - -In the following sections we describe the default implementations of input_fn, predict_fn, and output_fn. -We describe the input arguments and expected return types of each, so you can define your own implementations. - -Input processing -'''''''''''''''' - -When an InvokeEndpoint operation is made against an Endpoint running a SageMaker Chainer model server, -the model server receives two pieces of information: - -- The request Content-Type, for example "application/x-npy" -- The request data body, a byte array - -The SageMaker Chainer model server will invoke an "input_fn" function in your hosting script, -passing in this information. If you define an ``input_fn`` function definition, -it should return an object that can be passed to ``predict_fn`` and have the following signature: - -.. code:: python - - def input_fn(request_body, request_content_type) - -Where ``request_body`` is a byte buffer and ``request_content_type`` is a Python string - -The SageMaker Chainer model server provides a default implementation of ``input_fn``. -This function deserializes JSON, CSV, or NPY encoded data into a NumPy array. - -Default NPY deserialization requires ``request_body`` to follow the `NPY `_ format. For Chainer, the Python SDK -defaults to sending prediction requests with this format. - -Default json deserialization requires ``request_body`` contain a single json list. -Sending multiple json objects within the same ``request_body`` is not supported. 
-The list must have a dimensionality compatible with the model loaded in ``model_fn``. -The list's shape must be identical to the model's input shape, for all dimensions after the first (which first -dimension is the batch size). - -Default csv deserialization requires ``request_body`` contain one or more lines of CSV numerical data. -The data is loaded into a two-dimensional array, where each line break defines the boundaries of the first dimension. - -The example below shows a custom ``input_fn`` for preparing pickled NumPy arrays. - -.. code:: python - - import numpy as np - - def input_fn(request_body, request_content_type): - """An input_fn that loads a pickled numpy array""" - if request_content_type == "application/python-pickle": - array = np.load(StringIO(request_body)) - return array - else: - # Handle other content-types here or raise an Exception - # if the content type is not supported. - pass - - - -Prediction -'''''''''' - -After the inference request has been deserialized by ``input_fn``, the SageMaker Chainer model server invokes -``predict_fn`` on the return value of ``input_fn``. - -As with ``input_fn``, you can define your own ``predict_fn`` or use the SageMaker Chainer model server default. - -The ``predict_fn`` function has the following signature: - -.. code:: python - - def predict_fn(input_object, model) - -Where ``input_object`` is the object returned from ``input_fn`` and -``model`` is the model loaded by ``model_fn``. - -The default implementation of ``predict_fn`` invokes the loaded model's ``__call__`` function on ``input_object``, -and returns the resulting value. The return-type should be a NumPy array to be compatible with the default -``output_fn``. - -The example below shows an overridden ``predict_fn``. This model accepts a Python list and returns a tuple of -bounding boxes, labels, and scores from the model in a NumPy array. This ``predict_fn`` can rely on the default -``input_fn`` and ``output_fn`` because ``input_data`` is a NumPy array, and the return value of this function is -a NumPy array. - -.. code:: python - - import chainer - import numpy as np - - def predict_fn(input_data, model): - with chainer.using_config('train', False), chainer.no_backprop_mode(): - bboxes, labels, scores = model.predict([input_data]) - bbox, label, score = bboxes[0], labels[0], scores[0] - return np.array([bbox.tolist(), label, score]) - -If you implement your own prediction function, you should take care to ensure that: - -- The first argument is expected to be the return value from input_fn. - If you use the default input_fn, this will be a NumPy array. -- The second argument is the loaded model. -- The return value should be of the correct type to be passed as the - first argument to ``output_fn``. If you use the default - ``output_fn``, this should be a NumPy array. - -Output processing -''''''''''''''''' - -After invoking ``predict_fn``, the model server invokes ``output_fn``, passing in the return-value from ``predict_fn`` -and the InvokeEndpoint requested response content-type. - -The ``output_fn`` has the following signature: - -.. code:: python - - def output_fn(prediction, content_type) - -Where ``prediction`` is the result of invoking ``predict_fn`` and -``content_type`` is the InvokeEndpoint requested response content-type. -The function should return a byte array of data serialized to content_type. - -The default implementation expects ``prediction`` to be an NumPy and can serialize the result to JSON, CSV, or NPY. 
-It accepts response content types of "application/json", "text/csv", and "application/x-npy". - -Working with existing model data and training jobs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Attaching to existing training jobs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -You can attach an Chainer Estimator to an existing training job using the -``attach`` method. - -.. code:: python - - my_training_job_name = "MyAwesomeChainerTrainingJob" - chainer_estimator = Chainer.attach(my_training_job_name) - -After attaching, if the training job is in a Complete status, it can be -``deploy``\ ed to create a SageMaker Endpoint and return a -``Predictor``. If the training job is in progress, -attach will block and display log messages from the training job, until the training job completes. - -The ``attach`` method accepts the following arguments: - -- ``training_job_name (str):`` The name of the training job to attach - to. -- ``sagemaker_session (sagemaker.Session or None):`` The Session used - to interact with SageMaker - -Deploying Endpoints from model data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As well as attaching to existing training jobs, you can deploy models directly from model data in S3. -The following code sample shows how to do this, using the ``ChainerModel`` class. - -.. code:: python - - chainer_model = ChainerModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole", - entry_point="transform_script.py") - - predictor = chainer_model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1) - -The ChainerModel constructor takes the following arguments: - -- ``model_data (str):`` An S3 location of a SageMaker model data - .tar.gz file -- ``image (str):`` A Docker image URI -- ``role (str):`` An IAM role name or Arn for SageMaker to access AWS - resources on your behalf. -- ``predictor_cls (callable[string,sagemaker.Session]):`` A function to - call to create a predictor. If not None, ``deploy`` will return the - result of invoking this function on the created endpoint name -- ``env (dict[string,string]):`` Environment variables to run with - ``image`` when hosted in SageMaker. -- ``name (str):`` The model name. If None, a default model name will be - selected on each ``deploy.`` -- ``entry_point (str):`` Path (absolute or relative) to the Python file - which should be executed as the entry point to model hosting. -- ``source_dir (str):`` Optional. Path (absolute or relative) to a - directory with any other training source code dependencies including - tne entry point file. Structure within this directory will be - preserved when training on SageMaker. -- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training - and hosting containers will generate Cloudwatch metrics under the - AWS/SageMakerContainer namespace. -- ``container_log_level (int):`` Log level to use within the container. - Valid values are defined in the Python logging module. -- ``code_location (str):`` Optional. Name of the S3 bucket where your - custom code will be uploaded to. If not specified, will use the - SageMaker default bucket created by sagemaker.Session. -- ``sagemaker_session (sagemaker.Session):`` The SageMaker Session - object, used for SageMaker interaction""" - -Your model data must be a .tar.gz file in S3. SageMaker Training Job model data is saved to .tar.gz files in S3, -however if you have local data you want to deploy, you can prepare the data yourself. 
- -Assuming you have a local directory containg your model data named "my_model" you can tar and gzip compress the file and -upload to S3 using the following commands: - -:: - - tar -czf model.tar.gz my_model - aws s3 cp model.tar.gz s3://my-bucket/my-path/model.tar.gz - -This uploads the contents of my_model to a gzip compressed tar file to S3 in the bucket "my-bucket", with the key -"my-path/model.tar.gz". - -To run this command, you'll need the aws cli tool installed. Please refer to our `FAQ <#FAQ>`__ for more information on -installing this. - -Chainer Training Examples -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Amazon provides several example Jupyter notebooks that demonstrate end-to-end training on Amazon SageMaker using Chainer. -Please refer to: - -https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk - -These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the "sample notebooks" folder. - - -SageMaker Chainer Docker containers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When training and deploying training scripts, SageMaker runs your Python script in a Docker container with several -libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control -the environment your script runs in. - -SageMaker runs Chainer Estimator scripts in either Python 2.7 or Python 3.5. You can select the Python version by -passing a py_version keyword arg to the Chainer Estimator constructor. Setting this to py3 (the default) will cause your -training script to be run on Python 3.5. Setting this to py2 will cause your training script to be run on Python 2.7 -This Python version applies to both the Training Job, created by fit, and the Endpoint, created by deploy. - -The Chainer Docker images have the following dependencies installed: - -+-----------------------------+-------------+-------------+-------------+ -| Dependencies | chainer 4.0 | chainer 4.1 | chainer 5.0 | -+-----------------------------+-------------+-------------+-------------+ -| chainer | 4.0.0 | 4.1.0 | 5.0.0 | -+-----------------------------+-------------+-------------+-------------+ -| chainercv | 0.9.0 | 0.10.0 | 0.10.0 | -+-----------------------------+-------------+-------------+-------------+ -| chainermn | 1.2.0 | 1.3.0 | N/A | -+-----------------------------+-------------+-------------+-------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | 9.0 | -+-----------------------------+-------------+-------------+-------------+ -| cupy | 4.0.0 | 4.1.0 | 5.0.0 | -+-----------------------------+-------------+-------------+-------------+ -| matplotlib | 2.2.0 | 2.2.0 | 2.2.0 | -+-----------------------------+-------------+-------------+-------------+ -| mpi4py | 3.0.0 | 3.0.0 | 3.0.0 | -+-----------------------------+-------------+-------------+-------------+ -| numpy | 1.14.3 | 1.15.3 | 1.15.4 | -+-----------------------------+-------------+-------------+-------------+ -| opencv-python | 3.4.0.12 | 3.4.0.12 | 3.4.0.12 | -+-----------------------------+-------------+-------------+-------------+ -| Pillow | 5.1.0 | 5.3.0 | 5.3.0 | -+-----------------------------+-------------+-------------+-------------+ -| Python | 2.7 or 3.5 | 2.7 or 3.5 | 2.7 or 3.5 | -+-----------------------------+-------------+-------------+-------------+ - -The Docker images extend Ubuntu 16.04. - -You must select a version of Chainer by passing a ``framework_version`` keyword arg to the Chainer Estimator -constructor. 
Currently supported versions are listed in the above table. You can also set framework_version to only
-specify major and minor version, which will cause your training script to be run on the latest supported patch
-version of that minor version.
-
-Alternatively, you can build your own image by following the instructions in the SageMaker Chainer containers
-repository, and passing ``image_name`` to the Chainer Estimator constructor.
-
+===========================================
+Using Chainer with the SageMaker Python SDK
+===========================================
+
+.. contents::
+
+With Chainer Estimators, you can train and host Chainer models on Amazon SageMaker.
+
+Supported versions of Chainer: ``4.0.0``, ``4.1.0``, ``5.0.0``
+
+You can visit the Chainer repository at https://github.com/chainer/chainer.
+
+
+Training with Chainer
+~~~~~~~~~~~~~~~~~~~~~
+
+Training Chainer models using ``Chainer`` Estimators is a two-step process:
+
+1. Prepare a Chainer script to run on SageMaker.
+2. Run this script on SageMaker via a ``Chainer`` Estimator.
+
+You should prepare your script in a separate source file from the notebook, terminal session, or source file you're
+using to submit the script to SageMaker via a ``Chainer`` Estimator.
+
+Suppose that you already have a Chainer training script called
+``chainer-train.py``. You can run this script in SageMaker as follows:
+
+.. code:: python
+
+    from sagemaker.chainer import Chainer
+    chainer_estimator = Chainer(entry_point='chainer-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='5.0.0')
+    chainer_estimator.fit('s3://bucket/path/to/training/data')
+
+The S3 URL is the path to your training data within Amazon S3. The constructor keyword arguments define how
+SageMaker runs your training script and are discussed in detail in a later section.
+
+In the following sections, we'll discuss how to prepare a training script for execution on SageMaker,
+then how to run that script on SageMaker using a ``Chainer`` Estimator.
+
+Preparing the Chainer training script
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Your Chainer training script must be a Python 2.7 or 3.5 compatible source file.
+
+The training script is very similar to one you might run outside of SageMaker, but you
+can access useful properties about the training environment through various environment variables, such as:
+
+* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to.
+  These artifacts are uploaded to S3 for model hosting.
+* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host.
+* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may
+  include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed
+  and uploaded to S3 to the same S3 prefix as the model artifacts.
+
+Supposing two input channels, 'train' and 'test', were used in the call to the Chainer estimator's ``fit()`` method,
+the following will be set, following the format "SM_CHANNEL_[channel_name]":
+
+* ``SM_CHANNEL_TRAIN``: A string representing the path to the directory containing data in the 'train' channel.
+* ``SM_CHANNEL_TEST``: Same as above, but for the 'test' channel.
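+
+For example, a training script might read these variables directly to pick a Chainer device (a minimal sketch;
+the ``-1`` device id follows Chainer's convention for CPU):
+
+.. code:: python
+
+    import os
+
+    # Number of GPUs available on this host, as reported by SageMaker.
+    num_gpus = int(os.environ.get('SM_NUM_GPUS', 0))
+
+    # Chainer identifies devices by id: 0, 1, ... for GPUs, -1 for CPU.
+    device = 0 if num_gpus > 0 else -1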
+
+A typical training script loads data from the input channels, configures training with hyperparameters, trains a model,
+and saves the model to ``model_dir`` so that it can be hosted later. Hyperparameters are passed to your script as arguments
+and can be retrieved with an ``argparse.ArgumentParser`` instance. For example, a training script might start
+with the following:
+
+.. code:: python
+
+    import argparse
+    import os
+
+    if __name__ == '__main__':
+
+        parser = argparse.ArgumentParser()
+
+        # Hyperparameters sent by the client are passed as command-line arguments to the script.
+        parser.add_argument('--epochs', type=int, default=50)
+        parser.add_argument('--batch-size', type=int, default=64)
+        parser.add_argument('--learning-rate', type=float, default=0.05)
+
+        # Data, model, and output directories
+        parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
+        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
+        parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
+        parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
+
+        args, _ = parser.parse_known_args()
+
+        # ... load from args.train and args.test, train a model, write the model to args.model_dir.
+
+Because SageMaker imports your training script, you should put your training code in a main guard
+(``if __name__ == '__main__':``) if you are using the same script to host your model, so that SageMaker does not
+inadvertently run your training code at the wrong point in execution.
+
+For more on training environment variables, please visit https://github.com/aws/sagemaker-containers.
+
+Using third-party libraries
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When running your training script on SageMaker, it will have access to some pre-installed third-party libraries, including ``chainer``, ``numpy``, and ``cupy``.
+For more information on the runtime environment, including specific package versions, see `SageMaker Chainer Docker containers <#sagemaker-chainer-docker-containers>`__.
+
+If there are other packages you want to use with your script, you can include a ``requirements.txt`` file in the same directory as your training script to install other dependencies at runtime.
+A ``requirements.txt`` file is a text file that contains a list of items that are installed by using ``pip install``. You can also specify the version of an item to install.
+For information about the format of a ``requirements.txt`` file, see "Requirements Files" in the pip documentation.
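+
+For example, a hypothetical ``requirements.txt`` that installs two extra packages (and pins one of them) might
+look like this:
+
+::
+
+    scikit-learn==0.20.3
+    nltk>=3.4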
+
+Running a Chainer training script in SageMaker
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You run Chainer training scripts on SageMaker by creating ``Chainer`` Estimators.
+SageMaker training of your script is invoked when you call ``fit`` on a ``Chainer`` Estimator.
+The following code sample shows how you train a custom Chainer script "chainer-train.py", passing
+in three hyperparameters ('epochs', 'batch-size', and 'learning-rate'), and using two input channel
+directories ('train' and 'test'):
+
+.. code:: python
+
+    chainer_estimator = Chainer('chainer-train.py',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='5.0.0',
+                                hyperparameters={'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
+    chainer_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
+                           'test': 's3://my-data-bucket/path/to/my/test/data'})
+
+
+Chainer Estimators
+^^^^^^^^^^^^^^^^^^
+
+The ``Chainer`` constructor takes both required and optional arguments.
+
+Required arguments
+''''''''''''''''''
+
+The following are required arguments to the ``Chainer`` constructor. When you create a Chainer object, you must include
+these in the constructor, either positionally or as keyword arguments.
+
+- ``entry_point`` Path (absolute or relative) to the Python file which
+  should be executed as the entry point to training.
+- ``role`` An AWS IAM role (either name or full ARN). The Amazon
+  SageMaker training jobs and APIs that create Amazon SageMaker
+  endpoints use this role to access training data and model artifacts.
+  After the endpoint is created, the inference code might use the IAM
+  role, if accessing AWS resources.
+- ``train_instance_count`` Number of Amazon EC2 instances to use for
+  training.
+- ``train_instance_type`` Type of EC2 instance to use for training, for
+  example, 'ml.m4.xlarge'.
+
+Optional arguments
+''''''''''''''''''
+
+The following are optional arguments. When you create a ``Chainer`` object, you can specify these as keyword arguments.
+
+- ``source_dir`` Path (absolute or relative) to a directory with any
+  other training source code dependencies, including the entry point
+  file. Structure within this directory will be preserved when training
+  on SageMaker.
+- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with
+  any additional libraries that will be exported to the container (default: []).
+  The library folders will be copied to SageMaker in the same folder where the entry point is copied.
+  If ``source_dir`` points to S3, code will be uploaded and the S3 location will be used
+  instead. Example:
+
+  The following call
+  >>> Chainer(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
+  results in the following inside the container:
+
+  >>> $ ls
+
+  >>> opt/ml/code
+  >>> ├── train.py
+  >>> ├── common
+  >>> └── virtual-env
+
+- ``hyperparameters`` Hyperparameters that will be used for training.
+  Will be made accessible as a dict[str, str] to the training code on
+  SageMaker. For convenience, accepts other types besides str, but
+  str() will be called on keys and values to convert them before
+  training.
+- ``py_version`` Python version you want to use for executing your
+  model training code.
+- ``train_volume_size`` Size in GB of the EBS volume to use for storing
+  input data during training. Must be large enough to store training
+  data if input_mode='File' is used (which is the default).
+- ``train_max_run`` Timeout in seconds for training, after which Amazon
+  SageMaker terminates the job regardless of its current status.
+- ``input_mode`` The input mode that the algorithm supports. Valid
+  modes: 'File' - Amazon SageMaker copies the training dataset from the
+  S3 location to a directory in the Docker container. 'Pipe' - Amazon
+  SageMaker streams data directly from S3 to the container via a Unix
+  named pipe.
+- ``output_path`` S3 location where you want the training result (model
+  artifacts and optional output files) saved. If not specified, results
+  are stored to a default bucket. If the bucket with the specific name
+  does not exist, the estimator creates the bucket during the fit()
+  method execution.
+- ``output_kms_key`` Optional KMS key ID used to encrypt training
+  output.
+- ``job_name`` Name to assign to the training job that the fit()
+  method launches. If not specified, the estimator generates a default
+  job name, based on the training image name and current timestamp.
+- ``image_name`` An alternative Docker image to use for training and
+  serving. If specified, the estimator will use this image for training and
+  hosting, instead of selecting the appropriate SageMaker official image based on
+  framework_version and py_version. Refer to `SageMaker Chainer Docker Containers
+  <#sagemaker-chainer-docker-containers>`__ for details on what the official images support
+  and where to find the source code to build your custom image.
+
+
+Distributed Chainer Training
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+Chainer allows you to train a model on multiple nodes using ChainerMN_, which distributes training with MPI.
+
+.. _ChainerMN: https://github.com/chainer/chainermn
+
+In order to run distributed Chainer training on SageMaker, your training script should use a ``chainermn`` Communicator
+object to coordinate training between multiple hosts.
+
+SageMaker runs your script with ``mpirun`` if ``train_instance_count`` is greater than one.
+The following optional arguments modify how MPI runs your distributed training script; see the sketch after this
+list for how they fit together.
+
+- ``use_mpi`` Boolean that overrides whether to run your training script with MPI.
+- ``num_processes`` Integer that determines how many total processes to run with MPI. By default, this is equal to ``process_slots_per_host`` times the number of nodes.
+- ``process_slots_per_host`` Integer that determines how many processes can be run on each host. By default, this is equal to one process per host on CPU instances, or one process per GPU on GPU instances.
+- ``additional_mpi_options`` String of additional options to pass to the ``mpirun`` command.
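+
+The following is a minimal sketch of how these pieces might fit together; the communicator configuration and the
+training logic are placeholders:
+
+.. code:: python
+
+    # In your training script: coordinate hosts with a chainermn communicator.
+    # chainermn ships as a separate package for Chainer 4.x and is bundled with Chainer 5.x.
+    import chainer
+    import chainermn
+
+    comm = chainermn.create_communicator()
+
+    # Wrap a standard optimizer so that gradients are aggregated across hosts.
+    optimizer = chainermn.create_multi_node_optimizer(chainer.optimizers.Adam(), comm)
+
+.. code:: python
+
+    # When constructing the estimator: control how mpirun launches your script.
+    chainer_estimator = Chainer(entry_point='chainer-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=2,
+                                framework_version='5.0.0',
+                                use_mpi=True,
+                                num_processes=2,
+                                process_slots_per_host=1,
+                                additional_mpi_options='--verbose')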
+
+
+Calling fit
+^^^^^^^^^^^
+
+You start your training script by calling ``fit`` on a ``Chainer`` Estimator. ``fit`` takes both required and optional
+arguments.
+
+fit Required arguments
+''''''''''''''''''''''
+
+- ``inputs``: This can take one of the following forms: a string
+  S3 URI, for example ``s3://my-bucket/my-training-data``, in which
+  case the S3 objects rooted at the ``my-training-data`` prefix will
+  be available in the default ``train`` channel; or a dict from
+  string channel names to S3 URIs, in which case the objects rooted at
+  each S3 prefix will be available as files in each channel directory.
+
+For example:
+
+.. code:: python
+
+    {'train': 's3://my-bucket/my-training-data',
+     'eval': 's3://my-bucket/my-evaluation-data'}
+
+.. optional-arguments-1:
+
+fit Optional arguments
+''''''''''''''''''''''
+
+- ``wait``: Defaults to True; whether to block and wait for the
+  training script to complete before returning.
+- ``logs``: Defaults to True; whether to show logs produced by the training
+  job in the Python session. Only meaningful when wait is True.
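+
+For example, you can return immediately and let the job run in the background, then re-attach later
+(the training job name below is hypothetical):
+
+.. code:: python
+
+    chainer_estimator.fit({'train': 's3://my-bucket/my-training-data'}, wait=False)
+
+    # ... later, block and stream the logs until the job finishes.
+    chainer_estimator = Chainer.attach('my-chainer-training-job')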
+
+
+Saving models
+~~~~~~~~~~~~~
+
+In order to save your trained Chainer model for deployment on SageMaker, your training script should save your model
+to a certain filesystem path called ``model_dir``. This value is accessible through the environment variable
+``SM_MODEL_DIR``. The following code demonstrates how to save a trained Chainer model named ``model`` as
+``model.npz`` at the end of training:
+
+.. code:: python
+
+    import argparse
+    import os
+
+    import chainer
+
+    if __name__ == '__main__':
+        # Default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable.
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
+        args, _ = parser.parse_known_args()
+
+        # ... train `model`, then save it to `model_dir` as the file 'model.npz'
+        chainer.serializers.save_npz(os.path.join(args.model_dir, 'model.npz'), model)
+
+After your training job is complete, SageMaker will compress and upload the serialized model to S3, and your model data
+will be available in the S3 ``output_path`` you specified when you created the Chainer Estimator.
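+
+For example, once ``fit`` has completed, you can read the S3 location of the packed model artifacts from the
+estimator (a sketch; ``model_data`` reflects the most recent completed training job):
+
+.. code:: python
+
+    # Something like s3://<output_path>/<training-job-name>/output/model.tar.gz
+    print(chainer_estimator.model_data)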
+
+Deploying Chainer models
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+After a Chainer Estimator has been fit, you can host the newly created model in SageMaker.
+
+After calling ``fit``, you can call ``deploy`` on a ``Chainer`` Estimator to create a SageMaker Endpoint.
+The Endpoint runs a SageMaker-provided Chainer model server and hosts the model produced by your training script,
+which was run when you called ``fit``. This was the model you saved to ``model_dir``.
+
+``deploy`` returns a ``Predictor`` object, which you can use to do inference on the Endpoint hosting your Chainer model.
+Each ``Predictor`` provides a ``predict`` method, which can do inference with NumPy arrays or Python lists.
+Inference arrays or lists are serialized and sent to the Chainer model server by an ``InvokeEndpoint`` SageMaker
+operation.
+
+``predict`` returns the result of inference against your model. By default, the inference result is a NumPy array.
+
+.. code:: python
+
+    # Train my estimator
+    chainer_estimator = Chainer(entry_point='train_and_deploy.py',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='5.0.0')
+    chainer_estimator.fit('s3://my_bucket/my_training_data/')
+
+    # Deploy my estimator to a SageMaker Endpoint and get a Predictor
+    predictor = chainer_estimator.deploy(instance_type='ml.m4.xlarge',
+                                         initial_instance_count=1)
+
+    # `data` is a NumPy array or a Python list.
+    # `response` is a NumPy array.
+    response = predictor.predict(data)
+
+You use the SageMaker Chainer model server to host your Chainer model when you call ``deploy`` on a ``Chainer``
+Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates.
+You can access the name of the Endpoint through the ``name`` property on the returned ``Predictor``.
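+
+If you need a predictor for an endpoint you created earlier (for example, from another Python session), you can
+construct one directly instead of calling ``deploy`` again. This sketch assumes the endpoint name, and uses JSON
+rather than the default NPY serialization described in the next section:
+
+.. code:: python
+
+    from sagemaker.predictor import RealTimePredictor, json_serializer, json_deserializer
+
+    # 'my-chainer-endpoint' is a hypothetical name of an existing endpoint.
+    predictor = RealTimePredictor('my-chainer-endpoint',
+                                  serializer=json_serializer,
+                                  deserializer=json_deserializer,
+                                  content_type='application/json')
+    response = predictor.predict([[0.1, 0.2, 0.3]])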
+
+
+The SageMaker Chainer Model Server
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The Chainer Endpoint you create with ``deploy`` runs a SageMaker Chainer model server.
+The model server loads the model that was saved by your training script and performs inference on the model in response
+to SageMaker InvokeEndpoint API calls.
+
+You can configure two components of the SageMaker Chainer model server: model loading and model serving.
+Model loading is the process of deserializing your saved model back into a Chainer model.
+Serving is the process of translating InvokeEndpoint requests to inference calls on the loaded model.
+
+You configure the Chainer model server by defining functions in the Python source file you passed to the Chainer constructor.
+
+Model loading
+^^^^^^^^^^^^^
+
+Before a model can be served, it must be loaded. The SageMaker Chainer model server loads your model by invoking a
+``model_fn`` function that you must provide in your script. The ``model_fn`` should have the following signature:
+
+.. code:: python
+
+    def model_fn(model_dir)
+
+SageMaker passes in ``model_dir``, the directory where the model files and sub-directories you saved during
+training have been mounted. Your model function should return a model object that can be used for model serving;
+if the returned object is callable on input data, you can rely on the default request handling functions
+described below.
+
+The following code snippet shows an example ``model_fn`` implementation.
+It loads and returns a Chainer predictor from a multi-layer perceptron class ``MLP`` that extends ``chainer.Chain``,
+restoring the model parameters from a ``model.npz`` file in the SageMaker model directory ``model_dir``.
+
+.. code:: python
+
+    import chainer
+    import os
+
+    def model_fn(model_dir):
+        chainer.config.train = False
+        # MLP is a user-defined chainer.Chain subclass (definition omitted).
+        model = chainer.links.Classifier(MLP(1000, 10))
+        chainer.serializers.load_npz(os.path.join(model_dir, 'model.npz'), model)
+        return model.predictor
+
+Model serving
+^^^^^^^^^^^^^
+
+After the SageMaker model server has loaded your model by calling ``model_fn``, SageMaker will serve your model.
+Model serving is the process of responding to inference requests received by SageMaker InvokeEndpoint API calls.
+The SageMaker Chainer model server breaks request handling into three steps:
+
+- input processing,
+- prediction, and
+- output processing.
+
+In a similar way to model loading, you configure these steps by defining functions in your Python source file.
+
+Each step involves invoking a Python function, with information about the request and the return value from the previous
+function in the chain. Inside the SageMaker Chainer model server, the process looks like:
+
+.. code:: python
+
+    # Deserialize the Invoke request body into an object we can perform prediction on
+    input_object = input_fn(request_body, request_content_type)
+
+    # Perform prediction on the deserialized object, with the loaded model
+    prediction = predict_fn(input_object, model)
+
+    # Serialize the prediction result into the desired response content type
+    output = output_fn(prediction, response_content_type)
+
+The above code sample shows the three function definitions:
+
+- ``input_fn``: Takes request data and deserializes the data into an
+  object for prediction.
+- ``predict_fn``: Takes the deserialized request object and performs
+  inference against the loaded model.
+- ``output_fn``: Takes the result of prediction and serializes this
+  according to the response content type.
+
+The SageMaker Chainer model server provides default implementations of these functions.
+You can provide your own implementations for these functions in your hosting script.
+If you omit any definition, then the SageMaker Chainer model server will use its default implementation for that
+function.
+
+The ``RealTimePredictor`` used by Chainer in the SageMaker Python SDK serializes NumPy arrays to the NPY format
+by default, with Content-Type ``application/x-npy``. The SageMaker Chainer model server can deserialize NPY-formatted
+data (along with JSON and CSV data).
+
+If you rely solely on the SageMaker Chainer model server defaults, you get the following functionality:
+
+- Prediction on models that implement the ``__call__`` method
+- Serialization and deserialization of NumPy arrays
+
+The default ``input_fn`` and ``output_fn`` are meant to make it easy to predict on NumPy arrays. If your model expects
+a NumPy array and returns a NumPy array, then these functions do not have to be overridden when sending NPY-formatted
+data.
+
+In the following sections we describe the default implementations of ``input_fn``, ``predict_fn``, and ``output_fn``.
+We describe the input arguments and expected return types of each, so you can define your own implementations.
+
+Input processing
+''''''''''''''''
+
+When an InvokeEndpoint operation is made against an Endpoint running a SageMaker Chainer model server,
+the model server receives two pieces of information:
+
+- The request Content-Type, for example "application/x-npy"
+- The request data body, a byte array
+
+The SageMaker Chainer model server will invoke an ``input_fn`` function in your hosting script,
+passing in this information. If you define your own ``input_fn``, it should return an object that can be passed
+to ``predict_fn`` and have the following signature:
+
+.. code:: python
+
+    def input_fn(request_body, request_content_type)
+
+where ``request_body`` is a byte buffer and ``request_content_type`` is a Python string.
+
+The SageMaker Chainer model server provides a default implementation of ``input_fn``.
+This function deserializes JSON, CSV, or NPY encoded data into a NumPy array.
+
+Default NPY deserialization requires ``request_body`` to follow the NPY format. For Chainer, the Python SDK
+defaults to sending prediction requests with this format.
+
+Default JSON deserialization requires ``request_body`` to contain a single JSON list.
+Sending multiple JSON objects within the same ``request_body`` is not supported.
+The list must have a dimensionality compatible with the model loaded in ``model_fn``.
+The list's shape must be identical to the model's input shape, for all dimensions after the first (the first
+dimension is the batch size).
+
+Default CSV deserialization requires ``request_body`` to contain one or more lines of CSV numerical data.
+The data is loaded into a two-dimensional array, where each line break defines the boundaries of the first dimension.
+
+The example below shows a custom ``input_fn`` for preparing pickled NumPy arrays:
+
+.. code:: python
+
+    from io import BytesIO
+
+    import numpy as np
+
+    def input_fn(request_body, request_content_type):
+        """An input_fn that loads a pickled NumPy array"""
+        if request_content_type == "application/python-pickle":
+            # request_body is a byte buffer, so wrap it in BytesIO for np.load.
+            return np.load(BytesIO(request_body), allow_pickle=True)
+        else:
+            # Handle other content types here, or raise an exception
+            # if the content type is not supported.
+            pass
+
+
+Prediction
+''''''''''
+
+After the inference request has been deserialized by ``input_fn``, the SageMaker Chainer model server invokes
+``predict_fn`` on the return value of ``input_fn``.
+
+As with ``input_fn``, you can define your own ``predict_fn`` or use the SageMaker Chainer model server default.
+
+The ``predict_fn`` function has the following signature:
+
+.. code:: python
+
+    def predict_fn(input_object, model)
+
+where ``input_object`` is the object returned from ``input_fn`` and
+``model`` is the model loaded by ``model_fn``.
+
+The default implementation of ``predict_fn`` invokes the loaded model's ``__call__`` function on ``input_object``,
+and returns the resulting value. The return type should be a NumPy array to be compatible with the default
+``output_fn``.
+
+The example below shows an overridden ``predict_fn``. This model accepts a Python list and returns a tuple of
+bounding boxes, labels, and scores from the model in a NumPy array. This ``predict_fn`` can rely on the default
+``input_fn`` and ``output_fn`` because ``input_data`` is a NumPy array, and the return value of this function is
+a NumPy array.
+
+.. code:: python
+
+    import chainer
+    import numpy as np
+
+    def predict_fn(input_data, model):
+        with chainer.using_config('train', False), chainer.no_backprop_mode():
+            bboxes, labels, scores = model.predict([input_data])
+            bbox, label, score = bboxes[0], labels[0], scores[0]
+            return np.array([bbox.tolist(), label, score])
+
+If you implement your own prediction function, you should take care to ensure that:
+
+- The first argument is expected to be the return value from ``input_fn``.
+  If you use the default ``input_fn``, this will be a NumPy array.
+- The second argument is the loaded model.
+- The return value should be of the correct type to be passed as the
+  first argument to ``output_fn``. If you use the default
+  ``output_fn``, this should be a NumPy array.
+
+Output processing
+'''''''''''''''''
+
+After invoking ``predict_fn``, the model server invokes ``output_fn``, passing in the return value from ``predict_fn``
+and the InvokeEndpoint requested response content type.
+
+The ``output_fn`` has the following signature:
+
+.. code:: python
+
+    def output_fn(prediction, content_type)
+
+where ``prediction`` is the result of invoking ``predict_fn`` and
+``content_type`` is the InvokeEndpoint requested response content type.
+The function should return a byte array of data serialized to ``content_type``.
+
+The default implementation expects ``prediction`` to be a NumPy array and can serialize the result to JSON, CSV, or NPY.
+It accepts response content types of "application/json", "text/csv", and "application/x-npy".
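+
+The example below sketches a custom ``output_fn`` that serializes predictions as JSON and rejects other response
+content types:
+
+.. code:: python
+
+    import json
+
+    import numpy as np
+
+    def output_fn(prediction, content_type):
+        """An output_fn that serializes NumPy predictions to JSON"""
+        if content_type == "application/json":
+            return json.dumps(np.asarray(prediction).tolist())
+        raise ValueError("Unsupported response content type: {}".format(content_type))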
+
+Working with existing model data and training jobs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Attaching to existing training jobs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can attach a Chainer Estimator to an existing training job using the
+``attach`` method.
+
+.. code:: python
+
+    my_training_job_name = "MyAwesomeChainerTrainingJob"
+    chainer_estimator = Chainer.attach(my_training_job_name)
+
+After attaching, if the training job is in a Complete status, it can be
+``deploy``\ ed to create a SageMaker Endpoint and return a
+``Predictor``. If the training job is in progress,
+attach will block and display log messages from the training job, until the training job completes.
+
+The ``attach`` method accepts the following arguments:
+
+- ``training_job_name (str):`` The name of the training job to attach
+  to.
+- ``sagemaker_session (sagemaker.Session or None):`` The Session used
+  to interact with SageMaker.
+
+Deploying Endpoints from model data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+As well as attaching to existing training jobs, you can deploy models directly from model data in S3.
+The following code sample shows how to do this, using the ``ChainerModel`` class.
+
+.. code:: python
+
+    chainer_model = ChainerModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole",
+                                 entry_point="transform_script.py")
+
+    predictor = chainer_model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1)
+
+The ChainerModel constructor takes the following arguments:
+
+- ``model_data (str):`` An S3 location of a SageMaker model data
+  .tar.gz file.
+- ``image (str):`` A Docker image URI.
+- ``role (str):`` An IAM role name or ARN for SageMaker to access AWS
+  resources on your behalf.
+- ``predictor_cls (callable[string,sagemaker.Session]):`` A function to
+  call to create a predictor. If not None, ``deploy`` will return the
+  result of invoking this function on the created endpoint name.
+- ``env (dict[string,string]):`` Environment variables to run with
+  ``image`` when hosted in SageMaker.
+- ``name (str):`` The model name. If None, a default model name will be
+  selected on each ``deploy``.
+- ``entry_point (str):`` Path (absolute or relative) to the Python file
+  which should be executed as the entry point to model hosting.
+- ``source_dir (str):`` Optional. Path (absolute or relative) to a
+  directory with any other training source code dependencies, including
+  the entry point file. Structure within this directory will be
+  preserved when training on SageMaker.
+- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training
+  and hosting containers will generate CloudWatch metrics under the
+  AWS/SageMakerContainer namespace.
+- ``container_log_level (int):`` Log level to use within the container.
+  Valid values are defined in the Python logging module.
+- ``code_location (str):`` Optional. Name of the S3 bucket where your
+  custom code will be uploaded to. If not specified, will use the
+  SageMaker default bucket created by sagemaker.Session.
+- ``sagemaker_session (sagemaker.Session):`` The SageMaker Session
+  object, used for SageMaker interaction.
+
+Your model data must be a .tar.gz file in S3. SageMaker Training Job model data is saved to .tar.gz files in S3;
+however, if you have local data you want to deploy, you can prepare the data yourself.
+
+Assuming you have a local directory containing your model data named "my_model", you can tar and gzip compress the
+directory and upload it to S3 using the following commands:
+
+::
+
+    tar -czf model.tar.gz my_model
+    aws s3 cp model.tar.gz s3://my-bucket/my-path/model.tar.gz
+
+This uploads the contents of my_model as a gzip-compressed tar file to S3 in the bucket "my-bucket", with the key
+"my-path/model.tar.gz".
+
+To run this command, you'll need the AWS CLI tool installed. Please refer to our `FAQ <#FAQ>`__ for more information on
+installing this.
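+
+Alternatively, you can upload the archive with the SageMaker Python SDK itself; this sketch uses your default
+SageMaker bucket rather than a bucket you name yourself:
+
+.. code:: python
+
+    import sagemaker
+
+    # Uploads to s3://<default-bucket>/model/model.tar.gz and returns the S3 URI.
+    model_data = sagemaker.Session().upload_data(path='model.tar.gz', key_prefix='model')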
+
+Chainer Training Examples
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Amazon provides several example Jupyter notebooks that demonstrate end-to-end training on Amazon SageMaker using Chainer.
+Please refer to:
+
+https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk
+
+These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the "sample notebooks" folder.
+
+
+SageMaker Chainer Docker containers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When you train and deploy training scripts, SageMaker runs your Python script in a Docker container with several
+libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control
+the environment your script runs in.
+
+SageMaker runs Chainer Estimator scripts in either Python 2.7 or Python 3.5. You can select the Python version by
+passing a ``py_version`` keyword arg to the Chainer Estimator constructor. Setting this to py3 (the default) will cause your
+training script to be run on Python 3.5. Setting this to py2 will cause your training script to be run on Python 2.7.
+This Python version applies to both the Training Job, created by ``fit``, and the Endpoint, created by ``deploy``.
+
+The Chainer Docker images have the following dependencies installed:
+
++-----------------------------+-------------+-------------+-------------+
+| Dependencies                | chainer 4.0 | chainer 4.1 | chainer 5.0 |
++-----------------------------+-------------+-------------+-------------+
+| chainer                     | 4.0.0       | 4.1.0       | 5.0.0       |
++-----------------------------+-------------+-------------+-------------+
+| chainercv                   | 0.9.0       | 0.10.0      | 0.10.0      |
++-----------------------------+-------------+-------------+-------------+
+| chainermn                   | 1.2.0       | 1.3.0       | N/A         |
++-----------------------------+-------------+-------------+-------------+
+| CUDA (GPU image only)       | 9.0         | 9.0         | 9.0         |
++-----------------------------+-------------+-------------+-------------+
+| cupy                        | 4.0.0       | 4.1.0       | 5.0.0       |
++-----------------------------+-------------+-------------+-------------+
+| matplotlib                  | 2.2.0       | 2.2.0       | 2.2.0       |
++-----------------------------+-------------+-------------+-------------+
+| mpi4py                      | 3.0.0       | 3.0.0       | 3.0.0       |
++-----------------------------+-------------+-------------+-------------+
+| numpy                       | 1.14.3      | 1.15.3      | 1.15.4      |
++-----------------------------+-------------+-------------+-------------+
+| opencv-python               | 3.4.0.12    | 3.4.0.12    | 3.4.0.12    |
++-----------------------------+-------------+-------------+-------------+
+| Pillow                      | 5.1.0       | 5.3.0       | 5.3.0       |
++-----------------------------+-------------+-------------+-------------+
+| Python                      | 2.7 or 3.5  | 2.7 or 3.5  | 2.7 or 3.5  |
++-----------------------------+-------------+-------------+-------------+
+
+The Docker images extend Ubuntu 16.04.
+
+You must select a version of Chainer by passing a ``framework_version`` keyword arg to the Chainer Estimator
+constructor. Currently supported versions are listed in the table above. You can also set ``framework_version`` to
+specify only the major and minor version, which will cause your training script to be run on the latest supported patch
+version of that minor version.
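+
+For example, the following sketch (arguments abbreviated) would run your script on the most recent supported
+5.0.x image:
+
+.. code:: python
+
+    chainer_estimator = Chainer(entry_point='chainer-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.m4.xlarge',
+                                train_instance_count=1,
+                                framework_version='5.0')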
+
+Alternatively, you can build your own image by following the instructions in the SageMaker Chainer containers
+repository, and passing ``image_name`` to the Chainer Estimator constructor.
+
+You can visit the SageMaker Chainer containers repository here: https://github.com/aws/sagemaker-chainer-containers/
\ No newline at end of file
diff --git a/doc/using_mxnet.rst b/doc/using_mxnet.rst
index a951b5ce43..d97640f726 100644
--- a/doc/using_mxnet.rst
+++ b/doc/using_mxnet.rst
@@ -411,8 +411,8 @@ Calling fit
 You start your training script by calling ``fit`` on an ``MXNet`` Estimator. ``fit`` takes
 both required and optional arguments.
 
-Required argument
-'''''''''''''''''
+fit Required argument
+'''''''''''''''''''''
 
 - ``inputs``: This can take one of the following forms: A string
   S3 URI, for example ``s3://my-bucket/my-training-data``. In this
@@ -430,8 +430,8 @@ For example:
 
 .. optional-arguments-1:
 
-Optional arguments
-''''''''''''''''''
+fit Optional arguments
+''''''''''''''''''''''
 
 - ``wait``: Defaults to True, whether to block and wait for the
   training script to complete before returning.
diff --git a/doc/using_pytorch.rst b/doc/using_pytorch.rst index d9239cdbcd..6c72445571 100644 --- a/doc/using_pytorch.rst +++ b/doc/using_pytorch.rst @@ -1,731 +1,731 @@ -=========================================== -Using PyTorch with the SageMaker Python SDK -=========================================== - -.. contents:: - -With PyTorch Estimators and Models, you can train and host PyTorch models on Amazon SageMaker. - -Supported versions of PyTorch: ``0.4.0``, ``1.0.0``. - -We recommend that you use the latest supported version, because that's where we focus most of our development efforts. - -You can visit the PyTorch repository at https://github.com/pytorch/pytorch. - -Training with PyTorch ------------------------- - -Training PyTorch models using ``PyTorch`` Estimators is a two-step process: - -1. Prepare a PyTorch script to run on SageMaker -2. Run this script on SageMaker via a ``PyTorch`` Estimator. - - -First, you prepare your training script, then second, you run this on SageMaker via a ``PyTorch`` Estimator. -You should prepare your script in a separate source file than the notebook, terminal session, or source file you're -using to submit the script to SageMaker via a ``PyTorch`` Estimator. This will be discussed in further detail below. - -Suppose that you already have a PyTorch training script called `pytorch-train.py`. -You can then setup a ``PyTorch`` Estimator with keyword arguments to point to this script and define how SageMaker runs it: - -.. code:: python - - from sagemaker.pytorch import PyTorch - - pytorch_estimator = PyTorch(entry_point='pytorch-train.py', - role='SageMakerRole', - train_instance_type='ml.p3.2xlarge', - train_instance_count=1, - framework_version='1.0.0') - -After that, you simply tell the estimator to start a training job and provide an S3 URL -that is the path to your training data within Amazon S3: - -.. code:: python - - pytorch_estimator.fit('s3://bucket/path/to/training/data') - -In the following sections, we'll discuss how to prepare a training script for execution on SageMaker, -then how to run that script on SageMaker using a ``PyTorch`` Estimator. - - -Preparing the PyTorch Training Script -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Your PyTorch training script must be a Python 2.7 or 3.5 compatible source file. - -The training script is very similar to a training script you might run outside of SageMaker, but you -can access useful properties about the training environment through various environment variables, such as - -* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to. - These artifacts are uploaded to S3 for model hosting. -* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host. -* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may - include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed - and uploaded to S3 to the same S3 prefix as the model artifacts. - -Supposing two input channels, 'train' and 'test', were used in the call to the PyTorch estimator's ``fit`` method, -the following will be set, following the format "SM_CHANNEL_[channel_name]": - -* ``SM_CHANNEL_TRAIN``: A string representing the path to the directory containing data in the 'train' channel -* ``SM_CHANNEL_TEST``: Same as above, but for the 'test' channel. 
- -A typical training script loads data from the input channels, configures training with hyperparameters, trains a model, -and saves a model to `model_dir` so that it can be hosted later. Hyperparameters are passed to your script as arguments -and can be retrieved with an argparse.ArgumentParser instance. For example, a training script might start -with the following: - -.. code:: python - - import argparse - import os - - if __name__ =='__main__': - - parser = argparse.ArgumentParser() - - # hyperparameters sent by the client are passed as command-line arguments to the script. - parser.add_argument('--epochs', type=int, default=50) - parser.add_argument('--batch-size', type=int, default=64) - parser.add_argument('--learning-rate', type=float, default=0.05) - parser.add_argument('--use-cuda', type=bool, default=False) - - # Data, model, and output directories - parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR']) - parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR']) - parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN']) - parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST']) - - args, _ = parser.parse_known_args() - - # ... load from args.train and args.test, train a model, write model to args.model_dir. - -Because the SageMaker imports your training script, you should put your training code in a main guard -(``if __name__=='__main__':``) if you are using the same script to host your model, so that SageMaker does not -inadvertently run your training code at the wrong point in execution. - -Note that SageMaker doesn't support argparse actions. If you want to use, for example, boolean hyperparameters, -you need to specify `type` as `bool` in your script and provide an explicit `True` or `False` value for this hyperparameter -when instantiating PyTorch Estimator. - -For more on training environment variables, please visit `SageMaker Containers `_. - -Using third-party libraries -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When running your training script on SageMaker, it will have access to some pre-installed third-party libraries including ``torch``, ``torchvisopm``, and ``numpy``. -For more information on the runtime environment, including specific package versions, see `SageMaker PyTorch Docker containers <#id4>`__. - -If there are other packages you want to use with your script, you can include a ``requirements.txt`` file in the same directory as your training script to install other dependencies at runtime. -A ``requirements.txt`` file is a text file that contains a list of items that are installed by using ``pip install``. You can also specify the version of an item to install. -For information about the format of a ``requirements.txt`` file, see `Requirements Files `__ in the pip documentation. - -Running a PyTorch training script in SageMaker -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You run PyTorch training scripts on SageMaker by creating ``PyTorch`` Estimators. -SageMaker training of your script is invoked when you call ``fit`` on a ``PyTorch`` Estimator. -The following code sample shows how you train a custom PyTorch script "pytorch-train.py", passing -in three hyperparameters ('epochs', 'batch-size', and 'learning-rate'), and using two input channel -directories ('train' and 'test'). - -.. 
code:: python - - pytorch_estimator = PyTorch('pytorch-train.py', - train_instance_type='ml.p3.2xlarge', - train_instance_count=1, - framework_version='1.0.0', - hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1}) - pytorch_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data', - 'test': 's3://my-data-bucket/path/to/my/test/data'}) - - -PyTorch Estimators ------------------- - -The `PyTorch` constructor takes both required and optional arguments. - -Required arguments -~~~~~~~~~~~~~~~~~~ - -The following are required arguments to the ``PyTorch`` constructor. When you create a PyTorch object, you must include -these in the constructor, either positionally or as keyword arguments. - -- ``entry_point`` Path (absolute or relative) to the Python file which - should be executed as the entry point to training. -- ``role`` An AWS IAM role (either name or full ARN). The Amazon - SageMaker training jobs and APIs that create Amazon SageMaker - endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM - role, if accessing AWS resource. -- ``train_instance_count`` Number of Amazon EC2 instances to use for - training. -- ``train_instance_type`` Type of EC2 instance to use for training, for - example, 'ml.m4.xlarge'. - -Optional arguments -~~~~~~~~~~~~~~~~~~ - -The following are optional arguments. When you create a ``PyTorch`` object, you can specify these as keyword arguments. - -- ``source_dir`` Path (absolute or relative) to a directory with any - other training source code dependencies including the entry point - file. Structure within this directory will be preserved when training - on SageMaker. -- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with - any additional libraries that will be exported to the container (default: []). - The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. - If the ```source_dir``` points to S3, code will be uploaded and the S3 location will be used - instead. Example: - - The following call - >>> PyTorch(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) - results in the following inside the container: - - >>> $ ls - - >>> opt/ml/code - >>> ├── train.py - >>> ├── common - >>> └── virtual-env - -- ``hyperparameters`` Hyperparameters that will be used for training. - Will be made accessible as a dict[str, str] to the training code on - SageMaker. For convenience, accepts other types besides strings, but - ``str`` will be called on keys and values to convert them before - training. -- ``py_version`` Python version you want to use for executing your - model training code. -- ``framework_version`` PyTorch version you want to use for executing - your model training code. You can find the list of supported versions - in `the section below <#sagemaker-pytorch-docker-containers>`__. -- ``train_volume_size`` Size in GB of the EBS volume to use for storing - input data during training. Must be large enough to store training - data if input_mode='File' is used (which is the default). -- ``train_max_run`` Timeout in seconds for training, after which Amazon - SageMaker terminates the job regardless of its current status. -- ``input_mode`` The input mode that the algorithm supports. Valid - modes: 'File' - Amazon SageMaker copies the training dataset from the - S3 location to a directory in the Docker container. 
'Pipe' - Amazon - SageMaker streams data directly from S3 to the container via a Unix - named pipe. -- ``output_path`` S3 location where you want the training result (model - artifacts and optional output files) saved. If not specified, results - are stored to a default bucket. If the bucket with the specific name - does not exist, the estimator creates the bucket during the ``fit`` - method execution. -- ``output_kms_key`` Optional KMS key ID to optionally encrypt training - output with. -- ``job_name`` Name to assign for the training job that the ``fit``` - method launches. If not specified, the estimator generates a default - job name, based on the training image name and current timestamp -- ``image_name`` An alternative docker image to use for training and - serving. If specified, the estimator will use this image for training and - hosting, instead of selecting the appropriate SageMaker official image based on - framework_version and py_version. Refer to: `SageMaker PyTorch Docker Containers - <#id4>`_ for details on what the Official images support - and where to find the source code to build your custom image. - -Calling fit -~~~~~~~~~~~ - -You start your training script by calling ``fit`` on a ``PyTorch`` Estimator. ``fit`` takes both required and optional -arguments. - -Required arguments -'''''''''''''''''' - -- ``inputs``: This can take one of the following forms: A string - S3 URI, for example ``s3://my-bucket/my-training-data``. In this - case, the S3 objects rooted at the ``my-training-data`` prefix will - be available in the default ``train`` channel. A dict from - string channel names to S3 URIs. In this case, the objects rooted at - each S3 prefix will available as files in each channel directory. - -For example: - -.. code:: python - - {'train':'s3://my-bucket/my-training-data', - 'eval':'s3://my-bucket/my-evaluation-data'} - -.. optional-arguments-1: - -Optional arguments -'''''''''''''''''' - -- ``wait``: Defaults to True, whether to block and wait for the - training script to complete before returning. -- ``logs``: Defaults to True, whether to show logs produced by training - job in the Python session. Only meaningful when wait is True. - - -Distributed PyTorch Training ----------------------------- - -You can run a multi-machine, distributed PyTorch training using the PyTorch Estimator. By default, PyTorch objects will -submit single-machine training jobs to SageMaker. If you set ``train_instance_count`` to be greater than one, multi-machine -training jobs will be launched when ``fit`` is called. When you run multi-machine training, SageMaker will import your -training script and run it on each host in the cluster. - -To initialize distributed training in your script you would call ``dist.init_process_group`` providing desired backend -and rank and setting 'WORLD_SIZE' environment variable similar to how you would do it outside of SageMaker using -environment variable initialization: - -.. code:: python - - if args.distributed: - # Initialize the distributed environment. - world_size = len(args.hosts) - os.environ['WORLD_SIZE'] = str(world_size) - host_rank = args.hosts.index(args.current_host) - dist.init_process_group(backend=args.backend, rank=host_rank) - -SageMaker sets 'MASTER_ADDR' and 'MASTER_PORT' environment variables for you, but you can overwrite them. 
- -Supported backends: -- `gloo` and `tcp` for cpu instances -- `gloo` and `nccl` for gpu instances - -Saving models -------------- - -In order to save your trained PyTorch model for deployment on SageMaker, your training script should save your model -to a certain filesystem path called ``model_dir``. This value is accessible through the environment variable -``SM_MODEL_DIR``. The following code demonstrates how to save a trained PyTorch model named ``model`` as -``model.pth`` at the : - -.. code:: python - - import argparse - import os - import torch - - if __name__=='__main__': - # default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable. - parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR']) - args, _ = parser.parse_known_args() - - # ... train `model`, then save it to `model_dir` - with open(os.path.join(args.model_dir, 'model.pth'), 'wb') as f: - torch.save(model.state_dict(), f) - -After your training job is complete, SageMaker will compress and upload the serialized model to S3, and your model data -will be available in the S3 ``output_path`` you specified when you created the PyTorch Estimator. - -Deploying PyTorch Models ------------------------- - -After an PyTorch Estimator has been fit, you can host the newly created model in SageMaker. - -After calling ``fit``, you can call ``deploy`` on a ``PyTorch`` Estimator to create a SageMaker Endpoint. -The Endpoint runs a SageMaker-provided PyTorch model server and hosts the model produced by your training script, -which was run when you called ``fit``. This was the model you saved to ``model_dir``. - -``deploy`` returns a ``Predictor`` object, which you can use to do inference on the Endpoint hosting your PyTorch model. -Each ``Predictor`` provides a ``predict`` method which can do inference with numpy arrays or Python lists. -Inference arrays or lists are serialized and sent to the PyTorch model server by an ``InvokeEndpoint`` SageMaker -operation. - -``predict`` returns the result of inference against your model. By default, the inference result a NumPy array. - -.. code:: python - - # Train my estimator - pytorch_estimator = PyTorch(entry_point='train_and_deploy.py', - train_instance_type='ml.p3.2xlarge', - train_instance_count=1, - framework_version='1.0.0') - pytorch_estimator.fit('s3://my_bucket/my_training_data/') - - # Deploy my estimator to a SageMaker Endpoint and get a Predictor - predictor = pytorch_estimator.deploy(instance_type='ml.m4.xlarge', - initial_instance_count=1) - - # `data` is a NumPy array or a Python list. - # `response` is a NumPy array. - response = predictor.predict(data) - -You use the SageMaker PyTorch model server to host your PyTorch model when you call ``deploy`` on an ``PyTorch`` -Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates. -You can access the name of the Endpoint by the ``name`` property on the returned ``Predictor``. - - -The SageMaker PyTorch Model Server ----------------------------------- - -The PyTorch Endpoint you create with ``deploy`` runs a SageMaker PyTorch model server. -The model server loads the model that was saved by your training script and performs inference on the model in response -to SageMaker InvokeEndpoint API calls. - -You can configure two components of the SageMaker PyTorch model server: Model loading and model serving. -Model loading is the process of deserializing your saved model back into an PyTorch model. 
-Serving is the process of translating InvokeEndpoint requests to inference calls on the loaded model. - -You configure the PyTorch model server by defining functions in the Python source file you passed to the PyTorch constructor. - -Model loading -~~~~~~~~~~~~~ - -Before a model can be served, it must be loaded. The SageMaker PyTorch model server loads your model by invoking a -``model_fn`` function that you must provide in your script. The ``model_fn`` should have the following signature: - -.. code:: python - - def model_fn(model_dir) - -SageMaker will inject the directory where your model files and sub-directories, saved by ``save``, have been mounted. -Your model function should return a model object that can be used for model serving. - -The following code-snippet shows an example ``model_fn`` implementation. -It loads the model parameters from a ``model.pth`` file in the SageMaker model directory ``model_dir``. - -.. code:: python - - import torch - import os - - def model_fn(model_dir): - model = Your_Model() - with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: - model.load_state_dict(torch.load(f)) - return model - -Model serving -~~~~~~~~~~~~~ - -After the SageMaker model server has loaded your model by calling ``model_fn``, SageMaker will serve your model. -Model serving is the process of responding to inference requests, received by SageMaker InvokeEndpoint API calls. -The SageMaker PyTorch model server breaks request handling into three steps: - - -- input processing, -- prediction, and -- output processing. - -In a similar way to model loading, you configure these steps by defining functions in your Python source file. - -Each step involves invoking a python function, with information about the request and the return value from the previous -function in the chain. Inside the SageMaker PyTorch model server, the process looks like: - -.. code:: python - - # Deserialize the Invoke request body into an object we can perform prediction on - input_object = input_fn(request_body, request_content_type) - - # Perform prediction on the deserialized object, with the loaded model - prediction = predict_fn(input_object, model) - - # Serialize the prediction result into the desired response content type - output = output_fn(prediction, response_content_type) - -The above code sample shows the three function definitions: - -- ``input_fn``: Takes request data and deserializes the data into an - object for prediction. -- ``predict_fn``: Takes the deserialized request object and performs - inference against the loaded model. -- ``output_fn``: Takes the result of prediction and serializes this - according to the response content type. - -The SageMaker PyTorch model server provides default implementations of these functions. -You can provide your own implementations for these functions in your hosting script. -If you omit any definition then the SageMaker PyTorch model server will use its default implementation for that -function. - -The ``RealTimePredictor`` used by PyTorch in the SageMaker Python SDK serializes NumPy arrays to the `NPY `_ format -by default, with Content-Type ``application/x-npy``. The SageMaker PyTorch model server can deserialize NPY-formatted -data (along with JSON and CSV data). - -If you rely solely on the SageMaker PyTorch model server defaults, you get the following functionality: - -- Prediction on models that implement the ``__call__`` method -- Serialization and deserialization of torch.Tensor. 
- -The default ``input_fn`` and ``output_fn`` are meant to make it easy to predict on torch.Tensors. If your model expects -a torch.Tensor and returns a torch.Tensor, then these functions do not have to be overridden when sending NPY-formatted -data. - -In the following sections we describe the default implementations of input_fn, predict_fn, and output_fn. -We describe the input arguments and expected return types of each, so you can define your own implementations. - -Input processing -'''''''''''''''' - -When an InvokeEndpoint operation is made against an Endpoint running a SageMaker PyTorch model server, -the model server receives two pieces of information: - -- The request Content-Type, for example "application/x-npy" -- The request data body, a byte array - -The SageMaker PyTorch model server will invoke an ``input_fn`` function in your hosting script, -passing in this information. If you define an ``input_fn`` function definition, -it should return an object that can be passed to ``predict_fn`` and have the following signature: - -.. code:: python - - def input_fn(request_body, request_content_type) - -Where ``request_body`` is a byte buffer and ``request_content_type`` is a Python string - -The SageMaker PyTorch model server provides a default implementation of ``input_fn``. -This function deserializes JSON, CSV, or NPY encoded data into a torch.Tensor. - -Default NPY deserialization requires ``request_body`` to follow the `NPY `_ format. For PyTorch, the Python SDK -defaults to sending prediction requests with this format. - -Default JSON deserialization requires ``request_body`` contain a single json list. -Sending multiple JSON objects within the same ``request_body`` is not supported. -The list must have a dimensionality compatible with the model loaded in ``model_fn``. -The list's shape must be identical to the model's input shape, for all dimensions after the first (which first -dimension is the batch size). - -Default csv deserialization requires ``request_body`` contain one or more lines of CSV numerical data. -The data is loaded into a two-dimensional array, where each line break defines the boundaries of the first dimension. - -The example below shows a custom ``input_fn`` for preparing pickled torch.Tensor. - -.. code:: python - - import numpy as np - import torch - from six import BytesIO - - def input_fn(request_body, request_content_type): - """An input_fn that loads a pickled tensor""" - if request_content_type == 'application/python-pickle': - return torch.load(BytesIO(request_body)) - else: - # Handle other content-types here or raise an Exception - # if the content type is not supported. - pass - - - -Prediction -'''''''''' - -After the inference request has been deserialized by ``input_fn``, the SageMaker PyTorch model server invokes -``predict_fn`` on the return value of ``input_fn``. - -As with ``input_fn``, you can define your own ``predict_fn`` or use the SageMaker PyTorch model server default. - -The ``predict_fn`` function has the following signature: - -.. code:: python - - def predict_fn(input_object, model) - -Where ``input_object`` is the object returned from ``input_fn`` and -``model`` is the model loaded by ``model_fn``. - -The default implementation of ``predict_fn`` invokes the loaded model's ``__call__`` function on ``input_object``, -and returns the resulting value. The return-type should be a torch.Tensor to be compatible with the default -``output_fn``. - -The example below shows an overridden ``predict_fn``: - -.. 
code:: python - - import torch - import numpy as np - - def predict_fn(input_data, model): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - model.to(device) - model.eval() - with torch.no_grad(): - return model(input_data.to(device)) - -If you implement your own prediction function, you should take care to ensure that: - -- The first argument is expected to be the return value from input_fn. - If you use the default input_fn, this will be a torch.Tensor. -- The second argument is the loaded model. -- The return value should be of the correct type to be passed as the - first argument to ``output_fn``. If you use the default - ``output_fn``, this should be a torch.Tensor. - -Output processing -''''''''''''''''' - -After invoking ``predict_fn``, the model server invokes ``output_fn``, passing in the return value from ``predict_fn`` -and the content type for the response, as specified by the InvokeEndpoint request. - -The ``output_fn`` has the following signature: - -.. code:: python - - def output_fn(prediction, content_type) - -Where ``prediction`` is the result of invoking ``predict_fn`` and -the content type for the response, as specified by the InvokeEndpoint request. -The function should return a byte array of data serialized to content_type. - -The default implementation expects ``prediction`` to be a torch.Tensor and can serialize the result to JSON, CSV, or NPY. -It accepts response content types of "application/json", "text/csv", and "application/x-npy". - -Working with Existing Model Data and Training Jobs --------------------------------------------------- - -Attaching to existing training jobs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can attach an PyTorch Estimator to an existing training job using the -``attach`` method. - -.. code:: python - - my_training_job_name = 'MyAwesomePyTorchTrainingJob' - pytorch_estimator = PyTorch.attach(my_training_job_name) - -After attaching, if the training job has finished with job status "Completed", it can be -``deploy``\ ed to create a SageMaker Endpoint and return a -``Predictor``. If the training job is in progress, -attach will block and display log messages from the training job, until the training job completes. - -The ``attach`` method accepts the following arguments: - -- ``training_job_name:`` The name of the training job to attach - to. -- ``sagemaker_session:`` The Session used - to interact with SageMaker - -Deploying Endpoints from model data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -As well as attaching to existing training jobs, you can deploy models directly from model data in S3. -The following code sample shows how to do this, using the ``PyTorchModel`` class. - -.. code:: python - - pytorch_model = PyTorchModel(model_data='s3://bucket/model.tar.gz', role='SageMakerRole', - entry_point='transform_script.py') - - predictor = pytorch_model.deploy(instance_type='ml.c4.xlarge', initial_instance_count=1) - -The PyTorchModel constructor takes the following arguments: - -- ``model_dat:`` An S3 location of a SageMaker model data - .tar.gz file -- ``image:`` A Docker image URI -- ``role:`` An IAM role name or Arn for SageMaker to access AWS - resources on your behalf. -- ``predictor_cls:`` A function to - call to create a predictor. If not None, ``deploy`` will return the - result of invoking this function on the created endpoint name -- ``env:`` Environment variables to run with - ``image`` when hosted in SageMaker. -- ``name:`` The model name. 
If None, a default model name will be - selected on each ``deploy.`` -- ``entry_point:`` Path (absolute or relative) to the Python file - which should be executed as the entry point to model hosting. -- ``source_dir:`` Optional. Path (absolute or relative) to a - directory with any other training source code dependencies including - tne entry point file. Structure within this directory will be - preserved when training on SageMaker. -- ``enable_cloudwatch_metrics:`` Optional. If true, training - and hosting containers will generate Cloudwatch metrics under the - AWS/SageMakerContainer namespace. -- ``container_log_level:`` Log level to use within the container. - Valid values are defined in the Python logging module. -- ``code_location:`` Optional. Name of the S3 bucket where your - custom code will be uploaded to. If not specified, will use the - SageMaker default bucket created by sagemaker.Session. -- ``sagemaker_session:`` The SageMaker Session - object, used for SageMaker interaction - -Your model data must be a .tar.gz file in S3. SageMaker Training Job model data is saved to .tar.gz files in S3, -however if you have local data you want to deploy, you can prepare the data yourself. - -Assuming you have a local directory containg your model data named "my_model" you can tar and gzip compress the file and -upload to S3 using the following commands: - -:: - - tar -czf model.tar.gz my_model - aws s3 cp model.tar.gz s3://my-bucket/my-path/model.tar.gz - -This uploads the contents of my_model to a gzip compressed tar file to S3 in the bucket "my-bucket", with the key -"my-path/model.tar.gz". - -To run this command, you'll need the AWS CLI tool installed. Please refer to our `FAQ`_ for more information on -installing this. - -.. _FAQ: ../../../README.rst#faq - -PyTorch Training Examples -------------------------- - -Amazon provides several example Jupyter notebooks that demonstrate end-to-end training on Amazon SageMaker using PyTorch. -Please refer to: - -https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk - -These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the sample notebooks folder. - - -SageMaker PyTorch Docker containers ------------------------------------ - -When training and deploying training scripts, SageMaker runs your Python script in a Docker container with several -libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control -the environment your script runs in. - -SageMaker runs PyTorch Estimator scripts in either Python 2 or Python 3. You can select the Python version by -passing a ``py_version`` keyword arg to the PyTorch Estimator constructor. Setting this to `py3` (the default) will cause your -training script to be run on Python 3.5. Setting this to `py2` will cause your training script to be run on Python 2.7 -This Python version applies to both the Training Job, created by fit, and the Endpoint, created by deploy. 
- -The PyTorch Docker images have the following dependencies installed: - -+-----------------------------+---------------+-------------------+ -| Dependencies | pytorch 0.4.0 | pytorch 1.0.0 | -+-----------------------------+---------------+-------------------+ -| boto3 | >=1.7.35 | >=1.9.11 | -+-----------------------------+---------------+-------------------+ -| botocore | >=1.10.35 | >=1.12.11 | -+-----------------------------+---------------+-------------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | -+-----------------------------+---------------+-------------------+ -| numpy | >=1.14.3 | >=1.15.2 | -+-----------------------------+---------------+-------------------+ -| Pillow | >=5.1.0 | >=5.2.0 | -+-----------------------------+---------------+-------------------+ -| pip | >=10.0.1 | >=18.0 | -+-----------------------------+---------------+-------------------+ -| python-dateutil | >=2.7.3 | >=2.7.3 | -+-----------------------------+---------------+-------------------+ -| retrying | >=1.3.3 | >=1.3.3 | -+-----------------------------+---------------+-------------------+ -| s3transfer | >=0.1.13 | >=0.1.13 | -+-----------------------------+---------------+-------------------+ -| sagemaker-containers | >=2.1.0 | >=2.1.0 | -+-----------------------------+---------------+-------------------+ -| sagemaker-pytorch-container | 1.0 | 1.0 | -+-----------------------------+---------------+-------------------+ -| setuptools | >=39.2.0 | >=40.4.3 | -+-----------------------------+---------------+-------------------+ -| six | >=1.11.0 | >=1.11.0 | -+-----------------------------+---------------+-------------------+ -| torch | 0.4.0 | 1.0.0 | -+-----------------------------+---------------+-------------------+ -| torchvision | 0.2.1 | 0.2.1 | -+-----------------------------+---------------+-------------------+ -| Python | 2.7 or 3.5 | 2.7 or 3.6 | -+-----------------------------+---------------+-------------------+ - -The Docker images extend Ubuntu 16.04. - -If you need to install other dependencies you can put them into `requirements.txt` file and put it in the source directory -(``source_dir``) you provide to the `PyTorch Estimator <#pytorch-estimators>`__. - -You can select version of PyTorch by passing a ``framework_version`` keyword arg to the PyTorch Estimator constructor. -Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and -minor version, which will cause your training script to be run on the latest supported patch version of that minor -version. - -Alternatively, you can build your own image by following the instructions in the SageMaker Chainer containers -repository, and passing ``image_name`` to the Chainer Estimator constructor. - +=========================================== +Using PyTorch with the SageMaker Python SDK +=========================================== + +.. contents:: + +With PyTorch Estimators and Models, you can train and host PyTorch models on Amazon SageMaker. + +Supported versions of PyTorch: ``0.4.0``, ``1.0.0``. + +We recommend that you use the latest supported version, because that's where we focus most of our development efforts. + +You can visit the PyTorch repository at https://github.com/pytorch/pytorch. + +Training with PyTorch +------------------------ + +Training PyTorch models using ``PyTorch`` Estimators is a two-step process: + +1. Prepare a PyTorch script to run on SageMaker +2. Run this script on SageMaker via a ``PyTorch`` Estimator. 
+
+
+First you prepare your training script; second, you run it on SageMaker via a ``PyTorch`` Estimator.
+You should prepare your script in a separate source file from the notebook, terminal session, or source file you're
+using to submit the script to SageMaker via a ``PyTorch`` Estimator. This will be discussed in further detail below.
+
+Suppose that you already have a PyTorch training script called ``pytorch-train.py``.
+You can then set up a ``PyTorch`` Estimator with keyword arguments to point to this script and define how SageMaker runs it:
+
+.. code:: python
+
+    from sagemaker.pytorch import PyTorch
+
+    pytorch_estimator = PyTorch(entry_point='pytorch-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='1.0.0')
+
+After that, you simply tell the estimator to start a training job and provide an S3 URL
+that is the path to your training data within Amazon S3:
+
+.. code:: python
+
+    pytorch_estimator.fit('s3://bucket/path/to/training/data')
+
+In the following sections, we'll discuss how to prepare a training script for execution on SageMaker,
+then how to run that script on SageMaker using a ``PyTorch`` Estimator.
+
+
+Preparing the PyTorch Training Script
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Your PyTorch training script must be a Python 2.7 or 3.5 compatible source file.
+
+The training script is very similar to a training script you might run outside of SageMaker, but you
+can access useful properties about the training environment through various environment variables, such as:
+
+* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to.
+  These artifacts are uploaded to S3 for model hosting.
+* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host.
+* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may
+  include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed
+  and uploaded to S3 to the same S3 prefix as the model artifacts.
+
+Supposing two input channels, 'train' and 'test', were used in the call to the PyTorch estimator's ``fit`` method,
+the following will be set, following the format "SM_CHANNEL_[channel_name]":
+
+* ``SM_CHANNEL_TRAIN``: A string representing the path to the directory containing data in the 'train' channel
+* ``SM_CHANNEL_TEST``: Same as above, but for the 'test' channel.
+
+A typical training script loads data from the input channels, configures training with hyperparameters, trains a model,
+and saves a model to ``model_dir`` so that it can be hosted later. Hyperparameters are passed to your script as arguments
+and can be retrieved with an ``argparse.ArgumentParser`` instance. For example, a training script might start
+with the following:
+
+.. code:: python
+
+    import argparse
+    import os
+
+    if __name__ == '__main__':
+
+        parser = argparse.ArgumentParser()
+
+        # hyperparameters sent by the client are passed as command-line arguments to the script.
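+        # (The values arrive as strings; the type= converters below turn them
+        # back into numbers before training begins.)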
+        parser.add_argument('--epochs', type=int, default=50)
+        parser.add_argument('--batch-size', type=int, default=64)
+        parser.add_argument('--learning-rate', type=float, default=0.05)
+        parser.add_argument('--use-cuda', type=bool, default=False)
+
+        # Data, model, and output directories
+        parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
+        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
+        parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
+        parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
+
+        args, _ = parser.parse_known_args()
+
+        # ... load from args.train and args.test, train a model, write model to args.model_dir.
+
+Because SageMaker imports your training script, you should put your training code in a main guard
+(``if __name__ == '__main__':``) if you are using the same script to host your model, so that SageMaker does not
+inadvertently run your training code at the wrong point in execution.
+
+Note that SageMaker doesn't support argparse actions. If you want to use, for example, boolean hyperparameters,
+you need to specify `type` as `bool` in your script and provide an explicit `True` or `False` value for this hyperparameter
+when instantiating the PyTorch Estimator.
+
+For more on training environment variables, please visit `SageMaker Containers `_.
+
+Using third-party libraries
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When your training script runs on SageMaker, it has access to some pre-installed third-party libraries, including ``torch``, ``torchvision``, and ``numpy``.
+For more information on the runtime environment, including specific package versions, see `SageMaker PyTorch Docker containers <#id4>`__.
+
+If there are other packages you want to use with your script, you can include a ``requirements.txt`` file in the same directory as your training script to install other dependencies at runtime.
+A ``requirements.txt`` file is a text file that contains a list of items that are installed by using ``pip install``. You can also specify the version of an item to install.
+For information about the format of a ``requirements.txt`` file, see `Requirements Files `__ in the pip documentation.
+
+Running a PyTorch training script in SageMaker
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You run PyTorch training scripts on SageMaker by creating ``PyTorch`` Estimators.
+SageMaker training of your script is invoked when you call ``fit`` on a ``PyTorch`` Estimator.
+The following code sample shows how you train a custom PyTorch script "pytorch-train.py", passing
+in three hyperparameters ('epochs', 'batch-size', and 'learning-rate'), and using two input channel
+directories ('train' and 'test').
+
+.. code:: python
+
+    pytorch_estimator = PyTorch('pytorch-train.py',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='1.0.0',
+                                hyperparameters={'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
+    pytorch_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
+                           'test': 's3://my-data-bucket/path/to/my/test/data'})
+
+
+PyTorch Estimators
+------------------
+
+The ``PyTorch`` constructor takes both required and optional arguments.
+
+Required arguments
+~~~~~~~~~~~~~~~~~~
+
+The following are required arguments to the ``PyTorch`` constructor. When you create a PyTorch object, you must include
+these in the constructor, either positionally or as keyword arguments.
+
+- ``entry_point`` Path (absolute or relative) to the Python file which
+  should be executed as the entry point to training.
+- ``role`` An AWS IAM role (either name or full ARN). The Amazon
+  SageMaker training jobs and APIs that create Amazon SageMaker
+  endpoints use this role to access training data and model artifacts.
+  After the endpoint is created, the inference code might use the IAM
+  role, if accessing AWS resources.
+- ``train_instance_count`` Number of Amazon EC2 instances to use for
+  training.
+- ``train_instance_type`` Type of EC2 instance to use for training, for
+  example, 'ml.m4.xlarge'.
+
+Optional arguments
+~~~~~~~~~~~~~~~~~~
+
+The following are optional arguments. When you create a ``PyTorch`` object, you can specify these as keyword arguments.
+
+- ``source_dir`` Path (absolute or relative) to a directory with any
+  other training source code dependencies including the entry point
+  file. Structure within this directory will be preserved when training
+  on SageMaker.
+- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with
+  any additional libraries that will be exported to the container (default: []).
+  The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
+  If ``source_dir`` points to S3, code will be uploaded and the S3 location will be used
+  instead. Example:
+
+  The following call
+  >>> PyTorch(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
+  results in the following inside the container:
+
+  >>> $ ls
+
+  >>> opt/ml/code
+  >>> ├── train.py
+  >>> ├── common
+  >>> └── virtual-env
+
+- ``hyperparameters`` Hyperparameters that will be used for training.
+  Will be made accessible as a dict[str, str] to the training code on
+  SageMaker. For convenience, accepts other types besides strings, but
+  ``str`` will be called on keys and values to convert them before
+  training.
+- ``py_version`` Python version you want to use for executing your
+  model training code.
+- ``framework_version`` PyTorch version you want to use for executing
+  your model training code. You can find the list of supported versions
+  in `the section below <#sagemaker-pytorch-docker-containers>`__.
+- ``train_volume_size`` Size in GB of the EBS volume to use for storing
+  input data during training. Must be large enough to store training
+  data if input_mode='File' is used (which is the default).
+- ``train_max_run`` Timeout in seconds for training, after which Amazon
+  SageMaker terminates the job regardless of its current status.
+- ``input_mode`` The input mode that the algorithm supports. Valid
+  modes: 'File' - Amazon SageMaker copies the training dataset from the
+  S3 location to a directory in the Docker container. 'Pipe' - Amazon
+  SageMaker streams data directly from S3 to the container via a Unix
+  named pipe.
+- ``output_path`` S3 location where you want the training result (model
+  artifacts and optional output files) saved. If not specified, results
+  are stored to a default bucket. If the bucket with the specific name
+  does not exist, the estimator creates the bucket during the ``fit``
+  method execution.
+- ``output_kms_key`` Optional KMS key ID used to encrypt training
+  output.
+- ``job_name`` Name to assign to the training job that the ``fit``
+  method launches.
If not specified, the estimator generates a default
+  job name, based on the training image name and current timestamp.
+- ``image_name`` An alternative docker image to use for training and
+  serving. If specified, the estimator will use this image for training and
+  hosting, instead of selecting the appropriate SageMaker official image based on
+  framework_version and py_version. Refer to: `SageMaker PyTorch Docker Containers
+  <#id4>`_ for details on what the official images support
+  and where to find the source code to build your custom image.
+
+Calling fit
+~~~~~~~~~~~
+
+You start your training script by calling ``fit`` on a ``PyTorch`` Estimator. ``fit`` takes both required and optional
+arguments.
+
+fit Required arguments
+''''''''''''''''''''''
+
+- ``inputs``: This can take one of the following forms: A string
+  S3 URI, for example ``s3://my-bucket/my-training-data``. In this
+  case, the S3 objects rooted at the ``my-training-data`` prefix will
+  be available in the default ``train`` channel. A dict from
+  string channel names to S3 URIs. In this case, the objects rooted at
+  each S3 prefix will be available as files in each channel directory.
+
+For example:
+
+.. code:: python
+
+    {'train':'s3://my-bucket/my-training-data',
+     'eval':'s3://my-bucket/my-evaluation-data'}
+
+.. optional-arguments-1:
+
+fit Optional arguments
+''''''''''''''''''''''
+
+- ``wait``: Defaults to True, whether to block and wait for the
+  training script to complete before returning.
+- ``logs``: Defaults to True, whether to show logs produced by the
+  training job in the Python session. Only meaningful when ``wait`` is True.
+
+
+Distributed PyTorch Training
+----------------------------
+
+You can run multi-machine, distributed PyTorch training using the PyTorch Estimator. By default, PyTorch objects will
+submit single-machine training jobs to SageMaker. If you set ``train_instance_count`` to be greater than one, multi-machine
+training jobs will be launched when ``fit`` is called. When you run multi-machine training, SageMaker will import your
+training script and run it on each host in the cluster.
+
+To initialize distributed training in your script, call ``dist.init_process_group``, providing the desired backend
+and rank and setting the 'WORLD_SIZE' environment variable, similar to how you would do it outside of SageMaker using
+environment variable initialization:
+
+.. code:: python
+
+    if args.distributed:
+        # Initialize the distributed environment.
+        world_size = len(args.hosts)
+        os.environ['WORLD_SIZE'] = str(world_size)
+        host_rank = args.hosts.index(args.current_host)
+        dist.init_process_group(backend=args.backend, rank=host_rank)
+
+SageMaker sets 'MASTER_ADDR' and 'MASTER_PORT' environment variables for you, but you can overwrite them.
+
+Supported backends:
+- `gloo` and `tcp` for CPU instances
+- `gloo` and `nccl` for GPU instances
+
+Saving models
+-------------
+
+In order to save your trained PyTorch model for deployment on SageMaker, your training script should save your model
+to a certain filesystem path called ``model_dir``. This value is accessible through the environment variable
+``SM_MODEL_DIR``. The following code demonstrates how to save a trained PyTorch model named ``model`` as
+``model.pth`` at the end of training:
+
+.. code:: python
+
+    import argparse
+    import os
+    import torch
+
+    if __name__ == '__main__':
+        # default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable.
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
+        args, _ = parser.parse_known_args()
+
+        # ... train `model`, then save it to `model_dir`
+        with open(os.path.join(args.model_dir, 'model.pth'), 'wb') as f:
+            torch.save(model.state_dict(), f)
+
+After your training job is complete, SageMaker will compress and upload the serialized model to S3, and your model data
+will be available in the S3 ``output_path`` you specified when you created the PyTorch Estimator.
+
+Deploying PyTorch Models
+------------------------
+
+After a PyTorch Estimator has been fit, you can host the newly created model in SageMaker.
+
+After calling ``fit``, you can call ``deploy`` on a ``PyTorch`` Estimator to create a SageMaker Endpoint.
+The Endpoint runs a SageMaker-provided PyTorch model server and hosts the model produced by your training script,
+which was run when you called ``fit``. This was the model you saved to ``model_dir``.
+
+``deploy`` returns a ``Predictor`` object, which you can use to do inference on the Endpoint hosting your PyTorch model.
+Each ``Predictor`` provides a ``predict`` method which can do inference with NumPy arrays or Python lists.
+Inference arrays or lists are serialized and sent to the PyTorch model server by an ``InvokeEndpoint`` SageMaker
+operation.
+
+``predict`` returns the result of inference against your model. By default, the inference result is a NumPy array.
+
+.. code:: python
+
+    # Train my estimator
+    pytorch_estimator = PyTorch(entry_point='train_and_deploy.py',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='1.0.0')
+    pytorch_estimator.fit('s3://my_bucket/my_training_data/')
+
+    # Deploy my estimator to a SageMaker Endpoint and get a Predictor
+    predictor = pytorch_estimator.deploy(instance_type='ml.m4.xlarge',
+                                         initial_instance_count=1)
+
+    # `data` is a NumPy array or a Python list.
+    # `response` is a NumPy array.
+    response = predictor.predict(data)
+
+You use the SageMaker PyTorch model server to host your PyTorch model when you call ``deploy`` on a ``PyTorch``
+Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates.
+You can access the name of the Endpoint through the ``name`` property on the returned ``Predictor``.
+
+
+The SageMaker PyTorch Model Server
+----------------------------------
+
+The PyTorch Endpoint you create with ``deploy`` runs a SageMaker PyTorch model server.
+The model server loads the model that was saved by your training script and performs inference on the model in response
+to SageMaker InvokeEndpoint API calls.
+
+You can configure two components of the SageMaker PyTorch model server: model loading and model serving.
+Model loading is the process of deserializing your saved model back into a PyTorch model.
+Serving is the process of translating InvokeEndpoint requests to inference calls on the loaded model.
+
+You configure the PyTorch model server by defining functions in the Python source file you passed to the PyTorch constructor.
+
+Model loading
+~~~~~~~~~~~~~
+
+Before a model can be served, it must be loaded. The SageMaker PyTorch model server loads your model by invoking a
+``model_fn`` function that you must provide in your script. The ``model_fn`` should have the following signature:
+
+.. code:: python
+
+    def model_fn(model_dir)
+
+SageMaker will inject the directory where your model files and sub-directories, saved by your training script, have been mounted.
+Your model function should return a model object that can be used for model serving.
+
+The following code snippet shows an example ``model_fn`` implementation.
+It loads the model parameters from a ``model.pth`` file in the SageMaker model directory ``model_dir``.
+
+.. code:: python
+
+    import torch
+    import os
+
+    def model_fn(model_dir):
+        model = Your_Model()
+        with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
+            model.load_state_dict(torch.load(f))
+        return model
+
+Model serving
+~~~~~~~~~~~~~
+
+After the SageMaker model server has loaded your model by calling ``model_fn``, SageMaker will serve your model.
+Model serving is the process of responding to inference requests, received via SageMaker InvokeEndpoint API calls.
+The SageMaker PyTorch model server breaks request handling into three steps:
+
+
+- input processing,
+- prediction, and
+- output processing.
+
+In a similar way to model loading, you configure these steps by defining functions in your Python source file.
+
+Each step involves invoking a Python function, with information about the request and the return value from the previous
+function in the chain. Inside the SageMaker PyTorch model server, the process looks like:
+
+.. code:: python
+
+    # Deserialize the Invoke request body into an object we can perform prediction on
+    input_object = input_fn(request_body, request_content_type)
+
+    # Perform prediction on the deserialized object, with the loaded model
+    prediction = predict_fn(input_object, model)
+
+    # Serialize the prediction result into the desired response content type
+    output = output_fn(prediction, response_content_type)
+
+The above code sample shows the three function definitions:
+
+- ``input_fn``: Takes request data and deserializes the data into an
+  object for prediction.
+- ``predict_fn``: Takes the deserialized request object and performs
+  inference against the loaded model.
+- ``output_fn``: Takes the result of prediction and serializes this
+  according to the response content type.
+
+The SageMaker PyTorch model server provides default implementations of these functions.
+You can provide your own implementations for these functions in your hosting script.
+If you omit any definition, then the SageMaker PyTorch model server will use its default implementation for that
+function.
+
+The ``RealTimePredictor`` used by PyTorch in the SageMaker Python SDK serializes NumPy arrays to the `NPY `_ format
+by default, with Content-Type ``application/x-npy``. The SageMaker PyTorch model server can deserialize NPY-formatted
+data (along with JSON and CSV data).
+
+If you rely solely on the SageMaker PyTorch model server defaults, you get the following functionality:
+
+- Prediction on models that implement the ``__call__`` method
+- Serialization and deserialization of torch.Tensor.
+
+The default ``input_fn`` and ``output_fn`` are meant to make it easy to predict on torch.Tensors. If your model expects
+a torch.Tensor and returns a torch.Tensor, then these functions do not have to be overridden when sending NPY-formatted
+data.
+
+In the following sections, we describe the default implementations of ``input_fn``, ``predict_fn``, and ``output_fn``.
+We describe the input arguments and expected return types of each, so you can define your own implementations.
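+
+Putting these pieces together, a hosting script that customizes every step might be laid out like the
+following sketch (``Your_Model`` is a placeholder for your own model class, and only JSON handling is
+shown for brevity; the sections below describe each function in detail):
+
+.. code:: python
+
+    import json
+    import os
+
+    import torch
+
+    def model_fn(model_dir):
+        # Load the model saved by the training script.
+        model = Your_Model()  # placeholder: your model class
+        with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
+            model.load_state_dict(torch.load(f))
+        return model
+
+    def input_fn(request_body, request_content_type):
+        # Deserialize a JSON request body (a byte buffer) into a torch.Tensor.
+        if request_content_type == 'application/json':
+            return torch.tensor(json.loads(request_body.decode('utf-8')))
+        raise ValueError('Unsupported content type: ' + request_content_type)
+
+    def predict_fn(input_object, model):
+        # Run inference on the deserialized input.
+        with torch.no_grad():
+            return model(input_object)
+
+    def output_fn(prediction, content_type):
+        # Serialize the prediction to the requested content type.
+        if content_type == 'application/json':
+            return json.dumps(prediction.tolist()).encode('utf-8')
+        raise ValueError('Unsupported content type: ' + content_type)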
+
+Input processing
+''''''''''''''''
+
+When an InvokeEndpoint operation is made against an Endpoint running a SageMaker PyTorch model server,
+the model server receives two pieces of information:
+
+- The request Content-Type, for example "application/x-npy"
+- The request data body, a byte array
+
+The SageMaker PyTorch model server will invoke an ``input_fn`` function in your hosting script,
+passing in this information. If you provide your own ``input_fn``,
+it should return an object that can be passed to ``predict_fn`` and have the following signature:
+
+.. code:: python
+
+    def input_fn(request_body, request_content_type)
+
+Where ``request_body`` is a byte buffer and ``request_content_type`` is a Python string.
+
+The SageMaker PyTorch model server provides a default implementation of ``input_fn``.
+This function deserializes JSON, CSV, or NPY encoded data into a torch.Tensor.
+
+Default NPY deserialization requires ``request_body`` to follow the `NPY `_ format. For PyTorch, the Python SDK
+defaults to sending prediction requests with this format.
+
+Default JSON deserialization requires ``request_body`` to contain a single JSON list.
+Sending multiple JSON objects within the same ``request_body`` is not supported.
+The list must have a dimensionality compatible with the model loaded in ``model_fn``.
+The list's shape must be identical to the model's input shape, for all dimensions after the first (the first
+dimension is the batch size).
+
+Default CSV deserialization requires ``request_body`` to contain one or more lines of CSV numerical data.
+The data is loaded into a two-dimensional array, where each line break defines the boundaries of the first dimension.
+
+The example below shows a custom ``input_fn`` for loading a pickled ``torch.Tensor``.
+
+.. code:: python
+
+    import torch
+    from six import BytesIO
+
+    def input_fn(request_body, request_content_type):
+        """An input_fn that loads a pickled tensor"""
+        if request_content_type == 'application/python-pickle':
+            return torch.load(BytesIO(request_body))
+        else:
+            # Handle other content-types here or raise an Exception
+            # if the content type is not supported.
+            pass
+
+
+
+Prediction
+''''''''''
+
+After the inference request has been deserialized by ``input_fn``, the SageMaker PyTorch model server invokes
+``predict_fn`` on the return value of ``input_fn``.
+
+As with ``input_fn``, you can define your own ``predict_fn`` or use the SageMaker PyTorch model server default.
+
+The ``predict_fn`` function has the following signature:
+
+.. code:: python
+
+    def predict_fn(input_object, model)
+
+Where ``input_object`` is the object returned from ``input_fn`` and
+``model`` is the model loaded by ``model_fn``.
+
+The default implementation of ``predict_fn`` invokes the loaded model's ``__call__`` function on ``input_object``,
+and returns the resulting value. The return type should be a torch.Tensor to be compatible with the default
+``output_fn``.
+
+The example below shows an overridden ``predict_fn``:
+
+.. code:: python
+
+    import torch
+
+    def predict_fn(input_data, model):
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        model.to(device)
+        model.eval()
+        with torch.no_grad():
+            return model(input_data.to(device))
+
+If you implement your own prediction function, you should take care to ensure that:
+
+- The first argument is expected to be the return value from ``input_fn``.
+  If you use the default ``input_fn``, this will be a torch.Tensor.
+- The second argument is the loaded model.
+- The return value should be of the correct type to be passed as the
+  first argument to ``output_fn``. If you use the default
+  ``output_fn``, this should be a torch.Tensor.
+
+Output processing
+'''''''''''''''''
+
+After invoking ``predict_fn``, the model server invokes ``output_fn``, passing in the return value from ``predict_fn``
+and the content type for the response, as specified by the InvokeEndpoint request.
+
+The ``output_fn`` has the following signature:
+
+.. code:: python
+
+    def output_fn(prediction, content_type)
+
+Where ``prediction`` is the result of invoking ``predict_fn`` and ``content_type`` is
+the content type for the response, as specified by the InvokeEndpoint request.
+The function should return a byte array of data serialized to ``content_type``.
+
+The default implementation expects ``prediction`` to be a torch.Tensor and can serialize the result to JSON, CSV, or NPY.
+It accepts response content types of "application/json", "text/csv", and "application/x-npy".
+
+Working with Existing Model Data and Training Jobs
+--------------------------------------------------
+
+Attaching to existing training jobs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can attach a PyTorch Estimator to an existing training job using the
+``attach`` method.
+
+.. code:: python
+
+    my_training_job_name = 'MyAwesomePyTorchTrainingJob'
+    pytorch_estimator = PyTorch.attach(my_training_job_name)
+
+After attaching, if the training job has finished with job status "Completed", it can be
+``deploy``\ ed to create a SageMaker Endpoint and return a
+``Predictor``. If the training job is in progress,
+``attach`` will block and display log messages from the training job, until the training job completes.
+
+The ``attach`` method accepts the following arguments:
+
+- ``training_job_name:`` The name of the training job to attach
+  to.
+- ``sagemaker_session:`` The Session used
+  to interact with SageMaker.
+
+Deploying Endpoints from model data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As well as attaching to existing training jobs, you can deploy models directly from model data in S3.
+The following code sample shows how to do this, using the ``PyTorchModel`` class.
+
+.. code:: python
+
+    pytorch_model = PyTorchModel(model_data='s3://bucket/model.tar.gz', role='SageMakerRole',
+                                 entry_point='transform_script.py')
+
+    predictor = pytorch_model.deploy(instance_type='ml.c4.xlarge', initial_instance_count=1)
+
+The PyTorchModel constructor takes the following arguments:
+
+- ``model_data:`` An S3 location of a SageMaker model data
+  .tar.gz file
+- ``image:`` A Docker image URI
+- ``role:`` An IAM role name or ARN for SageMaker to access AWS
+  resources on your behalf.
+- ``predictor_cls:`` A function to
+  call to create a predictor. If not None, ``deploy`` will return the
+  result of invoking this function on the created endpoint name
+- ``env:`` Environment variables to run with
+  ``image`` when hosted in SageMaker.
+- ``name:`` The model name. If None, a default model name will be
+  selected on each ``deploy``.
+- ``entry_point:`` Path (absolute or relative) to the Python file
+  which should be executed as the entry point to model hosting.
+- ``source_dir:`` Optional. Path (absolute or relative) to a
+  directory with any other training source code dependencies including
+  the entry point file. Structure within this directory will be
+  preserved when training on SageMaker.
+- ``enable_cloudwatch_metrics:`` Optional.
+  and hosting containers will generate CloudWatch metrics under the
+  AWS/SageMakerContainer namespace.
+- ``container_log_level:`` Log level to use within the container.
+  Valid values are defined in the Python logging module.
+- ``code_location:`` Optional. Name of the S3 bucket where your
+  custom code will be uploaded to. If not specified, will use the
+  SageMaker default bucket created by sagemaker.Session.
+- ``sagemaker_session:`` The SageMaker Session
+  object, used for SageMaker interaction.
+
+Your model data must be a .tar.gz file in S3. SageMaker Training Job model data is saved to .tar.gz files in S3;
+however, if you have local data you want to deploy, you can prepare the data yourself.
+
+Assuming you have a local directory containing your model data named "my_model", you can tar and gzip compress the
+files and upload the archive to S3 using the following commands:
+
+::
+
+    tar -czf model.tar.gz my_model
+    aws s3 cp model.tar.gz s3://my-bucket/my-path/model.tar.gz
+
+This packages the contents of my_model into a gzip-compressed tar file and uploads it to S3 in the bucket "my-bucket",
+with the key "my-path/model.tar.gz".
+
+To run these commands, you'll need the AWS CLI tool installed. Please refer to our `FAQ`_ for more information on
+installing this.
+
+.. _FAQ: ../../../README.rst#faq
+
+PyTorch Training Examples
+-------------------------
+
+Amazon provides several example Jupyter notebooks that demonstrate end-to-end training on Amazon SageMaker using PyTorch.
+Please refer to:
+
+https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk
+
+These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the sample notebooks folder.
+
+
+SageMaker PyTorch Docker containers
+-----------------------------------
+
+When training and hosting your scripts, SageMaker runs your Python script in a Docker container with several
+libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control
+the environment your script runs in.
+
+SageMaker runs PyTorch Estimator scripts in either Python 2 or Python 3. You can select the Python version by
+passing a ``py_version`` keyword arg to the PyTorch Estimator constructor. Setting this to `py3` (the default) will cause your
+training script to be run on Python 3.5. Setting this to `py2` will cause your training script to be run on Python 2.7.
+This Python version applies to both the Training Job, created by fit, and the Endpoint, created by deploy.
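+
+For example, a minimal sketch of selecting the Python version (``train.py`` here is a hypothetical entry point;
+substitute your own training script):
+
+.. code:: python
+
+    from sagemaker.pytorch import PyTorch
+
+    # py_version selects the Python version used for both the Training Job
+    # and the Endpoint; 'py3' is the default.
+    pytorch_estimator = PyTorch(entry_point='train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.p3.2xlarge',
+                                train_instance_count=1,
+                                framework_version='1.0.0',
+                                py_version='py3')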
+
+The PyTorch Docker images have the following dependencies installed:
+
++-----------------------------+---------------+-------------------+
+| Dependencies                | pytorch 0.4.0 | pytorch 1.0.0     |
++-----------------------------+---------------+-------------------+
+| boto3                       | >=1.7.35      | >=1.9.11          |
++-----------------------------+---------------+-------------------+
+| botocore                    | >=1.10.35     | >=1.12.11         |
++-----------------------------+---------------+-------------------+
+| CUDA (GPU image only)       | 9.0           | 9.0               |
++-----------------------------+---------------+-------------------+
+| numpy                       | >=1.14.3      | >=1.15.2          |
++-----------------------------+---------------+-------------------+
+| Pillow                      | >=5.1.0       | >=5.2.0           |
++-----------------------------+---------------+-------------------+
+| pip                         | >=10.0.1      | >=18.0            |
++-----------------------------+---------------+-------------------+
+| python-dateutil             | >=2.7.3       | >=2.7.3           |
++-----------------------------+---------------+-------------------+
+| retrying                    | >=1.3.3       | >=1.3.3           |
++-----------------------------+---------------+-------------------+
+| s3transfer                  | >=0.1.13      | >=0.1.13          |
++-----------------------------+---------------+-------------------+
+| sagemaker-containers        | >=2.1.0       | >=2.1.0           |
++-----------------------------+---------------+-------------------+
+| sagemaker-pytorch-container | 1.0           | 1.0               |
++-----------------------------+---------------+-------------------+
+| setuptools                  | >=39.2.0      | >=40.4.3          |
++-----------------------------+---------------+-------------------+
+| six                         | >=1.11.0      | >=1.11.0          |
++-----------------------------+---------------+-------------------+
+| torch                       | 0.4.0         | 1.0.0             |
++-----------------------------+---------------+-------------------+
+| torchvision                 | 0.2.1         | 0.2.1             |
++-----------------------------+---------------+-------------------+
+| Python                      | 2.7 or 3.5    | 2.7 or 3.6        |
++-----------------------------+---------------+-------------------+
+
+The Docker images extend Ubuntu 16.04.
+
+If you need to install other dependencies, you can put them into a `requirements.txt` file and put it in the source directory
+(``source_dir``) you provide to the `PyTorch Estimator <#pytorch-estimators>`__.
+
+You can select the version of PyTorch by passing a ``framework_version`` keyword arg to the PyTorch Estimator constructor.
+Currently supported versions are listed in the above table. You can also set ``framework_version`` to specify only the
+major and minor version, which will cause your training script to be run on the latest supported patch version of that
+minor version.
+
+Alternatively, you can build your own image by following the instructions in the SageMaker PyTorch containers
+repository, and passing ``image_name`` to the PyTorch Estimator constructor.
+
 You can visit `the SageMaker PyTorch containers repository `_.
\ No newline at end of file
diff --git a/doc/using_rl.rst b/doc/using_rl.rst
index b32ee0ed18..c99420708f 100644
--- a/doc/using_rl.rst
+++ b/doc/using_rl.rst
@@ -1,322 +1,322 @@
-==========================================================
-Using Reinforcement Learning with the SageMaker Python SDK
-==========================================================
-
-.. contents::
-
-With Reinforcement Learning (RL) Estimators, you can train reinforcement learning models on Amazon SageMaker.
-
-Supported versions of Coach: ``0.11.1``, ``0.10.1`` with TensorFlow, ``0.11.0`` with TensorFlow or MXNet.
-For more information about Coach, see https://github.com/NervanaSystems/coach - -Supported versions of Ray: ``0.5.3`` with TensorFlow. -For more information about Ray, see https://github.com/ray-project/ray - -RL Training ------------ - -Training RL models using ``RLEstimator`` is a two-step process: - -1. Prepare a training script to run on SageMaker -2. Run this script on SageMaker via an ``RlEstimator``. - -You should prepare your script in a separate source file than the notebook, terminal session, or source file you're -using to submit the script to SageMaker via an ``RlEstimator``. This will be discussed in further detail below. - -Suppose that you already have a training script called ``coach-train.py``. -You can then create an ``RLEstimator`` with keyword arguments to point to this script and define how SageMaker runs it: - -.. code:: python - - from sagemaker.rl import RLEstimator, RLToolkit, RLFramework - - rl_estimator = RLEstimator(entry_point='coach-train.py', - toolkit=RLToolkit.COACH, - toolkit_version='0.11.1', - framework=RLFramework.TENSORFLOW, - role='SageMakerRole', - train_instance_type='ml.p3.2xlarge', - train_instance_count=1) - -After that, you simply tell the estimator to start a training job: - -.. code:: python - - rl_estimator.fit() - -In the following sections, we'll discuss how to prepare a training script for execution on SageMaker -and how to run that script on SageMaker using ``RLEstimator``. - - -Preparing the RL Training Script -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Your RL training script must be a Python 3.5 compatible source file from MXNet framework or Python 3.6 for TensorFlow. - -The training script is very similar to a training script you might run outside of SageMaker, but you -can access useful properties about the training environment through various environment variables, such as - -* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to. - These artifacts are uploaded to S3 for model hosting. -* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host. -* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may - include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed - and uploaded to S3 to the same S3 prefix as the model artifacts. - -For the exhaustive list of available environment variables, see the -`SageMaker Containers documentation `__. - - -RL Estimators -------------- - -The ``RLEstimator`` constructor takes both required and optional arguments. - -Required arguments -~~~~~~~~~~~~~~~~~~ - -The following are required arguments to the ``RLEstimator`` constructor. When you create an instance of RLEstimator, you must include -these in the constructor, either positionally or as keyword arguments. - -- ``entry_point`` Path (absolute or relative) to the Python file which - should be executed as the entry point to training. -- ``role`` An AWS IAM role (either name or full ARN). The Amazon - SageMaker training jobs and APIs that create Amazon SageMaker - endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM - role, if accessing AWS resource. -- ``train_instance_count`` Number of Amazon EC2 instances to use for - training. -- ``train_instance_type`` Type of EC2 instance to use for training, for - example, 'ml.m4.xlarge'. 
- -You must as well include either: - -- ``toolkit`` RL toolkit (Ray RLlib or Coach) you want to use for executing your model training code. - -- ``toolkit_version`` RL toolkit version you want to be use for executing your model training code. - -- ``framework`` Framework (MXNet or TensorFlow) you want to be used as - a toolkit backed for reinforcement learning training. - -or provide: - -- ``image_name`` An alternative docker image to use for training and - serving. If specified, the estimator will use this image for training and - hosting, instead of selecting the appropriate SageMaker official image based on - framework_version and py_version. Refer to: `SageMaker RL Docker Containers - <#sagemaker-rl-docker-containers>`_ for details on what the Official images support - and where to find the source code to build your custom image. - - -Optional arguments -~~~~~~~~~~~~~~~~~~ - -The following are optional arguments. When you create an ``RlEstimator`` object, you can specify these as keyword arguments. - -- ``source_dir`` Path (absolute or relative) to a directory with any - other training source code dependencies including the entry point - file. Structure within this directory will be preserved when training - on SageMaker. -- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with - any additional libraries that will be exported to the container (default: ``[]``). - The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. - If the ``source_dir`` points to S3, code will be uploaded and the S3 location will be used - instead. - - For example, the following call: - - .. code:: python - - >>> RLEstimator(entry_point='train.py', - toolkit=RLToolkit.COACH, - toolkit_version='0.11.0', - framework=RLFramework.TENSORFLOW, - dependencies=['my/libs/common', 'virtual-env']) - - results in the following inside the container: - - .. code:: bash - - >>> $ ls - - >>> opt/ml/code - >>> ├── train.py - >>> ├── common - >>> └── virtual-env - -- ``hyperparameters`` Hyperparameters that will be used for training. - Will be made accessible as a ``dict[str, str]`` to the training code on - SageMaker. For convenience, accepts other types besides strings, but - ``str`` will be called on keys and values to convert them before - training. -- ``train_volume_size`` Size in GB of the EBS volume to use for storing - input data during training. Must be large enough to store training - data if ``input_mode='File'`` is used (which is the default). -- ``train_max_run`` Timeout in seconds for training, after which Amazon - SageMaker terminates the job regardless of its current status. -- ``input_mode`` The input mode that the algorithm supports. Valid - modes: 'File' - Amazon SageMaker copies the training dataset from the - S3 location to a directory in the Docker container. 'Pipe' - Amazon - SageMaker streams data directly from S3 to the container via a Unix - named pipe. -- ``output_path`` S3 location where you want the training result (model - artifacts and optional output files) saved. If not specified, results - are stored to a default bucket. If the bucket with the specific name - does not exist, the estimator creates the bucket during the ``fit`` - method execution. -- ``output_kms_key`` Optional KMS key ID to optionally encrypt training - output with. -- ``job_name`` Name to assign for the training job that the ``fit``` - method launches. 
If not specified, the estimator generates a default - job name, based on the training image name and current timestamp - -Calling fit -~~~~~~~~~~~ - -You start your training script by calling ``fit`` on an ``RLEstimator``. ``fit`` takes both a few optional -arguments. - -Optional arguments -'''''''''''''''''' - -- ``inputs``: This can take one of the following forms: A string - S3 URI, for example ``s3://my-bucket/my-training-data``. In this - case, the S3 objects rooted at the ``my-training-data`` prefix will - be available in the default ``train`` channel. A dict from - string channel names to S3 URIs. In this case, the objects rooted at - each S3 prefix will available as files in each channel directory. -- ``wait``: Defaults to True, whether to block and wait for the - training script to complete before returning. -- ``logs``: Defaults to True, whether to show logs produced by training - job in the Python session. Only meaningful when wait is True. - - -Distributed RL Training ------------------------ - -Amazon SageMaker RL supports multi-core and multi-instance distributed training. -Depending on your use case, training and/or environment rollout can be distributed. - -Please see the `Amazon SageMaker examples `_ -on how it can be done using different RL toolkits. - - -Saving models -------------- - -In order to save your trained PyTorch model for deployment on SageMaker, your training script should save your model -to a certain filesystem path ``/opt/ml/model``. This value is also accessible through the environment variable -``SM_MODEL_DIR``. - -Deploying RL Models -------------------- - -After an RL Estimator has been fit, you can host the newly created model in SageMaker. - -After calling ``fit``, you can call ``deploy`` on an ``RlEstimator`` Estimator to create a SageMaker Endpoint. -The Endpoint runs one of the SageMaker-provided model server based on the ``framework`` parameter -specified in the ``RLEstimator`` constructor and hosts the model produced by your training script, -which was run when you called ``fit``. This was the model you saved to ``model_dir``. -In case if ``image_name`` was specified it would use provided image for the deployment. - -``deploy`` returns a ``sagemaker.mxnet.MXNetPredictor`` for MXNet or -``sagemaker.tensorflow.serving.Predictor`` for TensorFlow. - -``predict`` returns the result of inference against your model. - -.. code:: python - - # Train my estimator - rl_estimator = RLEstimator(entry_point='coach-train.py', - toolkit=RLToolkit.COACH, - toolkit_version='0.11.0', - framework=RLFramework.MXNET, - role='SageMakerRole', - train_instance_type='ml.c4.2xlarge', - train_instance_count=1) - - rl_estimator.fit() - - # Deploy my estimator to a SageMaker Endpoint and get a MXNetPredictor - predictor = rl_estimator.deploy(instance_type='ml.m4.xlarge', - initial_instance_count=1) - - response = predictor.predict(data) - -For more information please see `The SageMaker MXNet Model Server `_ -and `Deploying to TensorFlow Serving Endpoints `_ documentation. - - -Working with Existing Training Jobs ------------------------------------ - -Attaching to existing training jobs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can attach an RL Estimator to an existing training job using the -``attach`` method. - -.. 
code:: python - - my_training_job_name = 'MyAwesomeRLTrainingJob' - rl_estimator = RLEstimator.attach(my_training_job_name) - -After attaching, if the training job has finished with job status "Completed", it can be -``deploy``\ ed to create a SageMaker Endpoint and return a ``Predictor``. If the training job is in progress, -attach will block and display log messages from the training job, until the training job completes. - -The ``attach`` method accepts the following arguments: - -- ``training_job_name:`` The name of the training job to attach - to. -- ``sagemaker_session:`` The Session used - to interact with SageMaker - -RL Training Examples --------------------- - -Amazon provides several example Jupyter notebooks that demonstrate end-to-end training on Amazon SageMaker using RL. -Please refer to: - -https://github.com/awslabs/amazon-sagemaker-examples/tree/master/reinforcement_learning - -These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the sample notebooks folder. - - -SageMaker RL Docker Containers ------------------------------- - -When training and deploying training scripts, SageMaker runs your Python script in a Docker container with several -libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control -the environment your script runs in. - -SageMaker runs RL Estimator scripts in either Python 3.5 for MXNet or Python 3.6 for TensorFlow. - -The Docker images have the following dependencies installed: - -+-------------------------+-------------------+-------------------+-------------------+ -| Dependencies | Coach 0.10.1 | Coach 0.11.0 | Ray 0.5.3 | -+-------------------------+-------------------+-------------------+-------------------+ -| Python | 3.6 | 3.5(MXNet) or | 3.6 | -| | | 3.6(TensorFlow) | | -+-------------------------+-------------------+-------------------+-------------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | 9.0 | -+-------------------------+-------------------+-------------------+-------------------+ -| DL Framework | TensorFlow-1.11.0 | MXNet-1.3.0 or | TensorFlow-1.11.0 | -| | | TensorFlow-1.11.0 | | -+-------------------------+-------------------+-------------------+-------------------+ -| gym | 0.10.5 | 0.10.5 | 0.10.5 | -+-------------------------+-------------------+-------------------+-------------------+ - -The Docker images extend Ubuntu 16.04. - -You can select version of by passing a ``framework_version`` keyword arg to the RL Estimator constructor. -Currently supported versions are listed in the above table. You can also set ``framework_version`` to only specify major and -minor version, which will cause your training script to be run on the latest supported patch version of that minor -version. - -Alternatively, you can build your own image by following the instructions in the SageMaker RL containers -repository, and passing ``image_name`` to the RL Estimator constructor. - +========================================================== +Using Reinforcement Learning with the SageMaker Python SDK +========================================================== + +.. contents:: + +With Reinforcement Learning (RL) Estimators, you can train reinforcement learning models on Amazon SageMaker. + +Supported versions of Coach: ``0.11.1``, ``0.10.1`` with TensorFlow, ``0.11.0`` with TensorFlow or MXNet. +For more information about Coach, see https://github.com/NervanaSystems/coach + +Supported versions of Ray: ``0.5.3`` with TensorFlow. 
+For more information about Ray, see https://github.com/ray-project/ray
+
+RL Training
+-----------
+
+Training RL models using ``RLEstimator`` is a two-step process:
+
+1. Prepare a training script to run on SageMaker
+2. Run this script on SageMaker via an ``RLEstimator``.
+
+You should prepare your script in a separate source file from the notebook, terminal session, or source file you're
+using to submit the script to SageMaker via an ``RLEstimator``. This will be discussed in further detail below.
+
+Suppose that you already have a training script called ``coach-train.py``.
+You can then create an ``RLEstimator`` with keyword arguments to point to this script and define how SageMaker runs it:
+
+.. code:: python
+
+    from sagemaker.rl import RLEstimator, RLToolkit, RLFramework
+
+    rl_estimator = RLEstimator(entry_point='coach-train.py',
+                               toolkit=RLToolkit.COACH,
+                               toolkit_version='0.11.1',
+                               framework=RLFramework.TENSORFLOW,
+                               role='SageMakerRole',
+                               train_instance_type='ml.p3.2xlarge',
+                               train_instance_count=1)
+
+After that, you simply tell the estimator to start a training job:
+
+.. code:: python
+
+    rl_estimator.fit()
+
+In the following sections, we'll discuss how to prepare a training script for execution on SageMaker
+and how to run that script on SageMaker using ``RLEstimator``.
+
+
+Preparing the RL Training Script
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Your RL training script must be a Python 3.5 compatible source file when using the MXNet framework,
+or a Python 3.6 compatible source file when using TensorFlow.
+
+The training script is very similar to a training script you might run outside of SageMaker, but you
+can access useful properties about the training environment through various environment variables, such as
+
+* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to.
+  These artifacts are uploaded to S3 for model hosting.
+* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host.
+* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may
+  include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed
+  and uploaded to S3 to the same S3 prefix as the model artifacts.
+
+For the exhaustive list of available environment variables, see the
+`SageMaker Containers documentation `__.
+
+
+RL Estimators
+-------------
+
+The ``RLEstimator`` constructor takes both required and optional arguments.
+
+Required arguments
+~~~~~~~~~~~~~~~~~~
+
+The following are required arguments to the ``RLEstimator`` constructor. When you create an instance of ``RLEstimator``, you must include
+these in the constructor, either positionally or as keyword arguments.
+
+- ``entry_point`` Path (absolute or relative) to the Python file which
+  should be executed as the entry point to training.
+- ``role`` An AWS IAM role (either name or full ARN). The Amazon
+  SageMaker training jobs and APIs that create Amazon SageMaker
+  endpoints use this role to access training data and model artifacts.
+  After the endpoint is created, the inference code might use the IAM
+  role, if it accesses AWS resources.
+- ``train_instance_count`` Number of Amazon EC2 instances to use for
+  training.
+- ``train_instance_type`` Type of EC2 instance to use for training, for
+  example, 'ml.m4.xlarge'.
+
+You must also include either:
+
+- ``toolkit`` RL toolkit (Ray RLlib or Coach) you want to use for executing your model training code.
+
+- ``toolkit_version`` RL toolkit version you want to use for executing your model training code.
+
+- ``framework`` Framework (MXNet or TensorFlow) you want to use as
+  the toolkit backend for reinforcement learning training.
+
+or provide:
+
+- ``image_name`` An alternative Docker image to use for training and
+  serving. If specified, the estimator will use this image for training and
+  hosting, instead of selecting the appropriate SageMaker official image based on
+  framework_version and py_version. Refer to: `SageMaker RL Docker Containers
+  <#sagemaker-rl-docker-containers>`_ for details on what the official images support
+  and where to find the source code to build your custom image.
+
+
+Optional arguments
+~~~~~~~~~~~~~~~~~~
+
+The following are optional arguments. When you create an ``RLEstimator`` object, you can specify these as keyword arguments.
+
+- ``source_dir`` Path (absolute or relative) to a directory with any
+  other training source code dependencies including the entry point
+  file. Structure within this directory will be preserved when training
+  on SageMaker.
+- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with
+  any additional libraries that will be exported to the container (default: ``[]``).
+  The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
+  If the ``source_dir`` points to S3, code will be uploaded and the S3 location will be used
+  instead.
+
+  For example, the following call:
+
+  .. code:: python
+
+      >>> RLEstimator(entry_point='train.py',
+                      toolkit=RLToolkit.COACH,
+                      toolkit_version='0.11.0',
+                      framework=RLFramework.TENSORFLOW,
+                      dependencies=['my/libs/common', 'virtual-env'])
+
+  results in the following inside the container:
+
+  .. code:: bash
+
+      >>> $ ls
+
+      >>> opt/ml/code
+      >>> ├── train.py
+      >>> ├── common
+      >>> └── virtual-env
+
+- ``hyperparameters`` Hyperparameters that will be used for training.
+  Will be made accessible as a ``dict[str, str]`` to the training code on
+  SageMaker. For convenience, accepts other types besides strings, but
+  ``str`` will be called on keys and values to convert them before
+  training.
+- ``train_volume_size`` Size in GB of the EBS volume to use for storing
+  input data during training. Must be large enough to store training
+  data if ``input_mode='File'`` is used (which is the default).
+- ``train_max_run`` Timeout in seconds for training, after which Amazon
+  SageMaker terminates the job regardless of its current status.
+- ``input_mode`` The input mode that the algorithm supports. Valid
+  modes: 'File' - Amazon SageMaker copies the training dataset from the
+  S3 location to a directory in the Docker container. 'Pipe' - Amazon
+  SageMaker streams data directly from S3 to the container via a Unix
+  named pipe.
+- ``output_path`` S3 location where you want the training result (model
+  artifacts and optional output files) saved. If not specified, results
+  are stored to a default bucket. If the bucket with the specific name
+  does not exist, the estimator creates the bucket during the ``fit``
+  method execution.
+- ``output_kms_key`` Optional KMS key ID to optionally encrypt training
+  output with.
+- ``job_name`` Name to assign to the training job that the ``fit``
+  method launches. If not specified, the estimator generates a default
+  job name, based on the training image name and current timestamp.
+
+Calling fit
+~~~~~~~~~~~
+
+You start your training script by calling ``fit`` on an ``RLEstimator``. ``fit`` takes a few
+optional arguments, as shown in the sketch below.
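+
+A minimal sketch (the S3 URI is a placeholder for your own training data):
+
+.. code:: python
+
+    # Train with data from a named 'train' channel, blocking until the job
+    # completes (wait=True) and streaming its logs to this session (logs=True).
+    rl_estimator.fit({'train': 's3://my-bucket/my-training-data'},
+                     wait=True,
+                     logs=True)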
+
+fit Optional arguments
+''''''''''''''''''''''
+
+- ``inputs``: This can take one of the following forms: A string
+  S3 URI, for example ``s3://my-bucket/my-training-data``. In this
+  case, the S3 objects rooted at the ``my-training-data`` prefix will
+  be available in the default ``train`` channel. A dict from
+  string channel names to S3 URIs. In this case, the objects rooted at
+  each S3 prefix will be available as files in each channel directory.
+- ``wait``: Defaults to True, whether to block and wait for the
+  training script to complete before returning.
+- ``logs``: Defaults to True, whether to show logs produced by the training
+  job in the Python session. Only meaningful when ``wait`` is True.
+
+
+Distributed RL Training
+-----------------------
+
+Amazon SageMaker RL supports multi-core and multi-instance distributed training.
+Depending on your use case, training and/or environment rollout can be distributed.
+
+Please see the `Amazon SageMaker examples `_
+on how it can be done using different RL toolkits.
+
+
+Saving models
+-------------
+
+In order to save your trained RL model for deployment on SageMaker, your training script should save your model
+to the filesystem path ``/opt/ml/model``. This value is also accessible through the environment variable
+``SM_MODEL_DIR``.
+
+Deploying RL Models
+-------------------
+
+After an RL Estimator has been fit, you can host the newly created model in SageMaker.
+
+After calling ``fit``, you can call ``deploy`` on an ``RLEstimator`` to create a SageMaker Endpoint.
+The Endpoint runs one of the SageMaker-provided model servers, based on the ``framework`` parameter
+specified in the ``RLEstimator`` constructor, and hosts the model produced by your training script,
+which was run when you called ``fit``. This was the model you saved to ``model_dir``.
+If ``image_name`` was specified, the provided image is used for the deployment instead.
+
+``deploy`` returns a ``sagemaker.mxnet.MXNetPredictor`` for MXNet or
+``sagemaker.tensorflow.serving.Predictor`` for TensorFlow.
+
+``predict`` returns the result of inference against your model.
+
+.. code:: python
+
+    # Train my estimator
+    rl_estimator = RLEstimator(entry_point='coach-train.py',
+                               toolkit=RLToolkit.COACH,
+                               toolkit_version='0.11.0',
+                               framework=RLFramework.MXNET,
+                               role='SageMakerRole',
+                               train_instance_type='ml.c4.2xlarge',
+                               train_instance_count=1)
+
+    rl_estimator.fit()
+
+    # Deploy my estimator to a SageMaker Endpoint and get an MXNetPredictor
+    predictor = rl_estimator.deploy(instance_type='ml.m4.xlarge',
+                                    initial_instance_count=1)
+
+    response = predictor.predict(data)
+
+For more information, please see `The SageMaker MXNet Model Server `_
+and `Deploying to TensorFlow Serving Endpoints `_ documentation.
+
+
+Working with Existing Training Jobs
+-----------------------------------
+
+Attaching to existing training jobs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can attach an RL Estimator to an existing training job using the
+``attach`` method.
+
+.. code:: python
+
+    my_training_job_name = 'MyAwesomeRLTrainingJob'
+    rl_estimator = RLEstimator.attach(my_training_job_name)
+
+After attaching, if the training job has finished with job status "Completed", it can be
+``deploy``\ ed to create a SageMaker Endpoint and return a ``Predictor``. If the training job is in progress,
+attach will block and display log messages from the training job, until the training job completes.
+
+The ``attach`` method accepts the following arguments:
+
+- ``training_job_name:`` The name of the training job to attach
+  to.
+- ``sagemaker_session:`` The Session used
+  to interact with SageMaker.
+
+RL Training Examples
+--------------------
+
+Amazon provides several example Jupyter notebooks that demonstrate end-to-end training on Amazon SageMaker using RL.
+Please refer to:
+
+https://github.com/awslabs/amazon-sagemaker-examples/tree/master/reinforcement_learning
+
+These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the sample notebooks folder.
+
+
+SageMaker RL Docker Containers
+------------------------------
+
+When training and hosting your scripts, SageMaker runs your Python script in a Docker container with several
+libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control
+the environment your script runs in.
+
+SageMaker runs RL Estimator scripts in either Python 3.5 for MXNet or Python 3.6 for TensorFlow.
+
+The Docker images have the following dependencies installed:
+
++-------------------------+-------------------+-------------------+-------------------+
+| Dependencies            | Coach 0.10.1      | Coach 0.11.0      | Ray 0.5.3         |
++-------------------------+-------------------+-------------------+-------------------+
+| Python                  | 3.6               | 3.5(MXNet) or     | 3.6               |
+|                         |                   | 3.6(TensorFlow)   |                   |
++-------------------------+-------------------+-------------------+-------------------+
+| CUDA (GPU image only)   | 9.0               | 9.0               | 9.0               |
++-------------------------+-------------------+-------------------+-------------------+
+| DL Framework            | TensorFlow-1.11.0 | MXNet-1.3.0 or    | TensorFlow-1.11.0 |
+|                         |                   | TensorFlow-1.11.0 |                   |
++-------------------------+-------------------+-------------------+-------------------+
+| gym                     | 0.10.5            | 0.10.5            | 0.10.5            |
++-------------------------+-------------------+-------------------+-------------------+
+
+The Docker images extend Ubuntu 16.04.
+
+You can select the framework version by passing a ``framework_version`` keyword arg to the RL Estimator constructor.
+Currently supported versions are listed in the above table. You can also set ``framework_version`` to specify only the
+major and minor version, which will cause your training script to be run on the latest supported patch version of that
+minor version.
+
+Alternatively, you can build your own image by following the instructions in the SageMaker RL containers
+repository, and passing ``image_name`` to the RL Estimator constructor.
+
 You can visit `the SageMaker RL containers repository `_.
\ No newline at end of file
diff --git a/doc/using_sklearn.rst b/doc/using_sklearn.rst
index bf9c506b96..b72d2b4356 100644
--- a/doc/using_sklearn.rst
+++ b/doc/using_sklearn.rst
@@ -1,638 +1,638 @@
-================================================
-Using Scikit-learn with the SageMaker Python SDK
-================================================
-
-.. contents::
-
-With Scikit-learn Estimators, you can train and host Scikit-learn models on Amazon SageMaker.
-
-Supported versions of Scikit-learn: ``0.20.0``
-
-You can visit the Scikit-learn repository at https://github.com/scikit-learn/scikit-learn.
-
-
-Training with Scikit-learn
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Training Scikit-learn models using ``SKLearn`` Estimators is a two-step process:
-
-1. Prepare a Scikit-learn script to run on SageMaker
-2. Run this script on SageMaker via a ``SKLearn`` Estimator.
- - -First, you prepare your training script, then second, you run this on SageMaker via a ``SKLearn`` Estimator. -You should prepare your script in a separate source file than the notebook, terminal session, or source file you're -using to submit the script to SageMaker via a ``SKLearn`` Estimator. - -Suppose that you already have an Scikit-learn training script called -``sklearn-train.py``. You can run this script in SageMaker as follows: - -.. code:: python - - from sagemaker.sklearn import SKLearn - sklearn_estimator = SKLearn(entry_point='sklearn-train.py', - role='SageMakerRole', - train_instance_type='ml.m4.xlarge', - framework_version='0.20.0') - sklearn_estimator.fit('s3://bucket/path/to/training/data') - -Where the S3 URL is a path to your training data, within Amazon S3. The constructor keyword arguments define how -SageMaker runs your training script and are discussed in detail in a later section. - -In the following sections, we'll discuss how to prepare a training script for execution on SageMaker, -then how to run that script on SageMaker using a ``SKLearn`` Estimator. - -Preparing the Scikit-learn training script -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Your Scikit-learn training script must be a Python 2.7 or 3.5 compatible source file. - -The training script is very similar to a training script you might run outside of SageMaker, but you -can access useful properties about the training environment through various environment variables, such as - -* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to. - These artifacts are uploaded to S3 for model hosting. -* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may - include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed - and uploaded to S3 to the same S3 prefix as the model artifacts. - -Supposing two input channels, 'train' and 'test', were used in the call to the Scikit-learn estimator's ``fit()`` method, -the following will be set, following the format "SM_CHANNEL_[channel_name]": - -* ``SM_CHANNEL_TRAIN``: A string representing the path to the directory containing data in the 'train' channel -* ``SM_CHANNEL_TEST``: Same as above, but for the 'test' channel. - -A typical training script loads data from the input channels, configures training with hyperparameters, trains a model, -and saves a model to model_dir so that it can be hosted later. Hyperparameters are passed to your script as arguments -and can be retrieved with an argparse.ArgumentParser instance. For example, a training script might start -with the following: - -.. code:: python - - import argparse - import os - - if __name__ =='__main__': - - parser = argparse.ArgumentParser() - - # hyperparameters sent by the client are passed as command-line arguments to the script. - parser.add_argument('--epochs', type=int, default=50) - parser.add_argument('--batch-size', type=int, default=64) - parser.add_argument('--learning-rate', type=float, default=0.05) - - # Data, model, and output directories - parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR')) - parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) - parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) - parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) - - args, _ = parser.parse_known_args() - - # ... 
load from args.train and args.test, train a model, write model to args.model_dir. - -Because the SageMaker imports your training script, you should put your training code in a main guard -(``if __name__=='__main__':``) if you are using the same script to host your model, so that SageMaker does not -inadvertently run your training code at the wrong point in execution. - -For more on training environment variables, please visit https://github.com/aws/sagemaker-containers. - -Running a Scikit-learn training script in SageMaker -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You run Scikit-learn training scripts on SageMaker by creating ``SKLearn`` Estimators. -SageMaker training of your script is invoked when you call ``fit`` on a ``SKLearn`` Estimator. -The following code sample shows how you train a custom Scikit-learn script "sklearn-train.py", passing -in three hyperparameters ('epochs', 'batch-size', and 'learning-rate'), and using two input channel -directories ('train' and 'test'). - -.. code:: python - - sklearn_estimator = SKLearn('sklearn-train.py', - train_instance_type='ml.m4.xlarge', - framework_version='0.20.0', - hyperparameters = {'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1}) - sklearn_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data', - 'test': 's3://my-data-bucket/path/to/my/test/data'}) - - -Scikit-learn Estimators -^^^^^^^^^^^^^^^^^^^^^^^ - -The `SKLearn` constructor takes both required and optional arguments. - -Required arguments -'''''''''''''''''' - -The following are required arguments to the ``SKLearn`` constructor. When you create a Scikit-learn object, you must -include these in the constructor, either positionally or as keyword arguments. - -- ``entry_point`` Path (absolute or relative) to the Python file which - should be executed as the entry point to training. -- ``role`` An AWS IAM role (either name or full ARN). The Amazon - SageMaker training jobs and APIs that create Amazon SageMaker - endpoints use this role to access training data and model artifacts. - After the endpoint is created, the inference code might use the IAM - role, if accessing AWS resource. -- ``train_instance_type`` Type of EC2 instance to use for training, for - example, 'ml.m4.xlarge'. Please note that Scikit-learn does not have GPU support. - -Optional arguments -'''''''''''''''''' - -The following are optional arguments. When you create a ``SKLearn`` object, you can specify these as keyword arguments. - -- ``source_dir`` Path (absolute or relative) to a directory with any - other training source code dependencies including the entry point - file. Structure within this directory will be preserved when training - on SageMaker. -- ``hyperparameters`` Hyperparameters that will be used for training. - Will be made accessible as a dict[str, str] to the training code on - SageMaker. For convenience, accepts other types besides str, but - str() will be called on keys and values to convert them before - training. -- ``py_version`` Python version you want to use for executing your - model training code. -- ``train_volume_size`` Size in GB of the EBS volume to use for storing - input data during training. Must be large enough to store training - data if input_mode='File' is used (which is the default). -- ``train_max_run`` Timeout in seconds for training, after which Amazon - SageMaker terminates the job regardless of its current status. -- ``input_mode`` The input mode that the algorithm supports. 
Valid - modes: 'File' - Amazon SageMaker copies the training dataset from the - s3 location to a directory in the Docker container. 'Pipe' - Amazon - SageMaker streams data directly from s3 to the container via a Unix - named pipe. -- ``output_path`` s3 location where you want the training result (model - artifacts and optional output files) saved. If not specified, results - are stored to a default bucket. If the bucket with the specific name - does not exist, the estimator creates the bucket during the fit() - method execution. -- ``output_kms_key`` Optional KMS key ID to optionally encrypt training - output with. -- ``base_job_name`` Name to assign for the training job that the fit() - method launches. If not specified, the estimator generates a default - job name, based on the training image name and current timestamp -- ``image_name`` An alternative docker image to use for training and - serving. If specified, the estimator will use this image for training and - hosting, instead of selecting the appropriate SageMaker official image based on - framework_version and py_version. Refer to: `SageMaker Scikit-learn Docker Containers - <#sagemaker-scikit-learn-docker-containers>`_ for details on what the official images support - and where to find the source code to build your custom image. - - -Calling fit -^^^^^^^^^^^ - -You start your training script by calling ``fit`` on a ``SKLearn`` Estimator. ``fit`` takes both required and optional -arguments. - -Required arguments -'''''''''''''''''' - -- ``inputs``: This can take one of the following forms: A string - s3 URI, for example ``s3://my-bucket/my-training-data``. In this - case, the s3 objects rooted at the ``my-training-data`` prefix will - be available in the default ``train`` channel. A dict from - string channel names to s3 URIs. In this case, the objects rooted at - each s3 prefix will available as files in each channel directory. - -For example: - -.. code:: python - - {'train':'s3://my-bucket/my-training-data', - 'eval':'s3://my-bucket/my-evaluation-data'} - -.. optional-arguments-1: - -Optional arguments -'''''''''''''''''' - -- ``wait``: Defaults to True, whether to block and wait for the - training script to complete before returning. -- ``logs``: Defaults to True, whether to show logs produced by training - job in the Python session. Only meaningful when wait is True. - - -Saving models -~~~~~~~~~~~~~ - -In order to save your trained Scikit-learn model for deployment on SageMaker, your training script should save your -model to a certain filesystem path called `model_dir`. This value is accessible through the environment variable -``SM_MODEL_DIR``. The following code demonstrates how to save a trained Scikit-learn model named ``model`` as -``model.joblib`` at the end of training: - -.. code:: python - - from sklearn.externals import joblib - import argparse - import os - - if __name__=='__main__': - # default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable. - parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR']) - args, _ = parser.parse_known_args() - - # ... train classifier `clf`, then save it to `model_dir` as file 'model.joblib' - joblib.dump(clf, os.path.join(args.model_dir, "model.joblib")) - -After your training job is complete, SageMaker will compress and upload the serialized model to S3, and your model data -will available in the s3 ``output_path`` you specified when you created the Scikit-learn Estimator. 
- -Deploying Scikit-learn models -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -After an Scikit-learn Estimator has been fit, you can host the newly created model in SageMaker. - -After calling ``fit``, you can call ``deploy`` on a ``SKLearn`` Estimator to create a SageMaker Endpoint. -The Endpoint runs a SageMaker-provided Scikit-learn model server and hosts the model produced by your training script, -which was run when you called ``fit``. This was the model you saved to ``model_dir``. - -``deploy`` returns a ``Predictor`` object, which you can use to do inference on the Endpoint hosting your Scikit-learn -model. Each ``Predictor`` provides a ``predict`` method which can do inference with numpy arrays or Python lists. -Inference arrays or lists are serialized and sent to the Scikit-learn model server by an ``InvokeEndpoint`` SageMaker -operation. - -``predict`` returns the result of inference against your model. By default, the inference result a NumPy array. - -.. code:: python - - # Train my estimator - sklearn_estimator = SKLearn(entry_point='train_and_deploy.py', - train_instance_type='ml.m4.xlarge', - framework_version='0.20.0') - sklearn_estimator.fit('s3://my_bucket/my_training_data/') - - # Deploy my estimator to a SageMaker Endpoint and get a Predictor - predictor = sklearn_estimator.deploy(instance_type='ml.m4.xlarge', - initial_instance_count=1) - - # `data` is a NumPy array or a Python list. - # `response` is a NumPy array. - response = predictor.predict(data) - -You use the SageMaker Scikit-learn model server to host your Scikit-learn model when you call ``deploy`` -on an ``SKLearn`` Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates. -You can access the name of the Endpoint by the ``name`` property on the returned ``Predictor``. - - -SageMaker Scikit-learn Model Server -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Scikit-learn Endpoint you create with ``deploy`` runs a SageMaker Scikit-learn model server. -The model server loads the model that was saved by your training script and performs inference on the model in response -to SageMaker InvokeEndpoint API calls. - -You can configure two components of the SageMaker Scikit-learn model server: Model loading and model serving. -Model loading is the process of deserializing your saved model back into an Scikit-learn model. -Serving is the process of translating InvokeEndpoint requests to inference calls on the loaded model. - -You configure the Scikit-learn model server by defining functions in the Python source file you passed to the -Scikit-learn constructor. - -Model loading -^^^^^^^^^^^^^ - -Before a model can be served, it must be loaded. The SageMaker Scikit-learn model server loads your model by invoking a -``model_fn`` function that you must provide in your script. The ``model_fn`` should have the following signature: - -.. code:: python - - def model_fn(model_dir) - -SageMaker will inject the directory where your model files and sub-directories, saved by ``save``, have been mounted. -Your model function should return a model object that can be used for model serving. - -SageMaker provides automated serving functions that work with Gluon API ``net`` objects and Module API ``Module`` objects. If you return either of these types of objects, then you will be able to use the default serving request handling functions. - -The following code-snippet shows an example ``model_fn`` implementation. 
-This loads returns a Scikit-learn Classifier from a ``model.joblib`` file in the SageMaker model directory -``model_dir``. - -.. code:: python - - from sklearn.externals import joblib - import os - - def model_fn(model_dir): - clf = joblib.load(os.path.join(model_dir, "model.joblib")) - return clf - -Model serving -^^^^^^^^^^^^^ - -After the SageMaker model server has loaded your model by calling ``model_fn``, SageMaker will serve your model. -Model serving is the process of responding to inference requests, received by SageMaker InvokeEndpoint API calls. -The SageMaker Scikit-learn model server breaks request handling into three steps: - - -- input processing, -- prediction, and -- output processing. - -In a similar way to model loading, you configure these steps by defining functions in your Python source file. - -Each step involves invoking a python function, with information about the request and the return-value from the previous -function in the chain. Inside the SageMaker Scikit-learn model server, the process looks like: - -.. code:: python - - # Deserialize the Invoke request body into an object we can perform prediction on - input_object = input_fn(request_body, request_content_type) - - # Perform prediction on the deserialized object, with the loaded model - prediction = predict_fn(input_object, model) - - # Serialize the prediction result into the desired response content type - output = output_fn(prediction, response_content_type) - -The above code-sample shows the three function definitions: - -- ``input_fn``: Takes request data and deserializes the data into an - object for prediction. -- ``predict_fn``: Takes the deserialized request object and performs - inference against the loaded model. -- ``output_fn``: Takes the result of prediction and serializes this - according to the response content type. - -The SageMaker Scikit-learn model server provides default implementations of these functions. -You can provide your own implementations for these functions in your hosting script. -If you omit any definition then the SageMaker Scikit-learn model server will use its default implementation for that -function. - -The ``RealTimePredictor`` used by Scikit-learn in the SageMaker Python SDK serializes NumPy arrays to the `NPY `_ format -by default, with Content-Type ``application/x-npy``. The SageMaker Scikit-learn model server can deserialize NPY-formatted -data (along with JSON and CSV data). - -If you rely solely on the SageMaker Scikit-learn model server defaults, you get the following functionality: - -- Prediction on models that implement the ``__call__`` method -- Serialization and deserialization of NumPy arrays. - -The default ``input_fn`` and ``output_fn`` are meant to make it easy to predict on NumPy arrays. If your model expects -a NumPy array and returns a NumPy array, then these functions do not have to be overridden when sending NPY-formatted -data. - -In the following sections we describe the default implementations of input_fn, predict_fn, and output_fn. -We describe the input arguments and expected return types of each, so you can define your own implementations. 
- -Input processing -'''''''''''''''' - -When an InvokeEndpoint operation is made against an Endpoint running a SageMaker Scikit-learn model server, -the model server receives two pieces of information: - -- The request Content-Type, for example "application/x-npy" -- The request data body, a byte array - -The SageMaker Scikit-learn model server will invoke an "input_fn" function in your hosting script, -passing in this information. If you define an ``input_fn`` function definition, -it should return an object that can be passed to ``predict_fn`` and have the following signature: - -.. code:: python - - def input_fn(request_body, request_content_type) - -Where ``request_body`` is a byte buffer and ``request_content_type`` is a Python string - -The SageMaker Scikit-learn model server provides a default implementation of ``input_fn``. -This function deserializes JSON, CSV, or NPY encoded data into a NumPy array. - -Default NPY deserialization requires ``request_body`` to follow the `NPY `_ format. For Scikit-learn, the Python SDK -defaults to sending prediction requests with this format. - -Default json deserialization requires ``request_body`` contain a single json list. -Sending multiple json objects within the same ``request_body`` is not supported. -The list must have a dimensionality compatible with the model loaded in ``model_fn``. -The list's shape must be identical to the model's input shape, for all dimensions after the first (which first -dimension is the batch size). - -Default csv deserialization requires ``request_body`` contain one or more lines of CSV numerical data. -The data is loaded into a two-dimensional array, where each line break defines the boundaries of the first dimension. - -The example below shows a custom ``input_fn`` for preparing pickled NumPy arrays. - -.. code:: python - - import numpy as np - - def input_fn(request_body, request_content_type): - """An input_fn that loads a pickled numpy array""" - if request_content_type == "application/python-pickle": - array = np.load(StringIO(request_body)) - return array - else: - # Handle other content-types here or raise an Exception - # if the content type is not supported. - pass - - - -Prediction -'''''''''' - -After the inference request has been deserialized by ``input_fn``, the SageMaker Scikit-learn model server invokes -``predict_fn`` on the return value of ``input_fn``. - -As with ``input_fn``, you can define your own ``predict_fn`` or use the SageMaker Scikit-learn model server default. - -The ``predict_fn`` function has the following signature: - -.. code:: python - - def predict_fn(input_object, model) - -Where ``input_object`` is the object returned from ``input_fn`` and -``model`` is the model loaded by ``model_fn``. - -The default implementation of ``predict_fn`` invokes the loaded model's ``__call__`` function on ``input_object``, -and returns the resulting value. The return-type should be a NumPy array to be compatible with the default -``output_fn``. - -The example below shows an overridden ``predict_fn`` for a Logistic Regression classifier. This model accepts a -Python list and returns a tuple of predictions and prediction probabilities from the model in a NumPy array. -This ``predict_fn`` can rely on the default ``input_fn`` and ``output_fn`` because ``input_data`` is a NumPy array, -and the return value of this function is a NumPy array. - -.. 
code:: python - - import sklearn - import numpy as np - - def predict_fn(input_data, model): - prediction = model.predict(input_data) - pred_prob = model.predict_proba(input_data) - return np.array([prediction, pred_prob]) - -If you implement your own prediction function, you should take care to ensure that: - -- The first argument is expected to be the return value from input_fn. - If you use the default input_fn, this will be a NumPy array. -- The second argument is the loaded model. -- The return value should be of the correct type to be passed as the - first argument to ``output_fn``. If you use the default - ``output_fn``, this should be a NumPy array. - -Output processing -''''''''''''''''' - -After invoking ``predict_fn``, the model server invokes ``output_fn``, passing in the return-value from ``predict_fn`` -and the InvokeEndpoint requested response content-type. - -The ``output_fn`` has the following signature: - -.. code:: python - - def output_fn(prediction, content_type) - -Where ``prediction`` is the result of invoking ``predict_fn`` and -``content_type`` is the InvokeEndpoint requested response content-type. -The function should return a byte array of data serialized to content_type. - -The default implementation expects ``prediction`` to be an NumPy and can serialize the result to JSON, CSV, or NPY. -It accepts response content types of "application/json", "text/csv", and "application/x-npy". - -Working with existing model data and training jobs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Attaching to existing training jobs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -You can attach an Scikit-learn Estimator to an existing training job using the -``attach`` method. - -.. code:: python - - my_training_job_name = "MyAwesomeSKLearnTrainingJob" - sklearn_estimator = SKLearn.attach(my_training_job_name) - -After attaching, if the training job is in a Complete status, it can be -``deploy``\ ed to create a SageMaker Endpoint and return a -``Predictor``. If the training job is in progress, -attach will block and display log messages from the training job, until the training job completes. - -The ``attach`` method accepts the following arguments: - -- ``training_job_name (str):`` The name of the training job to attach - to. -- ``sagemaker_session (sagemaker.Session or None):`` The Session used - to interact with SageMaker - -Deploying Endpoints from model data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As well as attaching to existing training jobs, you can deploy models directly from model data in S3. -The following code sample shows how to do this, using the ``SKLearnModel`` class. - -.. code:: python - - sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole", - entry_point="transform_script.py") - - predictor = sklearn_model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1) - -The sklearn_model constructor takes the following arguments: - -- ``model_data (str):`` An S3 location of a SageMaker model data - .tar.gz file -- ``image (str):`` A Docker image URI -- ``role (str):`` An IAM role name or Arn for SageMaker to access AWS - resources on your behalf. -- ``predictor_cls (callable[string,sagemaker.Session]):`` A function to - call to create a predictor. If not None, ``deploy`` will return the - result of invoking this function on the created endpoint name -- ``env (dict[string,string]):`` Environment variables to run with - ``image`` when hosted in SageMaker. -- ``name (str):`` The model name. 
If None, a default model name will be - selected on each ``deploy.`` -- ``entry_point (str):`` Path (absolute or relative) to the Python file - which should be executed as the entry point to model hosting. -- ``source_dir (str):`` Optional. Path (absolute or relative) to a - directory with any other training source code dependencies including - tne entry point file. Structure within this directory will be - preserved when training on SageMaker. -- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training - and hosting containers will generate Cloudwatch metrics under the - AWS/SageMakerContainer namespace. -- ``container_log_level (int):`` Log level to use within the container. - Valid values are defined in the Python logging module. -- ``code_location (str):`` Optional. Name of the S3 bucket where your - custom code will be uploaded to. If not specified, will use the - SageMaker default bucket created by sagemaker.Session. -- ``sagemaker_session (sagemaker.Session):`` The SageMaker Session - object, used for SageMaker interaction""" - -Your model data must be a .tar.gz file in S3. SageMaker Training Job model data is saved to .tar.gz files in S3, -however if you have local data you want to deploy, you can prepare the data yourself. - -Assuming you have a local directory containg your model data named "my_model" you can tar and gzip compress the file and -upload to S3 using the following commands: - -:: - - tar -czf model.tar.gz my_model - aws s3 cp model.tar.gz s3://my-bucket/my-path/model.tar.gz - -This uploads the contents of my_model to a gzip compressed tar file to S3 in the bucket "my-bucket", with the key -"my-path/model.tar.gz". - -To run this command, you'll need the aws cli tool installed. Please refer to our `FAQ <#FAQ>`__ for more information on -installing this. - -Scikit-learn Training Examples -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Amazon provides an example Jupyter notebook that demonstrate end-to-end training on Amazon SageMaker using Scikit-learn. -Please refer to: - -https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk - -These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the "sample notebooks" folder. - - -SageMaker Scikit-learn Docker Containers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When training and deploying training scripts, SageMaker runs your Python script in a Docker container with several -libraries installed. When creating the Estimator and calling deploy to create the SageMaker Endpoint, you can control -the environment your script runs in. - -SageMaker runs Scikit-learn Estimator scripts in either Python 2.7 or Python 3.5. You can select the Python version by -passing a py_version keyword arg to the Scikit-learn Estimator constructor. Setting this to py3 (the default) will cause -your training script to be run on Python 3.5. Setting this to py2 will cause your training script to be run on Python 2.7 -This Python version applies to both the Training Job, created by fit, and the Endpoint, created by deploy. 
-
-The Scikit-learn Docker images have the following dependencies installed:
-
-+-----------------------------+-------------+
-| Dependencies                | sklearn 0.2 |
-+-----------------------------+-------------+
-| sklearn                     | 0.20.0      |
-+-----------------------------+-------------+
-| sagemaker                   | 1.11.3      |
-+-----------------------------+-------------+
-| sagemaker-containers        | 2.2.4       |
-+-----------------------------+-------------+
-| numpy                       | 1.15.2      |
-+-----------------------------+-------------+
-| pandas                      | 0.23.4      |
-+-----------------------------+-------------+
-| Pillow                      | 3.1.2       |
-+-----------------------------+-------------+
-| Python                      | 2.7 or 3.5  |
-+-----------------------------+-------------+
-
-You can see the full list by calling ``pip freeze`` from the running Docker image.
-
-The Docker images extend Ubuntu 16.04.
-
-You can select version of Scikit-learn by passing a framework_version keyword arg to the Scikit-learn Estimator constructor.
-Currently supported versions are listed in the above table. You can also set framework_version to only specify major and
-minor version, which will cause your training script to be run on the latest supported patch version of that minor
-version.
-
-Alternatively, you can build your own image by following the instructions in the SageMaker Scikit-learn containers
-repository, and passing ``image_name`` to the Scikit-learn Estimator constructor.
-sagemaker-containers
-You can visit the SageMaker Scikit-learn containers repository here: https://github.com/aws/sagemaker-scikit-learn-container/
+================================================
+Using Scikit-learn with the SageMaker Python SDK
+================================================
+
+.. contents::
+
+With Scikit-learn Estimators, you can train and host Scikit-learn models on Amazon SageMaker.
+
+Supported versions of Scikit-learn: ``0.20.0``
+
+You can visit the Scikit-learn repository at https://github.com/scikit-learn/scikit-learn.
+
+
+Training with Scikit-learn
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Training Scikit-learn models using ``SKLearn`` Estimators is a two-step process:
+
+1. Prepare a Scikit-learn script to run on SageMaker.
+2. Run this script on SageMaker via a ``SKLearn`` Estimator.
+
+You should prepare your script in a separate source file from the notebook, terminal session, or source file you're
+using to submit the script to SageMaker via a ``SKLearn`` Estimator.
+
+Suppose that you already have a Scikit-learn training script called
+``sklearn-train.py``. You can run this script in SageMaker as follows:
+
+.. code:: python
+
+    from sagemaker.sklearn import SKLearn
+    sklearn_estimator = SKLearn(entry_point='sklearn-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.m4.xlarge',
+                                framework_version='0.20.0')
+    sklearn_estimator.fit('s3://bucket/path/to/training/data')
+
+Where the S3 URL is the path to your training data within Amazon S3. The constructor keyword arguments define how
+SageMaker runs your training script and are discussed in detail in a later section.
+
+In the following sections, we'll discuss how to prepare a training script for execution on SageMaker,
+then how to run that script on SageMaker using a ``SKLearn`` Estimator.
+
+Preparing the Scikit-learn training script
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Your Scikit-learn training script must be a Python 2.7 or 3.5 compatible source file.
+
+The training script is very similar to a training script you might run outside of SageMaker, but you
+can access useful properties about the training environment through various environment variables, such as the following:
+
+* ``SM_MODEL_DIR``: A string representing the path to the directory to write model artifacts to.
+  These artifacts are uploaded to S3 for model hosting.
+* ``SM_OUTPUT_DATA_DIR``: A string representing the filesystem path to write output artifacts to. Output artifacts may
+  include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed
+  and uploaded to S3 to the same S3 prefix as the model artifacts.
+
+If two input channels, 'train' and 'test', were used in the call to the Scikit-learn Estimator's ``fit()`` method,
+the following environment variables are set, following the format "SM_CHANNEL_[channel_name]":
+
+* ``SM_CHANNEL_TRAIN``: A string representing the path to the directory containing data in the 'train' channel.
+* ``SM_CHANNEL_TEST``: Same as above, but for the 'test' channel.
+
+A typical training script loads data from the input channels, configures training with hyperparameters, trains a model,
+and saves a model to ``model_dir`` so that it can be hosted later. Hyperparameters are passed to your script as arguments
+and can be retrieved with an ``argparse.ArgumentParser`` instance. For example, a training script might start
+with the following:
+
+.. code:: python
+
+    import argparse
+    import os
+
+    if __name__ == '__main__':
+
+        parser = argparse.ArgumentParser()
+
+        # hyperparameters sent by the client are passed as command-line arguments to the script.
+        parser.add_argument('--epochs', type=int, default=50)
+        parser.add_argument('--batch-size', type=int, default=64)
+        parser.add_argument('--learning-rate', type=float, default=0.05)
+
+        # Data, model, and output directories
+        parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
+        parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
+        parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
+        parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))
+
+        args, _ = parser.parse_known_args()
+
+        # ... load from args.train and args.test, train a model, write model to args.model_dir.
+
+Because SageMaker imports your training script, you should put your training code in a main guard
+(``if __name__ == '__main__':``) if you are using the same script to host your model, so that SageMaker does not
+inadvertently run your training code at the wrong point in execution.
+
+For more on training environment variables, please visit https://github.com/aws/sagemaker-containers.
+
+Running a Scikit-learn training script in SageMaker
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You run Scikit-learn training scripts on SageMaker by creating ``SKLearn`` Estimators.
+SageMaker training of your script is invoked when you call ``fit`` on a ``SKLearn`` Estimator.
+The following code sample shows how you train a custom Scikit-learn script "sklearn-train.py", passing
+in three hyperparameters ('epochs', 'batch-size', and 'learning-rate'), and using two input channel
+directories ('train' and 'test').
+
+.. code:: python
+
+    sklearn_estimator = SKLearn('sklearn-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.m4.xlarge',
+                                framework_version='0.20.0',
+                                hyperparameters={'epochs': 20, 'batch-size': 64, 'learning-rate': 0.1})
+    sklearn_estimator.fit({'train': 's3://my-data-bucket/path/to/my/training/data',
+                           'test': 's3://my-data-bucket/path/to/my/test/data'})
+
+
+Scikit-learn Estimators
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``SKLearn`` constructor takes both required and optional arguments.
+
+Required arguments
+''''''''''''''''''
+
+The following are required arguments to the ``SKLearn`` constructor. When you create a ``SKLearn`` object, you must
+include these in the constructor, either positionally or as keyword arguments.
+
+- ``entry_point`` Path (absolute or relative) to the Python file which
+  should be executed as the entry point to training.
+- ``role`` An AWS IAM role (either name or full ARN). The Amazon
+  SageMaker training jobs and APIs that create Amazon SageMaker
+  endpoints use this role to access training data and model artifacts.
+  After the endpoint is created, the inference code might use the IAM
+  role, if it accesses AWS resources.
+- ``train_instance_type`` Type of EC2 instance to use for training, for
+  example, 'ml.m4.xlarge'. Please note that Scikit-learn does not have GPU support.
+
+Optional arguments
+''''''''''''''''''
+
+The following are optional arguments. When you create a ``SKLearn`` object, you can specify these as keyword arguments.
+
+- ``source_dir`` Path (absolute or relative) to a directory with any
+  other training source code dependencies including the entry point
+  file. Structure within this directory will be preserved when training
+  on SageMaker.
+- ``hyperparameters`` Hyperparameters that will be used for training.
+  Will be made accessible as a ``dict[str, str]`` to the training code on
+  SageMaker. For convenience, accepts other types besides str, but
+  ``str()`` will be called on keys and values to convert them before
+  training.
+- ``py_version`` Python version you want to use for executing your
+  model training code.
+- ``train_volume_size`` Size in GB of the EBS volume to use for storing
+  input data during training. Must be large enough to store training
+  data if ``input_mode='File'`` is used (which is the default).
+- ``train_max_run`` Timeout in seconds for training, after which Amazon
+  SageMaker terminates the job regardless of its current status.
+- ``input_mode`` The input mode that the algorithm supports. Valid
+  modes: 'File' - Amazon SageMaker copies the training dataset from the
+  S3 location to a directory in the Docker container. 'Pipe' - Amazon
+  SageMaker streams data directly from S3 to the container via a Unix
+  named pipe.
+- ``output_path`` S3 location where you want the training result (model
+  artifacts and optional output files) saved. If not specified, results
+  are stored to a default bucket. If the bucket with the specific name
+  does not exist, the estimator creates the bucket during the ``fit()``
+  method execution.
+- ``output_kms_key`` Optional KMS key ID to encrypt training
+  output with.
+- ``base_job_name`` Name to assign for the training job that the ``fit()``
+  method launches. If not specified, the estimator generates a default
+  job name, based on the training image name and current timestamp.
+- ``image_name`` An alternative Docker image to use for training and
+  serving. If specified, the estimator will use this image for training and
+  hosting, instead of selecting the appropriate SageMaker official image based on
+  ``framework_version`` and ``py_version``. Refer to: `SageMaker Scikit-learn Docker Containers
+  <#sagemaker-scikit-learn-docker-containers>`_ for details on what the official images support
+  and where to find the source code to build your custom image.
+
+
+Calling fit
+^^^^^^^^^^^
+
+You start your training script by calling ``fit`` on a ``SKLearn`` Estimator. ``fit`` takes both required and optional
+arguments.
+
+fit Required arguments
+''''''''''''''''''''''
+
+- ``inputs``: This can take one of the following forms:
+
+  - A string S3 URI, for example ``s3://my-bucket/my-training-data``. In this
+    case, the S3 objects rooted at the ``my-training-data`` prefix will
+    be available in the default ``train`` channel.
+  - A dict from string channel names to S3 URIs. In this case, the objects rooted at
+    each S3 prefix will be available as files in each channel directory.
+
+For example:
+
+.. code:: python
+
+    {'train':'s3://my-bucket/my-training-data',
+     'eval':'s3://my-bucket/my-evaluation-data'}
+
+.. _optional-arguments-1:
+
+fit Optional arguments
+''''''''''''''''''''''
+
+- ``wait``: Defaults to True, whether to block and wait for the
+  training script to complete before returning.
+- ``logs``: Defaults to True, whether to show logs produced by the training
+  job in the Python session. Only meaningful when wait is True.
+
+
+Saving models
+~~~~~~~~~~~~~
+
+In order to save your trained Scikit-learn model for deployment on SageMaker, your training script should save your
+model to a certain filesystem path called ``model_dir``. This value is accessible through the environment variable
+``SM_MODEL_DIR``. The following code demonstrates how to save a trained Scikit-learn model named ``model`` as
+``model.joblib`` at the end of training:
+
+.. code:: python
+
+    from sklearn.externals import joblib
+    import argparse
+    import os
+
+    if __name__ == '__main__':
+        parser = argparse.ArgumentParser()
+        # default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable.
+        parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
+        args, _ = parser.parse_known_args()
+
+        # ... train classifier `clf`, then save it to `model_dir` as the file 'model.joblib'
+        joblib.dump(clf, os.path.join(args.model_dir, "model.joblib"))
+
+After your training job is complete, SageMaker will compress and upload the serialized model to S3, and your model data
+will be available in the S3 ``output_path`` you specified when you created the Scikit-learn Estimator.
+
+Deploying Scikit-learn models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+After a Scikit-learn Estimator has been fit, you can host the newly created model in SageMaker.
+
+After calling ``fit``, you can call ``deploy`` on a ``SKLearn`` Estimator to create a SageMaker Endpoint.
+The Endpoint runs a SageMaker-provided Scikit-learn model server and hosts the model produced by your training script,
+which was run when you called ``fit``. This was the model you saved to ``model_dir``.
+
+``deploy`` returns a ``Predictor`` object, which you can use to do inference on the Endpoint hosting your Scikit-learn
+model. Each ``Predictor`` provides a ``predict`` method which can do inference with NumPy arrays or Python lists.
+Inference arrays or lists are serialized and sent to the Scikit-learn model server by an ``InvokeEndpoint`` SageMaker
+operation.
+
+``predict`` returns the result of inference against your model. By default, the inference result is a NumPy array.
+
+.. code:: python
+
+    # Train my estimator
+    sklearn_estimator = SKLearn(entry_point='train_and_deploy.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.m4.xlarge',
+                                framework_version='0.20.0')
+    sklearn_estimator.fit('s3://my_bucket/my_training_data/')
+
+    # Deploy my estimator to a SageMaker Endpoint and get a Predictor
+    predictor = sklearn_estimator.deploy(instance_type='ml.m4.xlarge',
+                                         initial_instance_count=1)
+
+    # `data` is a NumPy array or a Python list.
+    # `response` is a NumPy array.
+    response = predictor.predict(data)
+
+You use the SageMaker Scikit-learn model server to host your Scikit-learn model when you call ``deploy``
+on a ``SKLearn`` Estimator. The model server runs inside a SageMaker Endpoint, which your call to ``deploy`` creates.
+You can access the name of the Endpoint through the ``name`` property on the returned ``Predictor``.
+
+
+SageMaker Scikit-learn Model Server
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The Scikit-learn Endpoint you create with ``deploy`` runs a SageMaker Scikit-learn model server.
+The model server loads the model that was saved by your training script and performs inference on the model in response
+to SageMaker InvokeEndpoint API calls.
+
+You can configure two components of the SageMaker Scikit-learn model server: model loading and model serving.
+Model loading is the process of deserializing your saved model back into a Scikit-learn model.
+Serving is the process of translating InvokeEndpoint requests to inference calls on the loaded model.
+
+You configure the Scikit-learn model server by defining functions in the Python source file you passed to the
+Scikit-learn constructor.
+
+Model loading
+^^^^^^^^^^^^^
+
+Before a model can be served, it must be loaded. The SageMaker Scikit-learn model server loads your model by invoking a
+``model_fn`` function that you must provide in your script. The ``model_fn`` should have the following signature:
+
+.. code:: python
+
+    def model_fn(model_dir)
+
+SageMaker will pass in ``model_dir``, the directory where the model files and sub-directories saved by your training
+script have been mounted. Your model function should return a model object that can be used for model serving.
+
+If the model object you return implements the ``__call__`` method and consumes and returns NumPy arrays, you will be
+able to use the default serving request handling functions.
+
+The following code-snippet shows an example ``model_fn`` implementation.
+It loads and returns a Scikit-learn classifier from a ``model.joblib`` file in the SageMaker model directory
+``model_dir``.
+
+.. code:: python
+
+    from sklearn.externals import joblib
+    import os
+
+    def model_fn(model_dir):
+        clf = joblib.load(os.path.join(model_dir, "model.joblib"))
+        return clf
+
+Model serving
+^^^^^^^^^^^^^
+
+After the SageMaker model server has loaded your model by calling ``model_fn``, SageMaker will serve your model.
+Model serving is the process of responding to inference requests received by SageMaker InvokeEndpoint API calls.
+The SageMaker Scikit-learn model server breaks request handling into three steps:
+
+- input processing,
+- prediction, and
+- output processing.
+
+In a similar way to model loading, you configure these steps by defining functions in your Python source file.
+
+Each step involves invoking a Python function, with information about the request and the return value from the previous
+function in the chain. Inside the SageMaker Scikit-learn model server, the process looks like:
+
+.. code:: python
+
+    # Deserialize the Invoke request body into an object we can perform prediction on
+    input_object = input_fn(request_body, request_content_type)
+
+    # Perform prediction on the deserialized object, with the loaded model
+    prediction = predict_fn(input_object, model)
+
+    # Serialize the prediction result into the desired response content type
+    output = output_fn(prediction, response_content_type)
+
+The above code sample shows the three function definitions:
+
+- ``input_fn``: Takes request data and deserializes the data into an
+  object for prediction.
+- ``predict_fn``: Takes the deserialized request object and performs
+  inference against the loaded model.
+- ``output_fn``: Takes the result of prediction and serializes this
+  according to the response content type.
+
+The SageMaker Scikit-learn model server provides default implementations of these functions.
+You can provide your own implementations for these functions in your hosting script.
+If you omit any definition, then the SageMaker Scikit-learn model server will use its default implementation for that
+function.
+
+The ``RealTimePredictor`` used by Scikit-learn in the SageMaker Python SDK serializes NumPy arrays to the NPY format
+by default, with Content-Type ``application/x-npy``. The SageMaker Scikit-learn model server can deserialize NPY-formatted
+data (along with JSON and CSV data).
+
+If you rely solely on the SageMaker Scikit-learn model server defaults, you get the following functionality:
+
+- Prediction on models that implement the ``__call__`` method
+- Serialization and deserialization of NumPy arrays.
+
+The default ``input_fn`` and ``output_fn`` are meant to make it easy to predict on NumPy arrays. If your model expects
+a NumPy array and returns a NumPy array, then these functions do not have to be overridden when sending NPY-formatted
+data.
+
+In the following sections we describe the default implementations of ``input_fn``, ``predict_fn``, and ``output_fn``.
+We describe the input arguments and expected return types of each, so you can define your own implementations.
+
+Input processing
+''''''''''''''''
+
+When an InvokeEndpoint operation is made against an Endpoint running a SageMaker Scikit-learn model server,
+the model server receives two pieces of information:
+
+- The request Content-Type, for example "application/x-npy"
+- The request data body, a byte array
+
+The SageMaker Scikit-learn model server will invoke an ``input_fn`` function in your hosting script,
+passing in this information. If you define your own ``input_fn``, it should return an object that can be passed
+to ``predict_fn``, and it should have the following signature:
+
+.. code:: python
+
+    def input_fn(request_body, request_content_type)
+
+Where ``request_body`` is a byte buffer and ``request_content_type`` is a Python string.
+
+The SageMaker Scikit-learn model server provides a default implementation of ``input_fn``.
+This function deserializes JSON, CSV, or NPY encoded data into a NumPy array.
+
+Default NPY deserialization requires ``request_body`` to follow the NPY format. For Scikit-learn, the Python SDK
+defaults to sending prediction requests with this format.
+
+Default JSON deserialization requires ``request_body`` to contain a single JSON list.
+Sending multiple JSON objects within the same ``request_body`` is not supported.
+The list must have a dimensionality compatible with the model loaded in ``model_fn``.
+The list's shape must be identical to the model's input shape for all dimensions after the first (the first
+dimension is the batch size).
+
+Default CSV deserialization requires ``request_body`` to contain one or more lines of numerical CSV data.
+The data is loaded into a two-dimensional array, where each line break defines the boundaries of the first dimension.
+
+The example below shows a custom ``input_fn`` for preparing pickled NumPy arrays.
+
+.. code:: python
+
+    from io import BytesIO
+
+    import numpy as np
+
+    def input_fn(request_body, request_content_type):
+        """An input_fn that loads a pickled NumPy array."""
+        if request_content_type == "application/python-pickle":
+            # request_body is a byte buffer, so wrap it in BytesIO for np.load.
+            # allow_pickle=True is needed to deserialize pickled object arrays.
+            array = np.load(BytesIO(request_body), allow_pickle=True)
+            return array
+        else:
+            # Handle other content-types here, or raise an exception
+            # if the content type is not supported.
+            raise ValueError("Unsupported content type: {}".format(request_content_type))
+
+
+Prediction
+''''''''''
+
+After the inference request has been deserialized by ``input_fn``, the SageMaker Scikit-learn model server invokes
+``predict_fn`` on the return value of ``input_fn``.
+
+As with ``input_fn``, you can define your own ``predict_fn`` or use the SageMaker Scikit-learn model server default.
+
+The ``predict_fn`` function has the following signature:
+
+.. code:: python
+
+    def predict_fn(input_object, model)
+
+Where ``input_object`` is the object returned from ``input_fn`` and
+``model`` is the model loaded by ``model_fn``.
+
+The default implementation of ``predict_fn`` invokes the loaded model's ``__call__`` function on ``input_object``,
+and returns the resulting value. The return type should be a NumPy array to be compatible with the default
+``output_fn``.
+
+The example below shows an overridden ``predict_fn`` for a Logistic Regression classifier. This model accepts a
+Python list and returns a tuple of predictions and prediction probabilities from the model in a NumPy array.
+This ``predict_fn`` can rely on the default ``input_fn`` and ``output_fn`` because ``input_data`` is a NumPy array,
+and the return value of this function is a NumPy array.
+
+.. code:: python
+
+    import numpy as np
+
+    def predict_fn(input_data, model):
+        prediction = model.predict(input_data)
+        pred_prob = model.predict_proba(input_data)
+        return np.array([prediction, pred_prob])
+
+If you implement your own prediction function, you should take care to ensure that:
+
+- The first argument is expected to be the return value from ``input_fn``.
+  If you use the default ``input_fn``, this will be a NumPy array.
+- The second argument is the loaded model.
+- The return value should be of the correct type to be passed as the
+  first argument to ``output_fn``. If you use the default
+  ``output_fn``, this should be a NumPy array.
+
+Output processing
+'''''''''''''''''
+
+After invoking ``predict_fn``, the model server invokes ``output_fn``, passing in the return value from ``predict_fn``
+and the InvokeEndpoint requested response content type.
+
+The ``output_fn`` has the following signature:
+
+.. code:: python
+
+    def output_fn(prediction, content_type)
+
+Where ``prediction`` is the result of invoking ``predict_fn`` and
+``content_type`` is the InvokeEndpoint requested response content type.
+The function should return a byte array of data serialized to ``content_type``.
+
+The default implementation expects ``prediction`` to be a NumPy array and can serialize the result to JSON, CSV, or NPY.
+It accepts response content types of "application/json", "text/csv", and "application/x-npy".
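+
+The example below is a minimal sketch of a custom ``output_fn`` that serializes a prediction to JSON; it assumes
+``prediction`` is a NumPy array (as produced by the default ``predict_fn``), and the error handling shown is
+illustrative rather than the model server's built-in behavior.
+
+.. code:: python
+
+    import json
+
+    import numpy as np
+
+    def output_fn(prediction, content_type):
+        """An output_fn that serializes a NumPy array prediction to JSON."""
+        if content_type == "application/json":
+            # Convert the array to nested lists so it is JSON-serializable,
+            # then encode to bytes, since output_fn should return serialized data.
+            return json.dumps(np.asarray(prediction).tolist()).encode('utf-8')
+        raise ValueError("Unsupported content type: {}".format(content_type))
+
+Because this sketch only handles "application/json", clients would need to request that response content type;
+any other requested content type results in an error.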
+
+Working with existing model data and training jobs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Attaching to existing training jobs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can attach a Scikit-learn Estimator to an existing training job using the
+``attach`` method.
+
+.. code:: python
+
+    my_training_job_name = "MyAwesomeSKLearnTrainingJob"
+    sklearn_estimator = SKLearn.attach(my_training_job_name)
+
+After attaching, if the training job is in a Complete status, it can be
+``deploy``\ ed to create a SageMaker Endpoint and return a
+``Predictor``. If the training job is in progress,
+``attach`` will block and display log messages from the training job until the training job completes.
+
+The ``attach`` method accepts the following arguments:
+
+- ``training_job_name (str):`` The name of the training job to attach
+  to.
+- ``sagemaker_session (sagemaker.Session or None):`` The Session used
+  to interact with SageMaker.
+
+Deploying Endpoints from model data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+As well as attaching to existing training jobs, you can deploy models directly from model data in S3.
+The following code sample shows how to do this, using the ``SKLearnModel`` class.
+
+.. code:: python
+
+    sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole",
+                                 entry_point="transform_script.py")
+
+    predictor = sklearn_model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1)
+
+The ``SKLearnModel`` constructor takes the following arguments:
+
+- ``model_data (str):`` An S3 location of a SageMaker model data
+  .tar.gz file
+- ``image (str):`` A Docker image URI
+- ``role (str):`` An IAM role name or ARN for SageMaker to access AWS
+  resources on your behalf.
+- ``predictor_cls (callable[string, sagemaker.Session]):`` A function to
+  call to create a predictor. If not None, ``deploy`` will return the
+  result of invoking this function on the created endpoint name.
+- ``env (dict[string, string]):`` Environment variables to run with
+  ``image`` when hosted in SageMaker.
+- ``name (str):`` The model name. If None, a default model name will be
+  selected on each ``deploy``.
+- ``entry_point (str):`` Path (absolute or relative) to the Python file
+  which should be executed as the entry point to model hosting.
+- ``source_dir (str):`` Optional. Path (absolute or relative) to a
+  directory with any other training source code dependencies including
+  the entry point file. Structure within this directory will be
+  preserved when training on SageMaker.
+- ``enable_cloudwatch_metrics (boolean):`` Optional. If True, training
+  and hosting containers will generate CloudWatch metrics under the
+  AWS/SageMakerContainer namespace.
+- ``container_log_level (int):`` Log level to use within the container.
+  Valid values are defined in the Python logging module.
+- ``code_location (str):`` Optional. Name of the S3 bucket where your
+  custom code will be uploaded to. If not specified, will use the
+  SageMaker default bucket created by sagemaker.Session.
+- ``sagemaker_session (sagemaker.Session):`` The SageMaker Session
+  object, used for SageMaker interaction.
+
+Your model data must be a .tar.gz file in S3. SageMaker Training Job model data is saved to .tar.gz files in S3;
+however, if you have local data you want to deploy, you can prepare the data yourself.
+
+Assuming you have a local directory containing your model data named "my_model", you can tar and gzip compress the directory and
+upload it to S3 using the following commands:
+
+::
+
+    tar -czf model.tar.gz my_model
+    aws s3 cp model.tar.gz s3://my-bucket/my-path/model.tar.gz
+
+This uploads the contents of my_model as a gzip-compressed tar file to S3 in the bucket "my-bucket", with the key
+"my-path/model.tar.gz".
+
+To run this command, you'll need the AWS CLI tool installed. Please refer to our `FAQ <#FAQ>`__ for more information on
+installing this.
+
+Scikit-learn Training Examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Amazon provides an example Jupyter notebook that demonstrates end-to-end training on Amazon SageMaker using Scikit-learn.
+Please refer to:
+
+https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk
+
+These are also available in SageMaker Notebook Instance hosted Jupyter notebooks under the "sample notebooks" folder.
+
+
+SageMaker Scikit-learn Docker Containers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When you train a model or deploy an endpoint, SageMaker runs your Python script in a Docker container with several
+libraries installed. When you create the Estimator and call ``deploy`` to create the SageMaker Endpoint, you can control
+the environment your script runs in.
+
+SageMaker runs Scikit-learn Estimator scripts in either Python 2.7 or Python 3.5. You can select the Python version by
+passing a ``py_version`` keyword arg to the Scikit-learn Estimator constructor. Setting this to py3 (the default) will cause
+your training script to be run on Python 3.5. Setting this to py2 will cause your training script to be run on Python 2.7.
+This Python version applies to both the Training Job, created by ``fit``, and the Endpoint, created by ``deploy``.
+
+The Scikit-learn Docker images have the following dependencies installed:
+
++-----------------------------+--------------+
+| Dependencies                | sklearn 0.20 |
++-----------------------------+--------------+
+| sklearn                     | 0.20.0       |
++-----------------------------+--------------+
+| sagemaker                   | 1.11.3       |
++-----------------------------+--------------+
+| sagemaker-containers        | 2.2.4        |
++-----------------------------+--------------+
+| numpy                       | 1.15.2       |
++-----------------------------+--------------+
+| pandas                      | 0.23.4       |
++-----------------------------+--------------+
+| Pillow                      | 3.1.2        |
++-----------------------------+--------------+
+| Python                      | 2.7 or 3.5   |
++-----------------------------+--------------+
+
+You can see the full list by calling ``pip freeze`` from the running Docker image.
+
+The Docker images extend Ubuntu 16.04.
+
+You can select the version of Scikit-learn by passing a ``framework_version`` keyword arg to the Scikit-learn Estimator constructor.
+Currently supported versions are listed in the above table. You can also set ``framework_version`` to specify only the major and
+minor version, which will cause your training script to be run on the latest supported patch version of that minor
+version.
+
+Alternatively, you can build your own image by following the instructions in the SageMaker Scikit-learn containers
+repository, and passing ``image_name`` to the Scikit-learn Estimator constructor, as sketched below.
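+
+The snippet below is a minimal sketch of both options; the custom image URI is a hypothetical placeholder, not a real image:
+
+.. code:: python
+
+    from sagemaker.sklearn import SKLearn
+
+    # Pin only the major and minor version; the latest supported 0.20.x patch version is used.
+    sklearn_estimator = SKLearn(entry_point='sklearn-train.py',
+                                role='SageMakerRole',
+                                train_instance_type='ml.m4.xlarge',
+                                framework_version='0.20')
+
+    # Or point at an image you built yourself (hypothetical URI shown).
+    custom_estimator = SKLearn(entry_point='sklearn-train.py',
+                               role='SageMakerRole',
+                               train_instance_type='ml.m4.xlarge',
+                               image_name='123456789012.dkr.ecr.us-west-2.amazonaws.com/my-sklearn:latest')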
+
+You can visit the SageMaker Scikit-learn containers repository here: https://github.com/aws/sagemaker-scikit-learn-container/
diff --git a/doc/using_tf.rst b/doc/using_tf.rst
index d2d9228153..db5d6ae141 100644
--- a/doc/using_tf.rst
+++ b/doc/using_tf.rst
@@ -1,501 +1,985 @@
-==============================================
-Using TensorFlow with the SageMaker Python SDK
-==============================================
-
-TensorFlow SageMaker Estimators allow you to run your own TensorFlow
-training algorithms on SageMaker Learner, and to host your own TensorFlow
-models on SageMaker Hosting.
-
-**Note:** This topic describes how to use script mode for TensorFlow versions 1.11 and later.
-For Documentation of the previous Legacy Mode versions, see:
-
-* `1.4.1 `_
-* `1.5.0 `_
-* `1.6.0 `_
-* `1.7.0 `_
-* `1.8.0 `_
-* `1.9.0 `_
-* `1.10.0 `_
-
-.. warning::
-   We have added a new format of your TensorFlow training script with TensorFlow version 1.11.
-   This new way gives the user script more flexibility.
-   This new format is called Script Mode, as opposed to Legacy Mode, which is what we support with TensorFlow 1.11 and older versions.
-   In addition we are adding Python 3 support with Script Mode.
-   Last supported version of Legacy Mode will be TensorFlow 1.12.
-   Script Mode is available with TensorFlow version 1.11 and newer.
-   Make sure you refer to the correct version of this README when you prepare your script.
-   You can find the Legacy Mode README `here `_.
-
-.. contents::
-
-Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``.
-
-Training with TensorFlow
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-Training TensorFlow models using ``sagemaker.tensorflow.TensorFlow`` is a two-step process.
-First, you prepare your training script, then second, you run it on
-SageMaker Learner via the ``sagemaker.tensorflow.TensorFlow`` estimator.
-
-Preparing a Script Mode training script
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Your TensorFlow training script must be a Python 2.7- or 3.6-compatible source file.
-
-The training script is very similar to a training script you might run outside of SageMaker, but you can access useful properties about the training environment through various environment variables, including the following:
-
-* ``SM_MODEL_DIR``: A string that represents the local path where the training job can write the model artifacts to.
-  After training, artifacts in this directory are uploaded to S3 for model hosting. This is different than the ``model_dir``
-  argument passed in your training script which is a S3 location. ``SM_MODEL_DIR`` is always set to ``/opt/ml/model``.
-* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host.
-* ``SM_OUTPUT_DATA_DIR``: A string that represents the path to the directory to write output artifacts to.
-  Output artifacts might include checkpoints, graphs, and other files to save, but do not include model artifacts.
-  These artifacts are compressed and uploaded to S3 to an S3 bucket with the same prefix as the model artifacts.
-* ``SM_CHANNEL_XXXX``: A string that represents the path to the directory that contains the input data for the specified channel.
-  For example, if you specify two input channels in the TensorFlow estimator's ``fit`` call, named 'train' and 'test', the environment variables ``SM_CHANNEL_TRAIN`` and ``SM_CHANNEL_TEST`` are set.
-
-For the exhaustive list of available environment variables, see the `SageMaker Containers documentation `__.
- -A typical training script loads data from the input channels, configures training with hyperparameters, trains a model, and saves a model to ``SM_CHANNEL_TRAIN`` so that it can be deployed for inference later. -Hyperparameters are passed to your script as arguments and can be retrieved with an ``argparse.ArgumentParser`` instance. -For example, a training script might start with the following: - -.. code:: python - - import argparse - import os - - if __name__ =='__main__': - - parser = argparse.ArgumentParser() - - # hyperparameters sent by the client are passed as command-line arguments to the script. - parser.add_argument('--epochs', type=int, default=10) - parser.add_argument('--batch_size', type=int, default=100) - parser.add_argument('--learning_rate', type=float, default=0.1) - - # input data and model directories - parser.add_argument('--model_dir', type=str) - parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) - parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) - - args, _ = parser.parse_known_args() - - # ... load from args.train and args.test, train a model, write model to args.model_dir. - -Because the SageMaker imports your training script, putting your training launching code in a main guard (``if __name__=='__main__':``) -is good practice. - -Note that SageMaker doesn't support argparse actions. -If you want to use, for example, boolean hyperparameters, you need to specify ``type`` as ``bool`` in your script and provide an explicit ``True`` or ``False`` value for this hyperparameter when instantiating your TensorFlow estimator. - -Adapting your local TensorFlow script -''''''''''''''''''''''''''''''''''''' - -If you have a TensorFlow training script that runs outside of SageMaker please follow the directions here: - -1. Make sure your script can handle ``--model_dir`` as an additional command line argument. If you did not specify a -location when the TensorFlow estimator is constructed a S3 location under the default training job bucket will be passed -in here. Distributed training with parameter servers requires you use the ``tf.estimator.train_and_evaluate`` API and -a S3 location is needed as the model directory during training. Here is an example: - -.. code:: python - - estimator = tf.estimator.Estimator(model_fn=my_model_fn, model_dir=args.model_dir) - ... - train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000) - eval_spec = tf.estimator.EvalSpec(eval_input_fn) - tf.estimator.train_and_evaluate(mnist_classifier, train_spec, eval_spec) - -2. Load input data from the input channels. The input channels are defined when ``fit`` is called. For example: - -.. code:: python - - estimator.fit({'train':'s3://my-bucket/my-training-data', - 'eval':'s3://my-bucket/my-evaluation-data'}) - -In your training script the channels will be stored in environment variables ``SM_CHANNEL_TRAIN`` and -``SM_CHANNEL_EVAL``. You can add them to your argument parsing logic like this: - -.. code:: python - - parser = argparse.ArgumentParser() - parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) - parser.add_argument('--eval', type=str, default=os.environ.get('SM_CHANNEL_EVAL')) - -3. Export your final model to path stored in environment variable ``SM_MODEL_DIR`` which should always be - ``/opt/ml/model``. At end of training SageMaker will upload the model file under ``/opt/ml/model`` to - ``output_path``. 
- - -Training with TensorFlow estimator -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Calling fit -''''''''''' - -To use Script Mode, set at least one of these args - -- ``py_version='py3'`` -- ``script_mode=True`` - -Please note that when using Script Mode, your training script need to accept the following args: - -- ``model_dir`` - -Please note that the following args are not permitted when using Script Mode: - -- ``checkpoint_path`` -- ``training_steps`` -- ``evaluation_steps`` -- ``requirements_file`` - -.. code:: python - - from sagemaker.tensorflow import TensorFlow - - tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole', - train_instance_count=1, train_instance_type='ml.p2.xlarge', - framework_version='1.12', py_version='py3') - tf_estimator.fit('s3://bucket/path/to/training/data') - -Where the S3 url is a path to your training data, within Amazon S3. The -constructor keyword arguments define how SageMaker runs your training -script which we discussed earlier. - -You start your training script by calling ``fit`` on a ``TensorFlow`` estimator. ``fit`` takes -both required and optional arguments. - -Required argument -""""""""""""""""" - -- ``inputs``: The S3 location(s) of datasets to be used for training. This can take one of two forms: - - - ``str``: An S3 URI, for example ``s3://my-bucket/my-training-data``, which indicates the dataset's location. - - ``dict[str, str]``: A dictionary mapping channel names to S3 locations, for example ``{'train': 's3://my-bucket/my-training-data/train', 'test': 's3://my-bucket/my-training-data/test'}`` - - ``sagemaker.session.s3_input``: channel configuration for S3 data sources that can provide additional information as well as the path to the training dataset. See `the API docs `_ for full details. - -Optional arguments -"""""""""""""""""" - -- ``wait (bool)``: Defaults to True, whether to block and wait for the - training script to complete before returning. - If set to False, it will return immediately, and can later be attached to. -- ``logs (bool)``: Defaults to True, whether to show logs produced by training - job in the Python session. Only meaningful when wait is True. -- ``run_tensorboard_locally (bool)``: Defaults to False. If set to True a Tensorboard command will be printed out. -- ``job_name (str)``: Training job name. If not specified, the estimator generates a default job name, - based on the training image name and current timestamp. - -What happens when fit is called -""""""""""""""""""""""""""""""" - -Calling ``fit`` starts a SageMaker training job. The training job will execute the following. - -- Starts ``train_instance_count`` EC2 instances of the type ``train_instance_type``. -- On each instance, it will do the following steps: - - - starts a Docker container optimized for TensorFlow. - - downloads the dataset. - - setup up training related environment varialbes - - setup up distributed training environment if configured to use parameter server - - starts asynchronous training - -If the ``wait=False`` flag is passed to ``fit``, then it will return immediately. The training job will continue running -asynchronously. At a later time, a Tensorflow Estimator can be obtained by attaching to the existing training job. If -the training job is not finished it will start showing the standard output of training and wait until it completes. -After attaching, the estimator can be deployed as usual. - -.. 
code:: python - - tf_estimator.fit(your_input_data, wait=False) - training_job_name = tf_estimator.latest_training_job.name - - # after some time, or in a separate Python notebook, we can attach to it again. - - tf_estimator = TensorFlow.attach(training_job_name=training_job_name) - -Distributed Training -'''''''''''''''''''' - -To run your training job with multiple instances in a distributed fashion, set ``train_instance_count`` -to a number larger than 1. We support two different types of distributed training, parameter server and Horovod. -The ``distributions`` parameter is used to configure which distributed training strategy to use. - -Training with parameter servers -""""""""""""""""""""""""""""""" - -If you specify parameter_server as the value of the distributions parameter, the container launches a parameter server -thread on each instance in the training cluster, and then executes your training code. You can find more information on -TensorFlow distributed training at `TensorFlow docs `__. -To enable parameter server training: - -.. code:: python - - from sagemaker.tensorflow import TensorFlow - - tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole', - train_instance_count=2, train_instance_type='ml.p2.xlarge', - framework_version='1.11', py_version='py3', - distributions={'parameter_server': {'enabled': True}}) - tf_estimator.fit('s3://bucket/path/to/training/data') - -Training with Horovod -""""""""""""""""""""" - -Horovod is a distributed training framework based on MPI. Horovod is only available with TensorFlow version ``1.12`` or newer. -You can find more details at `Horovod README `__. - -The container sets up the MPI environment and executes the ``mpirun`` command enabling you to run any Horovod -training script with Script Mode. - -Training with ``MPI`` is configured by specifying following fields in ``distributions``: - -- ``enabled (bool)``: If set to ``True``, the MPI setup is performed and ``mpirun`` command is executed. -- ``processes_per_host (int)``: Number of processes MPI should launch on each host. Note, this should not be - greater than the available slots on the selected instance type. This flag should be set for the multi-cpu/gpu - training. -- ``custom_mpi_options (str)``: Any `mpirun` flag(s) can be passed in this field that will be added to the `mpirun` - command executed by SageMaker to launch distributed horovod training. - - -In the below example we create an estimator to launch Horovod distributed training with 2 processes on one host: - -.. code:: python - - from sagemaker.tensorflow import TensorFlow - - tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole', - train_instance_count=1, train_instance_type='ml.p2.xlarge', - framework_version='1.12', py_version='py3', - distributions={ - 'mpi': { - 'enabled': True, - 'processes_per_host': 2, - 'custom_mpi_options': '--NCCL_DEBUG INFO' - } - }) - tf_estimator.fit('s3://bucket/path/to/training/data') - -sagemaker.tensorflow.TensorFlow class -''''''''''''''''''''''''''''''''''''' - -The ``TensorFlow`` constructor takes both required and optional arguments. - -Required: - -- ``entry_point (str)`` Path (absolute or relative) to the Python file which - should be executed as the entry point to training. -- ``role (str)`` An AWS IAM role (either name or full ARN). The Amazon - SageMaker training jobs and APIs that create Amazon SageMaker - endpoints use this role to access training data and model artifacts. 
- After the endpoint is created, the inference code might use the IAM - role, if accessing AWS resource. -- ``train_instance_count (int)`` Number of Amazon EC2 instances to use for - training. -- ``train_instance_type (str)`` Type of EC2 instance to use for training, for - example, 'ml.c4.xlarge'. - -Optional: - -- ``source_dir (str)`` Path (absolute or relative) to a directory with any - other training source code dependencies including the entry point - file. Structure within this directory will be preserved when training - on SageMaker. -- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with - any additional libraries that will be exported to the container (default: ``[]``). - The library folders will be copied to SageMaker in the same folder where the entrypoint is copied. - If the ``source_dir`` points to S3, code will be uploaded and the S3 location will be used - instead. Example: - - The following call - - >>> TensorFlow(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env']) - - results in the following inside the container: - - >>> opt/ml/code - >>> ├── train.py - >>> ├── common - >>> └── virtual-env - -- ``hyperparameters (dict[str, ANY])`` Hyperparameters that will be used for training. - Will be made accessible as command line arguments. -- ``train_volume_size (int)`` Size in GB of the EBS volume to use for storing - input data during training. Must be large enough to the store training - data. -- ``train_max_run (int)`` Timeout in seconds for training, after which Amazon - SageMaker terminates the job regardless of its current status. -- ``output_path (str)`` S3 location where you want the training result (model - artifacts and optional output files) saved. If not specified, results - are stored to a default bucket. If the bucket with the specific name - does not exist, the estimator creates the bucket during the ``fit`` - method execution. -- ``output_kms_key`` Optional KMS key ID to optionally encrypt training - output with. -- ``base_job_name`` Name to assign for the training job that the ``fit`` - method launches. If not specified, the estimator generates a default - job name, based on the training image name and current timestamp. -- ``image_name`` An alternative docker image to use for training and - serving. If specified, the estimator will use this image for training and - hosting, instead of selecting the appropriate SageMaker official image based on - ``framework_version`` and ``py_version``. Refer to: `SageMaker TensorFlow Docker Containers - <#sagemaker-tensorflow-docker-containers>`_ for details on what the official images support - and where to find the source code to build your custom image. -- ``script_mode (bool)`` Whether to use Script Mode or not. Script mode is the only available training mode in Python 3, - setting ``py_version`` to ``py3`` automatically sets ``script_mode`` to True. -- ``model_dir (str)`` Location where model data, checkpoint data, and TensorBoard checkpoints should be saved during training. - If not specified a S3 location will be generated under the training job's default bucket. And ``model_dir`` will be - passed in your training script as one of the command line arguments. -- ``distributions (dict)`` Configure your distribution strategy with this argument. - -Training with Pipe Mode using PipeModeDataset -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Amazon SageMaker allows users to create training jobs using Pipe input mode. 
-With Pipe input mode, your dataset is streamed directly to your training instances instead of being downloaded first. -This means that your training jobs start sooner, finish quicker, and need less disk space. - -SageMaker TensorFlow provides an implementation of ``tf.data.Dataset`` that makes it easy to take advantage of Pipe -input mode in SageMaker. You can replace your ``tf.data.Dataset`` with a ``sagemaker_tensorflow.PipeModeDataset`` to -read TFRecords as they are streamed to your training instances. - -In your ``entry_point`` script, you can use ``PipeModeDataset`` like a ``Dataset``. In this example, we create a -``PipeModeDataset`` to read TFRecords from the 'training' channel: - - -.. code:: python - - from sagemaker_tensorflow import PipeModeDataset - - features = { - 'data': tf.FixedLenFeature([], tf.string), - 'labels': tf.FixedLenFeature([], tf.int64), - } - - def parse(record): - parsed = tf.parse_single_example(record, features) - return ({ - 'data': tf.decode_raw(parsed['data'], tf.float64) - }, parsed['labels']) - - def train_input_fn(training_dir, hyperparameters): - ds = PipeModeDataset(channel='training', record_format='TFRecord') - ds = ds.repeat(20) - ds = ds.prefetch(10) - ds = ds.map(parse, num_parallel_calls=10) - ds = ds.batch(64) - return ds - - -To run training job with Pipe input mode, pass in ``input_mode='Pipe'`` to your TensorFlow Estimator: - - -.. code:: python - - from sagemaker.tensorflow import TensorFlow - - tf_estimator = TensorFlow(entry_point='tf-train-with-pipemodedataset.py', role='SageMakerRole', - training_steps=10000, evaluation_steps=100, - train_instance_count=1, train_instance_type='ml.p2.xlarge', - framework_version='1.10.0', input_mode='Pipe') - - tf_estimator.fit('s3://bucket/path/to/training/data') - - -If your TFRecords are compressed, you can train on Gzipped TF Records by passing in ``compression='Gzip'`` to the call to -``fit()``, and SageMaker will automatically unzip the records as data is streamed to your training instances: - -.. code:: python - - from sagemaker.session import s3_input - - train_s3_input = s3_input('s3://bucket/path/to/training/data', compression='Gzip') - tf_estimator.fit(train_s3_input) - - -You can learn more about ``PipeModeDataset`` in the sagemaker-tensorflow-extensions repository: https://github.com/aws/sagemaker-tensorflow-extensions - - -Training with MKL-DNN disabled -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -SageMaker TensorFlow CPU images use TensorFlow built with Intel® MKL-DNN optimization. - -In certain cases you might be able to get a better performance by disabling this optimization -(`for example when using small models `_) - -You can disable MKL-DNN optimization for TensorFlow ``1.8.0`` and above by setting two following environment variables: - -.. code:: python - - import os - - os.environ['TF_DISABLE_MKL'] = '1' - os.environ['TF_DISABLE_POOL_ALLOCATOR'] = '1' - - -Deploying TensorFlow Serving models -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -After a TensorFlow estimator has been fit, it saves a TensorFlow SavedModel in -the S3 location defined by ``output_path``. You can call ``deploy`` on a TensorFlow -estimator to create a SageMaker Endpoint. - -Your model will be deployed to a TensorFlow Serving-based server. The server provides a super-set of the -`TensorFlow Serving REST API `_. - -See `Deploying to TensorFlow Serving Endpoints `_ to learn how to deploy your model and make inference requests. 
- - -SageMaker TensorFlow Docker containers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The containers include the following Python packages: - -+--------------------------------+---------------+-------------------+ -| Dependencies | Script Mode | Legacy Mode | -+--------------------------------+---------------+-------------------+ -| boto3 | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| botocore | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| CUDA (GPU image only) | 9.0 | 9.0 | -+--------------------------------+---------------+-------------------+ -| numpy | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| Pillow | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| scipy | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| sklean | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| h5py | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| pip | 18.1 | 18.1 | -+--------------------------------+---------------+-------------------+ -| curl | Latest | Latest | -+--------------------------------+---------------+-------------------+ -| tensorflow | 1.12.0 | 1.12.0 | -+--------------------------------+---------------+-------------------+ -| tensorflow-serving-api | 1.12.0 | None | -+--------------------------------+---------------+-------------------+ -| sagemaker-containers | >=2.3.5 | >=2.3.5 | -+--------------------------------+---------------+-------------------+ -| sagemaker-tensorflow-container | 1.0 | 1.0 | -+--------------------------------+---------------+-------------------+ -| Python | 2.7 or 3.6 | 2.7 | -+--------------------------------+---------------+-------------------+ - -Legacy Mode TensorFlow Docker images support Python 2.7. Script Mode TensorFlow Docker images support both Python 2.7 -and Python 3.6. The Docker images extend Ubuntu 16.04. - -You can select version of TensorFlow by passing a ``framework_version`` keyword arg to the TensorFlow Estimator constructor. Currently supported versions are listed in the table above. You can also set ``framework_version`` to only specify major and minor version, e.g ``'1.6'``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.6.0. -Alternatively, you can build your own image by following the instructions in the SageMaker TensorFlow containers -repository, and passing ``image_name`` to the TensorFlow Estimator constructor. - -For more information on the contents of the images, see the SageMaker TensorFlow containers repositories here: - -- training: https://github.com/aws/sagemaker-tensorflow-container -- serving: https://github.com/aws/sagemaker-tensorflow-serving-container +############################################## +Using TensorFlow with the SageMaker Python SDK +############################################## + +TensorFlow SageMaker Estimators allow you to run your own TensorFlow +training algorithms on SageMaker Learner, and to host your own TensorFlow +models on SageMaker Hosting. + +For general information about using the SageMaker Python SDK, see :ref:`overview:Using the SageMaker Python SDK`. + +.. warning:: + We have added a new format of your TensorFlow training script with TensorFlow version 1.11. 
+  This new way gives the user script more flexibility.
+  This new format is called Script Mode, as opposed to Legacy Mode, which was the only training format before TensorFlow 1.11.
+  In addition, Script Mode adds Python 3 support.
+  The last supported version of Legacy Mode will be TensorFlow 1.12.
+  Script Mode is available with TensorFlow version 1.11 and newer.
+  Make sure you refer to the correct version of this README when you prepare your script.
+  You can find the Legacy Mode README `here `_.
+
+.. contents::
+
+Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``.
+
+
+*****************************
+Train a Model with TensorFlow
+*****************************
+
+To train a TensorFlow model by using the SageMaker Python SDK:
+
+.. |create tf estimator| replace:: Create a ``sagemaker.tensorflow.TensorFlow`` estimator
+.. _create tf estimator: #create-an-estimator
+
+.. |call fit| replace:: Call the estimator's ``fit`` method
+.. _call fit: #call-the-fit-method
+
+1. `Prepare a training script <#prepare-a-script-mode-training-script>`_
+2. |create tf estimator|_
+3. |call fit|_
+
+Prepare a Script Mode Training Script
+======================================
+
+Your TensorFlow training script must be a Python 2.7- or 3.6-compatible source file.
+
+The training script is very similar to a training script you might run outside of SageMaker, but you can access useful properties about the training environment through various environment variables, including the following:
+
+* ``SM_MODEL_DIR``: A string that represents the local path where the training job writes the model artifacts to.
+  After training, artifacts in this directory are uploaded to S3 for model hosting. This is different from the ``model_dir``
+  argument passed in your training script, which is an S3 location. ``SM_MODEL_DIR`` is always set to ``/opt/ml/model``.
+* ``SM_NUM_GPUS``: An integer representing the number of GPUs available to the host.
+* ``SM_OUTPUT_DATA_DIR``: A string that represents the path to the directory to write output artifacts to.
+  Output artifacts might include checkpoints, graphs, and other files to save, but do not include model artifacts.
+  These artifacts are compressed and uploaded to an S3 bucket with the same prefix as the model artifacts.
+* ``SM_CHANNEL_XXXX``: A string that represents the path to the directory that contains the input data for the specified channel.
+  For example, if you specify two input channels in the TensorFlow estimator's ``fit`` call, named 'train' and 'test', the environment variables ``SM_CHANNEL_TRAIN`` and ``SM_CHANNEL_TEST`` are set.
+
+For the exhaustive list of available environment variables, see the `SageMaker Containers documentation `_.
+
+A typical training script loads data from the input channels, configures training with hyperparameters, trains a model, and saves the model to ``SM_MODEL_DIR`` so that it can be deployed for inference later.
+Hyperparameters are passed to your script as arguments and can be retrieved with an ``argparse.ArgumentParser`` instance.
+For example, a training script might start with the following:
+
+.. code:: python
+
+    import argparse
+    import os
+
+    if __name__ == '__main__':
+
+        parser = argparse.ArgumentParser()
+
+        # hyperparameters sent by the client are passed as command-line arguments to the script.
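+        # For example, hyperparameters={'epochs': 20, 'batch_size': 64} passed to the
+        # estimator arrive in the container as ['--epochs', '20', '--batch_size', '64'].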
+        parser.add_argument('--epochs', type=int, default=10)
+        parser.add_argument('--batch_size', type=int, default=100)
+        parser.add_argument('--learning_rate', type=float, default=0.1)
+
+        # input data and model directories
+        parser.add_argument('--model_dir', type=str)
+        parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
+        parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))
+
+        args, _ = parser.parse_known_args()
+
+        # ... load from args.train and args.test, train a model, write model to args.model_dir.
+
+Because SageMaker imports your training script, putting your training launching code in a main guard (``if __name__ == '__main__':``)
+is good practice.
+
+Note that SageMaker doesn't support argparse actions.
+For example, if you want to use a boolean hyperparameter, specify ``type`` as ``bool`` in your script and provide an explicit ``True`` or ``False`` value for this hyperparameter when you create the TensorFlow estimator.
+
+For a complete example of a TensorFlow training script, see `mnist.py `__.
+
+
+Adapting your local TensorFlow script
+-------------------------------------
+
+If you have a TensorFlow training script that runs outside of SageMaker, do the following to adapt the script to run in SageMaker:
+
+1. Make sure your script can handle ``--model_dir`` as an additional command line argument. If you did not specify a
+location when you created the TensorFlow estimator, an S3 location under the default training job bucket is used.
+Distributed training with parameter servers requires you to use the ``tf.estimator.train_and_evaluate`` API and
+to provide an S3 location as the model directory during training. Here is an example:
+
+.. code:: python
+
+    estimator = tf.estimator.Estimator(model_fn=my_model_fn, model_dir=args.model_dir)
+    ...
+    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000)
+    eval_spec = tf.estimator.EvalSpec(eval_input_fn)
+    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
+
+2. Load input data from the input channels. The input channels are defined when ``fit`` is called. For example:
+
+.. code:: python
+
+    estimator.fit({'train': 's3://my-bucket/my-training-data',
+                   'eval': 's3://my-bucket/my-evaluation-data'})
+
+In your training script the channels will be stored in environment variables ``SM_CHANNEL_TRAIN`` and
+``SM_CHANNEL_EVAL``. You can add them to your argument parsing logic like this:
+
+.. code:: python
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
+    parser.add_argument('--eval', type=str, default=os.environ.get('SM_CHANNEL_EVAL'))
+
+3. Export your final model to the path stored in the environment variable ``SM_MODEL_DIR``, which is always
+   ``/opt/ml/model``. At the end of training, SageMaker uploads the model files under ``/opt/ml/model`` to
+   ``output_path``.
+
+
+Create an Estimator
+===================
+
+After you create your training script, create an instance of the :class:`sagemaker.tensorflow.TensorFlow` estimator.
+
+To use Script Mode, set at least one of these arguments:
+
+- ``py_version='py3'``
+- ``script_mode=True``
+
+When using Script Mode, your training script needs to accept the following argument:
+
+- ``model_dir``
+
+The following arguments are not permitted when using Script Mode:
+
+- ``checkpoint_path``
+- ``training_steps``
+- ``evaluation_steps``
+- ``requirements_file``
+
+.. code:: python
+
+    from sagemaker.tensorflow import TensorFlow
+
+    tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole',
+                              train_instance_count=1, train_instance_type='ml.p2.xlarge',
+                              framework_version='1.12', py_version='py3')
+    tf_estimator.fit('s3://bucket/path/to/training/data')
+
+The S3 URL is the path to your training data within Amazon S3.
+The constructor keyword arguments define how SageMaker runs your training script.
+
+For more information about the ``sagemaker.tensorflow.TensorFlow`` estimator, see `sagemaker.tensorflow.TensorFlow Class`_.
+
+Call the fit Method
+===================
+
+You start your training job by calling the ``fit`` method on a ``TensorFlow`` estimator. ``fit`` takes
+both required and optional arguments.
+
+Required arguments
+------------------
+
+- ``inputs``: The S3 location(s) of datasets to be used for training. This can take one of the following forms:
+
+  - ``str``: An S3 URI, for example ``s3://my-bucket/my-training-data``, which indicates the dataset's location.
+  - ``dict[str, str]``: A dictionary mapping channel names to S3 locations, for example ``{'train': 's3://my-bucket/my-training-data/train', 'test': 's3://my-bucket/my-training-data/test'}``
+  - ``sagemaker.session.s3_input``: channel configuration for S3 data sources that can provide additional information as well as the path to the training dataset. See `the API docs `_ for full details.
+
+Optional arguments
+------------------
+
+- ``wait (bool)``: Defaults to True, whether to block and wait for the
+  training script to complete before returning.
+  If set to False, it returns immediately, and the training job can later be attached to.
+- ``logs (bool)``: Defaults to True, whether to show logs produced by the training
+  job in the Python session. Only meaningful when wait is True.
+- ``run_tensorboard_locally (bool)``: Defaults to False. If set to True, a TensorBoard command is printed out.
+- ``job_name (str)``: Training job name. If not specified, the estimator generates a default job name,
+  based on the training image name and current timestamp.
+
+What happens when fit is called
+-------------------------------
+
+Calling ``fit`` starts a SageMaker training job. The training job performs the following steps:
+
+- Starts ``train_instance_count`` EC2 instances of the type ``train_instance_type``.
+- On each instance, it does the following:
+
+  - starts a Docker container optimized for TensorFlow
+  - downloads the dataset
+  - sets up training-related environment variables
+  - sets up the distributed training environment if configured to use a parameter server
+  - starts asynchronous training
+
+If the ``wait=False`` flag is passed to ``fit``, then it returns immediately. The training job continues running
+asynchronously. Later, a TensorFlow estimator can be obtained by attaching to the existing training job.
+If the training job is not finished, attaching starts showing the standard output of training and waits until it completes.
+After attaching, the estimator can be deployed as usual.
+
+.. code:: python
+
+    tf_estimator.fit(your_input_data, wait=False)
+    training_job_name = tf_estimator.latest_training_job.name
+
+    # after some time, or in a separate Python notebook, we can attach to it again.
+
+    tf_estimator = TensorFlow.attach(training_job_name=training_job_name)
+
+Distributed Training
+====================
+
+To run your training job with multiple instances in a distributed fashion, set ``train_instance_count``
+to a number larger than 1. We support two different types of distributed training: parameter server and Horovod.
+The ``distributions`` parameter is used to configure which distributed training strategy to use.
+
+Training with parameter servers
+-------------------------------
+
+If you specify ``parameter_server`` as the value of the ``distributions`` parameter, the container launches a parameter server
+thread on each instance in the training cluster, and then executes your training code. You can find more information on
+TensorFlow distributed training at `TensorFlow docs `__.
+To enable parameter server training:
+
+.. code:: python
+
+    from sagemaker.tensorflow import TensorFlow
+
+    tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole',
+                              train_instance_count=2, train_instance_type='ml.p2.xlarge',
+                              framework_version='1.11', py_version='py3',
+                              distributions={'parameter_server': {'enabled': True}})
+    tf_estimator.fit('s3://bucket/path/to/training/data')
+
+Training with Horovod
+---------------------
+
+Horovod is a distributed training framework based on MPI. Horovod is only available with TensorFlow version ``1.12`` or newer.
+You can find more details at `Horovod README `__.
+
+The container sets up the MPI environment and executes the ``mpirun`` command, enabling you to run any Horovod
+training script with Script Mode.
+
+Training with MPI is configured by specifying the following fields in ``distributions``:
+
+- ``enabled (bool)``: If set to ``True``, the MPI setup is performed and the ``mpirun`` command is executed.
+- ``processes_per_host (int)``: Number of processes MPI should launch on each host. Note that this should not be
+  greater than the number of available slots on the selected instance type. Set this flag for multi-CPU/GPU
+  training.
+- ``custom_mpi_options (str)``: Any ``mpirun`` flag(s) passed in this field are added to the ``mpirun``
+  command executed by SageMaker to launch distributed Horovod training.
+
+
+In the following example, we create an estimator to launch Horovod distributed training with 2 processes on one host:
+
+.. code:: python
+
+    from sagemaker.tensorflow import TensorFlow
+
+    tf_estimator = TensorFlow(entry_point='tf-train.py', role='SageMakerRole',
+                              train_instance_count=1, train_instance_type='ml.p2.xlarge',
+                              framework_version='1.12', py_version='py3',
+                              distributions={
+                                  'mpi': {
+                                      'enabled': True,
+                                      'processes_per_host': 2,
+                                      'custom_mpi_options': '-x NCCL_DEBUG=INFO'
+                                  }
+                              })
+    tf_estimator.fit('s3://bucket/path/to/training/data')
+
+
+Training with Pipe Mode using PipeModeDataset
+=============================================
+
+Amazon SageMaker allows users to create training jobs using Pipe input mode.
+With Pipe input mode, your dataset is streamed directly to your training instances instead of being downloaded first.
+This means that your training jobs start sooner, finish quicker, and need less disk space.
+
+SageMaker TensorFlow provides an implementation of ``tf.data.Dataset`` that makes it easy to take advantage of Pipe
+input mode in SageMaker. You can replace your ``tf.data.Dataset`` with a ``sagemaker_tensorflow.PipeModeDataset`` to
+read TFRecords as they are streamed to your training instances.
+
+In your ``entry_point`` script, you can use ``PipeModeDataset`` like a ``Dataset``. In this example, we create a
+``PipeModeDataset`` to read TFRecords from the 'training' channel:
+
+
+.. code:: python
+
+    import tensorflow as tf
+    from sagemaker_tensorflow import PipeModeDataset
+
+    features = {
+        'data': tf.FixedLenFeature([], tf.string),
+        'labels': tf.FixedLenFeature([], tf.int64),
+    }
+
+    def parse(record):
+        parsed = tf.parse_single_example(record, features)
+        return ({
+            'data': tf.decode_raw(parsed['data'], tf.float64)
+        }, parsed['labels'])
+
+    def train_input_fn(training_dir, hyperparameters):
+        ds = PipeModeDataset(channel='training', record_format='TFRecord')
+        ds = ds.repeat(20)
+        ds = ds.prefetch(10)
+        ds = ds.map(parse, num_parallel_calls=10)
+        ds = ds.batch(64)
+        return ds
+
+
+To run a training job with Pipe input mode, pass in ``input_mode='Pipe'`` to your TensorFlow Estimator:
+
+
+.. code:: python
+
+    from sagemaker.tensorflow import TensorFlow
+
+    tf_estimator = TensorFlow(entry_point='tf-train-with-pipemodedataset.py', role='SageMakerRole',
+                              train_instance_count=1, train_instance_type='ml.p2.xlarge',
+                              framework_version='1.12', py_version='py3', input_mode='Pipe')
+
+    tf_estimator.fit('s3://bucket/path/to/training/data')
+
+
+If your TFRecords are compressed, you can train on gzipped TFRecords by passing in ``compression='Gzip'`` to the call to
+``fit()``, and SageMaker will automatically unzip the records as data is streamed to your training instances:
+
+.. code:: python
+
+    from sagemaker.session import s3_input
+
+    train_s3_input = s3_input('s3://bucket/path/to/training/data', compression='Gzip')
+    tf_estimator.fit(train_s3_input)
+
+
+You can learn more about ``PipeModeDataset`` in the sagemaker-tensorflow-extensions repository: https://github.com/aws/sagemaker-tensorflow-extensions
+
+
+Training with MKL-DNN disabled
+==============================
+
+SageMaker TensorFlow CPU images use TensorFlow built with Intel® MKL-DNN optimization.
+
+In certain cases, you might get better performance by disabling this optimization
+(`for example, when using small models `_).
+
+You can disable MKL-DNN optimization for TensorFlow ``1.8.0`` and above by setting the following two environment variables:
+
+.. code:: python
+
+    import os
+
+    os.environ['TF_DISABLE_MKL'] = '1'
+    os.environ['TF_DISABLE_POOL_ALLOCATOR'] = '1'
+
+********************************
+Deploy TensorFlow Serving models
+********************************
+
+After a TensorFlow estimator has been fit, it saves a TensorFlow SavedModel in
+the S3 location defined by ``output_path``. You can call ``deploy`` on a TensorFlow
+estimator to create a SageMaker Endpoint, or you can call ``transformer`` to create a ``Transformer`` that you can use to run a batch transform job.
+
+Your model will be deployed to a TensorFlow Serving-based server. The server provides a super-set of the
+`TensorFlow Serving REST API `_.
+
+
+Deploy to a SageMaker Endpoint
+==============================
+
+Deploying from an Estimator
+---------------------------
+
+After a TensorFlow estimator has been fit, it saves a TensorFlow
+`SavedModel `_ bundle in
+the S3 location defined by ``output_path``. You can call ``deploy`` on a TensorFlow
+estimator object to create a SageMaker Endpoint:
+
+.. code:: python
+
+    from sagemaker.tensorflow import TensorFlow
+
+    estimator = TensorFlow(entry_point='tf-train.py', ..., train_instance_count=1,
+                           train_instance_type='ml.c4.xlarge', framework_version='1.11')
+
+    estimator.fit(inputs)
+
+    predictor = estimator.deploy(initial_instance_count=1,
+                                 instance_type='ml.c5.xlarge',
+                                 endpoint_type='tensorflow-serving')
+
+
+The code block above deploys a SageMaker Endpoint with one instance of the type 'ml.c5.xlarge'.
+
+What happens when deploy is called
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Calling ``deploy`` starts the process of creating a SageMaker Endpoint. This process includes the following steps.
+
+- Starts ``initial_instance_count`` EC2 instances of the type ``instance_type``.
+- On each instance, it does the following:
+
+  - starts a Docker container optimized for TensorFlow Serving, see `SageMaker TensorFlow Serving containers `_.
+  - starts a `TensorFlow Serving` process configured to run your model.
+  - starts an HTTP server that provides access to TensorFlow Serving through the SageMaker InvokeEndpoint API.
+
+
+When the ``deploy`` call finishes, the created SageMaker Endpoint is ready for prediction requests. The
+`Making predictions against a SageMaker Endpoint`_ section explains how to make prediction requests
+against the Endpoint.
+
+Deploying directly from model artifacts
+---------------------------------------
+
+If you already have existing model artifacts in S3, you can skip training and deploy them directly to an endpoint:
+
+.. code:: python
+
+    from sagemaker.tensorflow.serving import Model
+
+    model = Model(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole')
+
+    predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')
+
+Python-based TensorFlow serving on SageMaker has support for `Elastic Inference `__, which allows for inference acceleration to a hosted endpoint for a fraction of the cost of using a full GPU instance. To attach an Elastic Inference accelerator to your endpoint, provide the accelerator type as the ``accelerator_type`` argument in your ``deploy`` call.
+
+.. code:: python
+
+    from sagemaker.tensorflow.serving import Model
+
+    model = Model(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole')
+
+    predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge', accelerator_type='ml.eia1.medium')
+
+Making predictions against a SageMaker Endpoint
+-----------------------------------------------
+
+Once you have the ``Predictor`` instance returned by ``model.deploy(...)`` or ``estimator.deploy(...)``, you
+can send prediction requests to your Endpoint.
+
+The following code shows how to make a prediction request:
+
+.. code:: python
+
+    input = {
+        'instances': [1.0, 2.0, 5.0]
+    }
+    result = predictor.predict(input)
+
+The result object will contain a Python dict like this:
+
+.. code:: python
+
+    {
+        'predictions': [3.5, 4.0, 5.5]
+    }
+
+The formats of the input and the output data correspond directly to the request and response formats
+of the ``Predict`` method in the `TensorFlow Serving REST API `_.
+
+If your SavedModel includes the right ``signature_def``, you can also make Classify or Regress requests:
+
+.. code:: python
+
+    # input matches the Classify and Regress API
+    input = {
+        'signature_name': 'tensorflow/serving/regress',
+        'examples': [{'x': 1.0}, {'x': 2.0}]
+    }
+
+    result = predictor.regress(input)  # or predictor.classify(...)
+
+    # result contains:
+    {
+        'results': [3.5, 4.0]
+    }
+
+You can include multiple ``instances`` in your predict request (or multiple ``examples`` in
+classify/regress requests) to get multiple prediction results in one request to your Endpoint:
+
+.. code:: python
+
+    input = {
+        'instances': [
+            [1.0, 2.0, 5.0],
+            [1.0, 2.0, 5.0],
+            [1.0, 2.0, 5.0]
+        ]
+    }
+    result = predictor.predict(input)
+
+    # result contains:
+    {
+        'predictions': [
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5]
+        ]
+    }
+
+If your application allows request grouping like this, it is **much** more efficient than making separate requests.
+
+See `Deploying to TensorFlow Serving Endpoints `_ to learn how to deploy your model and make inference requests.
+
+Run a Batch Transform Job
+=========================
+
+Batch transform allows you to get inferences for an entire dataset that is stored in an S3 bucket.
+
+For general information about using batch transform with the SageMaker Python SDK, see :ref:`overview:SageMaker Batch Transform`.
+For information about SageMaker batch transform, see `Get Inferences for an Entire Dataset with Batch Transform `_ in the AWS documentation.
+
+To run a batch transform job, you first create a ``Transformer`` object, and then call that object's ``transform`` method.
+
+Create a Transformer Object
+---------------------------
+
+If you used an estimator to train your model, you can call the ``transformer`` method of the estimator to create a ``Transformer`` object.
+
+For example:
+
+.. code:: python
+
+    bucket = 'my-bucket'      # The name of the S3 bucket where the results are stored
+    prefix = 'batch-results'  # The folder in the S3 bucket where the results are stored
+
+    batch_output = 's3://{}/{}/results'.format(bucket, prefix)  # The location to store the results
+
+    tf_transformer = tf_estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge', output_path=batch_output)
+
+To use a model trained outside of SageMaker, you can package the model as a SageMaker model, and call the ``transformer`` method of the SageMaker model.
+
+For example:
+
+.. code:: python
+
+    bucket = 'my-bucket'      # The name of the S3 bucket where the results are stored
+    prefix = 'batch-results'  # The folder in the S3 bucket where the results are stored
+
+    batch_output = 's3://{}/{}/results'.format(bucket, prefix)  # The location to store the results
+
+    tf_transformer = tensorflow_serving_model.transformer(instance_count=1, instance_type='ml.m4.xlarge', output_path=batch_output)
+
+For information about how to package a model as a SageMaker model, see :ref:`overview:BYO Model`.
+When you call the ``transformer`` method, you specify the type and number of instances to use for the batch transform job, and the location where the results are stored in S3.
+
+
+
+Call transform
+--------------
+
+After you create a ``Transformer`` object, you call that object's ``transform`` method to start a batch transform job.
+For example:
+
+.. code:: python
+
+    batch_input = 's3://{}/{}/test/examples'.format(bucket, prefix)  # The location of the input dataset
+
+    tf_transformer.transform(data=batch_input, data_type='S3Prefix', content_type='text/csv', split_type='Line')
+
+In the example, the content type is CSV, and each line in the dataset is treated as a record to get a prediction for.
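+
+The ``transform`` call is asynchronous. The following is a minimal sketch of waiting for the job to finish and then listing the result files (batch transform writes one ``.out`` file per input file under ``output_path``; ``bucket`` and ``prefix`` are the placeholder values from above):
+
+.. code:: python
+
+    import boto3
+
+    tf_transformer.wait()  # block until the batch transform job completes
+
+    s3 = boto3.client('s3')
+    response = s3.list_objects_v2(Bucket=bucket, Prefix='{}/results'.format(prefix))
+    for obj in response.get('Contents', []):
+        print(obj['Key'])  # e.g. batch-results/results/examples.out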
+
+Batch Transform Supported Data Formats
+--------------------------------------
+
+When you call the ``transform`` method to start a batch transform job,
+you specify the data format by providing a MIME type as the value for the ``content_type`` parameter.
+
+The following content formats are supported without custom input and output handling:
+
+* CSV - specify ``text/csv`` as the value of the ``content_type`` parameter.
+* JSON - specify ``application/json`` as the value of the ``content_type`` parameter.
+* JSON lines - specify ``application/jsonlines`` as the value of the ``content_type`` parameter.
+
+For detailed information about how TensorFlow Serving formats these data types for input and output, see :ref:`using_tf:TensorFlow Serving Input and Output`.
+
+You can also accept any custom data format by writing input and output functions, and include them in the ``inference.py`` file in your model.
+For information, see :ref:`using_tf:Create Python Scripts for Custom Input and Output Formats`.
+
+
+TensorFlow Serving Input and Output
+===================================
+
+The following sections describe the data formats that TensorFlow Serving endpoints and batch transform jobs accept,
+and how to write input and output functions to input and output custom data formats.
+
+Supported Formats
+-----------------
+
+SageMaker's TensorFlow Serving endpoints can also accept some additional input formats that are not part of the
+TensorFlow REST API, including a simplified JSON format, line-delimited JSON objects ("jsons" or "jsonlines"), and
+CSV data.
+
+Simplified JSON Input
+^^^^^^^^^^^^^^^^^^^^^
+
+The Endpoint will accept simplified JSON input that doesn't match the TensorFlow REST API's Predict request format.
+When the Endpoint receives data like this, it will attempt to transform it into a valid
+Predict request, using a few simple rules:
+
+- A Python value, dict, or one-dimensional array is treated as the input value in a single-instance Predict request.
+- A multidimensional array is treated as multiple values in a multi-instance Predict request.
+
+Combined with the client-side ``Predictor`` object's JSON serialization, this allows you to make simple
+requests like this:
+
+.. code:: python
+
+    input = [
+        [1.0, 2.0, 5.0],
+        [1.0, 2.0, 5.0]
+    ]
+    result = predictor.predict(input)
+
+    # result contains:
+    {
+        'predictions': [
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5]
+        ]
+    }
+
+Or this:
+
+.. code:: python
+
+    # 'x' must match name of input tensor in your SavedModel graph
+    # for models with multiple named inputs, just include all the keys in the input dict
+    input = {
+        'x': [1.0, 2.0, 5.0]
+    }
+
+    # result contains:
+    {
+        'predictions': [
+            [3.5, 4.0, 5.5]
+        ]
+    }
+
+
+Line-delimited JSON
+^^^^^^^^^^^^^^^^^^^
+
+The Endpoint will accept line-delimited JSON objects (also known as "jsons" or "jsonlines" data).
+The Endpoint treats each line as a separate instance in a multi-instance Predict request. To use
+this feature from your python code, you need to create a ``Predictor`` instance that does not
+try to serialize your input to JSON:
+
+.. code:: python
+
+    # create a Predictor without JSON serialization
+
+    predictor = Predictor('endpoint-name', serializer=None, content_type='application/jsonlines')
+
+    input = '''{'x': [1.0, 2.0, 5.0]}
+    {'x': [1.0, 2.0, 5.0]}
+    {'x': [1.0, 2.0, 5.0]}'''
+
+    result = predictor.predict(input)
+
+    # result contains:
+    {
+        'predictions': [
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5]
+        ]
+    }
+
+This feature is especially useful if you are reading data from a file containing jsonlines data.
+
+**CSV (comma-separated values)**
+
+The Endpoint will accept CSV data. Each line is treated as a separate instance. This is a
+compact format for representing multiple instances of 1-d array data. To use this feature
+from your python code, you need to create a ``Predictor`` instance that can serialize
+your input data to CSV format:
+
+.. code:: python
+
+    # create a Predictor with CSV serialization
+
+    predictor = Predictor('endpoint-name', serializer=sagemaker.predictor.csv_serializer)
+
+    # CSV-formatted string input
+    input = '1.0,2.0,5.0\n1.0,2.0,5.0\n1.0,2.0,5.0'
+
+    result = predictor.predict(input)
+
+    # result contains:
+    {
+        'predictions': [
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5],
+            [3.5, 4.0, 5.5]
+        ]
+    }
+
+You can also use python arrays or numpy arrays as input and let the ``csv_serializer`` object
+convert them to CSV, but the client-side CSV conversion is more sophisticated than the
+CSV parsing on the Endpoint, so if you encounter conversion problems, try using one of the
+JSON options instead.
+
+
+Create Python Scripts for Custom Input and Output Formats
+---------------------------------------------------------
+
+You can add your customized Python code to process your input and output data:
+
+.. code::
+
+    from sagemaker.tensorflow.serving import Model
+
+    model = Model(entry_point='inference.py',
+                  model_data='s3://mybucket/model.tar.gz',
+                  role='MySageMakerRole')
+
+How to implement the pre- and/or post-processing handler(s)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Your entry point file should implement either a pair of ``input_handler`` and ``output_handler``
+functions or a single ``handler`` function. Note that if the ``handler`` function is implemented,
+``input_handler`` and ``output_handler`` are ignored.
+
+To implement pre- and/or post-processing handler(s), use the Context
+object that the Python service creates. The Context object is a namedtuple with the following attributes:
+
+- ``model_name (string)``: the name of the model to use for
+  inference. For example, 'half-plus-three'
+
+- ``model_version (string)``: version of the model. For example, '5'
+
+- ``method (string)``: inference method. For example, 'predict',
+  'classify' or 'regress', for more information on methods, please see
+  `Classify and Regress
+  API `__
+  and `Predict
+  API `__
+
+- ``rest_uri (string)``: the TFS REST uri generated by the Python
+  service. For example,
+  'http://localhost:8501/v1/models/half_plus_three:predict'
+
+- ``grpc_uri (string)``: the GRPC port number generated by the Python
+  service. For example, '9000'
+
+- ``custom_attributes (string)``: content of
+  'X-Amzn-SageMaker-Custom-Attributes' header from the original
+  request. For example,
+  'tfs-model-name=half_plus_three,tfs-method=predict'
+
+- ``request_content_type (string)``: the original request content type,
+  defaulted to 'application/json' if not provided
+
+- ``accept_header (string)``: the original request accept type,
+  defaulted to 'application/json' if not provided
+
+- ``content_length (int)``: content length of the original request
+
+The following code example implements ``input_handler`` and
+``output_handler``. By providing these, the Python service posts the
+request to the TFS REST URI with the data pre-processed by ``input_handler``
+and passes the response to ``output_handler`` for post-processing.
+
+.. code::
+
+    import json
+
+    def input_handler(data, context):
+        """ Pre-process request input before it is sent to TensorFlow Serving REST API
+        Args:
+            data (obj): the request data, in format of dict or string
+            context (Context): an object containing request and configuration details
+        Returns:
+            (dict): a JSON-serializable dict that contains request body and headers
+        """
+        if context.request_content_type == 'application/json':
+            # pass through json (assumes it's correctly formed)
+            d = data.read().decode('utf-8')
+            return d if len(d) else ''
+
+        if context.request_content_type == 'text/csv':
+            # very simple csv handler
+            return json.dumps({
+                'instances': [float(x) for x in data.read().decode('utf-8').split(',')]
+            })
+
+        raise ValueError('{{"error": "unsupported content type {}"}}'.format(
+            context.request_content_type or "unknown"))
+
+
+    def output_handler(data, context):
+        """Post-process TensorFlow Serving output before it is returned to the client.
+        Args:
+            data (obj): the TensorFlow serving response
+            context (Context): an object containing request and configuration details
+        Returns:
+            (bytes, string): data to return to client, response content type
+        """
+        if data.status_code != 200:
+            raise ValueError(data.content.decode('utf-8'))
+
+        response_content_type = context.accept_header
+        prediction = data.content
+        return prediction, response_content_type
+
+You might want to have complete control over the request.
+For example, you might want to make a TFS request (REST or GRPC) to the first model,
+inspect the results, and then make a request to a second model. In this case, implement
+the ``handler`` method instead of the ``input_handler`` and ``output_handler`` methods, as demonstrated
+in the following code:
+
+.. code::
+
+    import json
+    import requests
+
+
+    def handler(data, context):
+        """Handle request.
+        Args:
+            data (obj): the request data
+            context (Context): an object containing request and configuration details
+        Returns:
+            (bytes, string): data to return to client, (optional) response content type
+        """
+        processed_input = _process_input(data, context)
+        response = requests.post(context.rest_uri, data=processed_input)
+        return _process_output(response, context)
+
+
+    def _process_input(data, context):
+        if context.request_content_type == 'application/json':
+            # pass through json (assumes it's correctly formed)
+            d = data.read().decode('utf-8')
+            return d if len(d) else ''
+
+        if context.request_content_type == 'text/csv':
+            # very simple csv handler
+            return json.dumps({
+                'instances': [float(x) for x in data.read().decode('utf-8').split(',')]
+            })
+
+        raise ValueError('{{"error": "unsupported content type {}"}}'.format(
+            context.request_content_type or "unknown"))
+
+
+    def _process_output(data, context):
+        if data.status_code != 200:
+            raise ValueError(data.content.decode('utf-8'))
+
+        response_content_type = context.accept_header
+        prediction = data.content
+        return prediction, response_content_type
+
+You can also bring in external dependencies to help with your data
+processing. There are 2 ways to do this:
+
+1. If you included ``requirements.txt`` in your ``source_dir`` or in
+   your dependencies, the container installs the Python dependencies at runtime using ``pip install -r``:
+
+.. code::
+
+    from sagemaker.tensorflow.serving import Model
+
+    model = Model(entry_point='inference.py',
+                  dependencies=['requirements.txt'],
+                  model_data='s3://mybucket/model.tar.gz',
+                  role='MySageMakerRole')
+
+
+2. If you are working in a network-isolation situation or if you don't
+   want to install dependencies at runtime every time your endpoint starts or a batch
+   transform job runs, you might want to put
+   pre-downloaded dependencies under a ``lib`` directory and specify this
+   directory as a dependency. The container adds the modules to the Python
+   path. Note that if both ``lib`` and ``requirements.txt``
+   are present in the model archive, the ``requirements.txt`` is ignored:
+
+.. code::
+
+    from sagemaker.tensorflow.serving import Model
+
+    model = Model(entry_point='inference.py',
+                  dependencies=['/path/to/folder/named/lib'],
+                  model_data='s3://mybucket/model.tar.gz',
+                  role='MySageMakerRole')
+
+
+*************************************
+sagemaker.tensorflow.TensorFlow Class
+*************************************
+
+The following are the most commonly used ``TensorFlow`` constructor arguments.
+
+Required:
+
+- ``entry_point (str)`` Path (absolute or relative) to the Python file which
+  should be executed as the entry point to training.
+- ``role (str)`` An AWS IAM role (either name or full ARN). The Amazon
+  SageMaker training jobs and APIs that create Amazon SageMaker
+  endpoints use this role to access training data and model artifacts.
+  After the endpoint is created, the inference code might use the IAM
+  role, if accessing AWS resources.
+- ``train_instance_count (int)`` Number of Amazon EC2 instances to use for
+  training.
+- ``train_instance_type (str)`` Type of EC2 instance to use for training, for
+  example, 'ml.c4.xlarge'.
+
+Optional:
+
+- ``source_dir (str)`` Path (absolute or relative) to a directory with any
+  other training source code dependencies including the entry point
+  file. Structure within this directory will be preserved when training
+  on SageMaker. 
+- ``dependencies (list[str])`` A list of paths to directories (absolute or relative) with
+  any additional libraries that will be exported to the container (default: ``[]``).
+  The library folders will be copied to SageMaker in the same folder where the entrypoint is copied.
+  If the ``source_dir`` points to S3, code will be uploaded and the S3 location will be used
+  instead. Example:
+
+      The following call
+
+      >>> TensorFlow(entry_point='train.py', dependencies=['my/libs/common', 'virtual-env'])
+
+      results in the following inside the container:
+
+      >>> opt/ml/code
+      >>> ├── train.py
+      >>> ├── common
+      >>> └── virtual-env
+
+- ``hyperparameters (dict[str, ANY])`` Hyperparameters that will be used for training.
+  Will be made accessible as command line arguments.
+- ``train_volume_size (int)`` Size in GB of the EBS volume to use for storing
+  input data during training. Must be large enough to store the training
+  data.
+- ``train_max_run (int)`` Timeout in seconds for training, after which Amazon
+  SageMaker terminates the job regardless of its current status.
+- ``output_path (str)`` S3 location where you want the training result (model
+  artifacts and optional output files) saved. If not specified, results
+  are stored to a default bucket. If the bucket with the specific name
+  does not exist, the estimator creates the bucket during the ``fit``
+  method execution.
+- ``output_kms_key`` Optional KMS key ID to encrypt training
+  output with.
+- ``base_job_name`` Name to assign for the training job that the ``fit``
+  method launches. If not specified, the estimator generates a default
+  job name, based on the training image name and current timestamp.
+- ``image_name`` An alternative docker image to use for training and
+  serving. If specified, the estimator will use this image for training and
+  hosting, instead of selecting the appropriate SageMaker official image based on
+  ``framework_version`` and ``py_version``. Refer to: `SageMaker TensorFlow Docker containers `_ for details on what the official images support
+  and where to find the source code to build your custom image.
+- ``script_mode (bool)`` Whether to use Script Mode or not. Script Mode is the only training mode available with Python 3;
+  setting ``py_version`` to ``py3`` automatically sets ``script_mode`` to True.
+- ``model_dir (str)`` Location where model data, checkpoint data, and TensorBoard checkpoints should be saved during training.
+  If not specified, an S3 location will be generated under the training job's default bucket, and ``model_dir`` will be
+  passed to your training script as one of the command line arguments.
+- ``distributions (dict)`` Configure your distribution strategy with this argument.
+
+**************************************
+SageMaker TensorFlow Docker containers
+**************************************
+
+For information about SageMaker TensorFlow Docker containers and their dependencies, see `SageMaker TensorFlow Docker containers `_.
diff --git a/doc/using_workflow.rst b/doc/using_workflow.rst
index 46dbedd612..8ae50e42da 100644
--- a/doc/using_workflow.rst
+++ b/doc/using_workflow.rst
@@ -1,166 +1,166 @@
-====================================
-SageMaker Workflow in Apache Airflow
-====================================
-
-Apache Airflow
-~~~~~~~~~~~~~~
-
-`Apache Airflow `_
-is a platform that enables you to programmatically author, schedule, and monitor workflows. 
Using Airflow, -you can build a workflow for SageMaker training, hyperparameter tuning, batch transform and endpoint deployment. -You can use any SageMaker deep learning framework or Amazon algorithms to perform above operations in Airflow. - -There are two ways to build a SageMaker workflow. Using Airflow SageMaker operators or using Airflow PythonOperator. - -1. SageMaker Operators: In Airflow 1.10.1, the SageMaker team contributed special operators for SageMaker operations. -Each operator takes a configuration dictionary that defines the corresponding operation. We provide APIs to generate -the configuration dictionary in the SageMaker Python SDK. Currently, the following SageMaker operators are supported: - -* ``SageMakerTrainingOperator`` -* ``SageMakerTuningOperator`` -* ``SageMakerModelOperator`` -* ``SageMakerTransformOperator`` -* ``SageMakerEndpointConfigOperator`` -* ``SageMakerEndpointOperator`` - -2. PythonOperator: Airflow built-in operator that executes Python callables. You can use the PythonOperator to execute -operations in the SageMaker Python SDK to create a SageMaker workflow. - -Using Airflow on AWS -~~~~~~~~~~~~~~~~~~~~ - -Turbine is an open-source AWS CloudFormation template that enables you to create an Airflow resource stack on AWS. -You can get it here: https://github.com/villasv/aws-airflow-stack - -Using Airflow SageMaker Operators -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Starting with Airflow 1.10.1, you can use SageMaker operators in Airflow. All SageMaker operators take a configuration -dictionary that can be generated by the SageMaker Python SDK. For example: - -.. code:: python - - import sagemaker - from sagemaker.tensorflow import TensorFlow - from sagemaker.workflow.airflow import training_config, transform_config_from_estimator - - estimator = TensorFlow(entry_point='tf_train.py', - role='sagemaker-role', - framework_version='1.11.0', - training_steps=1000, - evaluation_steps=100, - train_instance_count=2, - train_instance_type='ml.p2.xlarge') - - # train_config specifies SageMaker training configuration - train_config = training_config(estimator=estimator, - inputs=your_training_data_s3_uri) - - # trans_config specifies SageMaker batch transform configuration - # task_id specifies which operator the training job associatd with; task_type specifies whether the operator is a - # training operator or tuning operator - trans_config = transform_config_from_estimator(estimator=estimator, - task_id='tf_training', - task_type='training', - instance_count=1, - instance_type='ml.m4.xlarge', - data=your_transform_data_s3_uri, - content_type='text/csv') - -Now you can pass these configurations to the corresponding SageMaker operators and create the workflow: - -.. 
code:: python - - import airflow - from airflow import DAG - from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator - from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator - - default_args = { - 'owner': 'airflow', - 'start_date': airflow.utils.dates.days_ago(2), - 'provide_context': True - } - - dag = DAG('tensorflow_example', default_args=default_args, - schedule_interval='@once') - - train_op = SageMakerTrainingOperator( - task_id='tf_training', - config=train_config, - wait_for_completion=True, - dag=dag) - - transform_op = SageMakerTransformOperator( - task_id='tf_transform', - config=trans_config, - wait_for_completion=True, - dag=dag) - - transform_op.set_upstream(train_op) - -Using Airflow Python Operator -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -`Airflow PythonOperator `_ -is a built-in operator that can execute any Python callable. If you want to build the SageMaker workflow in a more -flexible way, write your python callables for SageMaker operations by using the SageMaker Python SDK. - -.. code:: python - - from sagemaker.tensorflow import TensorFlow - - # callable for SageMaker training in TensorFlow - def train(data, **context): - estimator = TensorFlow(entry_point='tf_train.py', - role='sagemaker-role', - framework_version='1.11.0', - training_steps=1000, - evaluation_steps=100, - train_instance_count=2, - train_instance_type='ml.p2.xlarge') - estimator.fit(data) - return estimator.latest_training_job.job_name - - # callable for SageMaker batch transform - def transform(data, **context): - training_job = context['ti'].xcom_pull(task_ids='training') - estimator = TensorFlow.attach(training_job) - transformer = estimator.transformer(instance_count=1, instance_type='ml.c4.xlarge') - transformer.transform(data, content_type='text/csv') - -Then build your workflow by using the PythonOperator with the Python callables defined above: - -.. code:: python - - import airflow - from airflow import DAG - from airflow.operators.python_operator import PythonOperator - - default_args = { - 'owner': 'airflow', - 'start_date': airflow.utils.dates.days_ago(2), - 'provide_context': True - } - - dag = DAG('tensorflow_example', default_args=default_args, - schedule_interval='@once') - - train_op = PythonOperator( - task_id='training', - python_callable=train, - op_args=[training_data_s3_uri], - provide_context=True, - dag=dag) - - transform_op = PythonOperator( - task_id='transform', - python_callable=transform, - op_args=[transform_data_s3_uri], - provide_context=True, - dag=dag) - - transform_op.set_upstream(train_op) - -A workflow that runs a SageMaker training job and a batch transform job is finished. You can customize your Python +==================================== +SageMaker Workflow in Apache Airflow +==================================== + +Apache Airflow +~~~~~~~~~~~~~~ + +`Apache Airflow `_ +is a platform that enables you to programmatically author, schedule, and monitor workflows. Using Airflow, +you can build a workflow for SageMaker training, hyperparameter tuning, batch transform and endpoint deployment. +You can use any SageMaker deep learning framework or Amazon algorithms to perform above operations in Airflow. + +There are two ways to build a SageMaker workflow. Using Airflow SageMaker operators or using Airflow PythonOperator. + +1. SageMaker Operators: In Airflow 1.10.1, the SageMaker team contributed special operators for SageMaker operations. 
+Each operator takes a configuration dictionary that defines the corresponding operation. We provide APIs to generate +the configuration dictionary in the SageMaker Python SDK. Currently, the following SageMaker operators are supported: + +* ``SageMakerTrainingOperator`` +* ``SageMakerTuningOperator`` +* ``SageMakerModelOperator`` +* ``SageMakerTransformOperator`` +* ``SageMakerEndpointConfigOperator`` +* ``SageMakerEndpointOperator`` + +2. PythonOperator: Airflow built-in operator that executes Python callables. You can use the PythonOperator to execute +operations in the SageMaker Python SDK to create a SageMaker workflow. + +Using Airflow on AWS +~~~~~~~~~~~~~~~~~~~~ + +Turbine is an open-source AWS CloudFormation template that enables you to create an Airflow resource stack on AWS. +You can get it here: https://github.com/villasv/aws-airflow-stack + +Using Airflow SageMaker Operators +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Starting with Airflow 1.10.1, you can use SageMaker operators in Airflow. All SageMaker operators take a configuration +dictionary that can be generated by the SageMaker Python SDK. For example: + +.. code:: python + + import sagemaker + from sagemaker.tensorflow import TensorFlow + from sagemaker.workflow.airflow import training_config, transform_config_from_estimator + + estimator = TensorFlow(entry_point='tf_train.py', + role='sagemaker-role', + framework_version='1.11.0', + training_steps=1000, + evaluation_steps=100, + train_instance_count=2, + train_instance_type='ml.p2.xlarge') + + # train_config specifies SageMaker training configuration + train_config = training_config(estimator=estimator, + inputs=your_training_data_s3_uri) + + # trans_config specifies SageMaker batch transform configuration + # task_id specifies which operator the training job associatd with; task_type specifies whether the operator is a + # training operator or tuning operator + trans_config = transform_config_from_estimator(estimator=estimator, + task_id='tf_training', + task_type='training', + instance_count=1, + instance_type='ml.m4.xlarge', + data=your_transform_data_s3_uri, + content_type='text/csv') + +Now you can pass these configurations to the corresponding SageMaker operators and create the workflow: + +.. code:: python + + import airflow + from airflow import DAG + from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator + from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator + + default_args = { + 'owner': 'airflow', + 'start_date': airflow.utils.dates.days_ago(2), + 'provide_context': True + } + + dag = DAG('tensorflow_example', default_args=default_args, + schedule_interval='@once') + + train_op = SageMakerTrainingOperator( + task_id='tf_training', + config=train_config, + wait_for_completion=True, + dag=dag) + + transform_op = SageMakerTransformOperator( + task_id='tf_transform', + config=trans_config, + wait_for_completion=True, + dag=dag) + + transform_op.set_upstream(train_op) + +Using Airflow Python Operator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Airflow PythonOperator `_ +is a built-in operator that can execute any Python callable. If you want to build the SageMaker workflow in a more +flexible way, write your python callables for SageMaker operations by using the SageMaker Python SDK. + +.. 
code:: python + + from sagemaker.tensorflow import TensorFlow + + # callable for SageMaker training in TensorFlow + def train(data, **context): + estimator = TensorFlow(entry_point='tf_train.py', + role='sagemaker-role', + framework_version='1.11.0', + training_steps=1000, + evaluation_steps=100, + train_instance_count=2, + train_instance_type='ml.p2.xlarge') + estimator.fit(data) + return estimator.latest_training_job.job_name + + # callable for SageMaker batch transform + def transform(data, **context): + training_job = context['ti'].xcom_pull(task_ids='training') + estimator = TensorFlow.attach(training_job) + transformer = estimator.transformer(instance_count=1, instance_type='ml.c4.xlarge') + transformer.transform(data, content_type='text/csv') + +Then build your workflow by using the PythonOperator with the Python callables defined above: + +.. code:: python + + import airflow + from airflow import DAG + from airflow.operators.python_operator import PythonOperator + + default_args = { + 'owner': 'airflow', + 'start_date': airflow.utils.dates.days_ago(2), + 'provide_context': True + } + + dag = DAG('tensorflow_example', default_args=default_args, + schedule_interval='@once') + + train_op = PythonOperator( + task_id='training', + python_callable=train, + op_args=[training_data_s3_uri], + provide_context=True, + dag=dag) + + transform_op = PythonOperator( + task_id='transform', + python_callable=transform, + op_args=[transform_data_s3_uri], + provide_context=True, + dag=dag) + + transform_op.set_upstream(train_op) + +A workflow that runs a SageMaker training job and a batch transform job is finished. You can customize your Python callables with the SageMaker Python SDK according to your needs, and build more flexible and powerful workflows. \ No newline at end of file From 7b9ad5ca33d16130468652326d0d57580c1ba8c2 Mon Sep 17 00:00:00 2001 From: icywang86rui Date: Mon, 22 Jul 2019 09:53:59 -0700 Subject: [PATCH 30/35] fix: update TensorFlow script mode dependency list (#869) --- README.rst | 2 +- src/sagemaker/tensorflow/README.rst | 75 +++++++++++++++-------------- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/README.rst b/README.rst index eff60c8941..e1e0a02864 100644 --- a/README.rst +++ b/README.rst @@ -191,7 +191,7 @@ By using TensorFlow SageMaker Estimators, you can train and host TensorFlow mode Supported versions of TensorFlow: ``1.4.1``, ``1.5.0``, ``1.6.0``, ``1.7.0``, ``1.8.0``, ``1.9.0``, ``1.10.0``, ``1.11.0``, ``1.12.0``, ``1.13.1``. -Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``. +Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``, ``1.13.0`` We recommend that you use the latest supported version, because that's where we focus most of our development efforts. diff --git a/src/sagemaker/tensorflow/README.rst b/src/sagemaker/tensorflow/README.rst index cae54f7185..3533288eed 100644 --- a/src/sagemaker/tensorflow/README.rst +++ b/src/sagemaker/tensorflow/README.rst @@ -20,7 +20,7 @@ Documentation of the previous Legacy Mode versions: `1.4.1 `_. | +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -Supported versions of TensorFlow for Elastic Inference: ``1.11.0``, ``1.12.0``. +Supported versions of TensorFlow for Elastic Inference: ``1.11``, ``1.12``, ``1.13``. 
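+
+For example, to attach an Elastic Inference accelerator with one of these versions when deploying a trained estimator (a sketch; the instance and accelerator types shown are illustrative):
+
+.. code:: python
+
+    predictor = estimator.deploy(initial_instance_count=1,
+                                 instance_type='ml.m4.xlarge',
+                                 accelerator_type='ml.eia1.medium')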
 For information about using TensorFlow with the SageMaker Python SDK, see https://sagemaker.readthedocs.io/en/stable/using_tf.html.
 
@@ -29,42 +29,45 @@ SageMaker TensorFlow Docker containers
 The containers include the following Python packages:
 
-+--------------------------------+---------------+-------------------+
-| Dependencies                   | Script Mode   | Legacy Mode       |
-+--------------------------------+---------------+-------------------+
-| boto3                          | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| botocore                       | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| CUDA (GPU image only)          | 9.0           | 9.0               |
-+--------------------------------+---------------+-------------------+
-| numpy                          | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| Pillow                         | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| scipy                          | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| sklean                         | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| h5py                           | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| pip                            | 18.1          | 18.1              |
-+--------------------------------+---------------+-------------------+
-| curl                           | Latest        | Latest            |
-+--------------------------------+---------------+-------------------+
-| tensorflow                     | 1.12.0        | 1.12.0            |
-+--------------------------------+---------------+-------------------+
-| tensorflow-serving-api         | 1.12.0        | None              |
-+--------------------------------+---------------+-------------------+
-| sagemaker-containers           | >=2.3.5       | >=2.3.5           |
-+--------------------------------+---------------+-------------------+
-| sagemaker-tensorflow-container | 1.0           | 1.0               |
-+--------------------------------+---------------+-------------------+
-| Python                         | 2.7 or 3.6    | 2.7               |
-+--------------------------------+---------------+-------------------+
++--------------------------------+---------------+
+| Dependencies                   | Script Mode   |
++--------------------------------+---------------+
+| awscli                         | 1.16.130      |
++--------------------------------+---------------+
+| boto3                          | Latest        |
++--------------------------------+---------------+
+| botocore                       | Latest        |
++--------------------------------+---------------+
+| h5py                           | 2.9.0         |
++--------------------------------+---------------+
+| keras                          | 2.2.4         |
++--------------------------------+---------------+
+| mpi4py                         | 3.0.1         |
++--------------------------------+---------------+
+| numpy                          | 1.16.2        |
++--------------------------------+---------------+
+| pandas                         | 0.24.2        |
++--------------------------------+---------------+
+| pip                            | 19.1.1        |
++--------------------------------+---------------+
+| Pillow                         | 5.4.1         |
++--------------------------------+---------------+
+| Python                         | 2.7 or 3.6    |
++--------------------------------+---------------+
+| requests                       | 2.21.0        |
++--------------------------------+---------------+
+| sagemaker-containers           | >=2.4.6       |
++--------------------------------+---------------+
+| sagemaker-tensorflow-container | Latest        |
++--------------------------------+---------------+
+| scipy                          | 1.2.1         |
++--------------------------------+---------------+
+| sklearn                        | 0.20.3        |
++--------------------------------+---------------+
+| tensorflow                     | 1.13.0        |
++--------------------------------+---------------+
 
-Legacy Mode TensorFlow Docker images support Python 2.7. 
Script Mode TensorFlow Docker images support both Python 2.7 -and Python 3.6. The Docker images extend Ubuntu 16.04. +Script Mode TensorFlow Docker images support both Python 2.7 and Python 3.6. The Docker images extend Ubuntu 16.04. You can select version of TensorFlow by passing a ``framework_version`` keyword arg to the TensorFlow Estimator constructor. Currently supported versions are listed in the table above. You can also set ``framework_version`` to only specify major and minor version, e.g ``'1.6'``, which will cause your training script to be run on the latest supported patch version of that minor version, which in this example would be 1.6.0. Alternatively, you can build your own image by following the instructions in the SageMaker TensorFlow containers From d3c5205793582f0297befc08c664a0e6c328ac59 Mon Sep 17 00:00:00 2001 From: Marcio Vinicius dos Santos Date: Mon, 22 Jul 2019 10:33:41 -0700 Subject: [PATCH 31/35] change: improving Chainer integ tests (#872) --- tests/data/chainer_mnist/distributed_mnist.py | 9 +- tests/data/chainer_mnist/mnist.py | 4 +- tests/integ/test_chainer_train.py | 174 ++++++++---------- 3 files changed, 85 insertions(+), 102 deletions(-) diff --git a/tests/data/chainer_mnist/distributed_mnist.py b/tests/data/chainer_mnist/distributed_mnist.py index 7507fd1ab3..2d8f0319bd 100644 --- a/tests/data/chainer_mnist/distributed_mnist.py +++ b/tests/data/chainer_mnist/distributed_mnist.py @@ -46,7 +46,7 @@ def __call__(self, x): def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb_format): - images = raw["x"] + images = raw["x"][-100:] if ndim == 2: images = images.reshape(-1, 28, 28) elif ndim == 3: @@ -59,7 +59,7 @@ def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb images *= scale / 255.0 if withlabel: - labels = raw["y"].astype(label_dtype) + labels = raw["y"][-100:].astype(label_dtype) return tuple_dataset.TupleDataset(images, labels) return images @@ -111,9 +111,6 @@ def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb optimizer = chainermn.create_multi_node_optimizer(chainer.optimizers.Adam(), comm) optimizer.setup(model) - train_file = np.load(os.path.join(args.train, "train.npz")) - test_file = np.load(os.path.join(args.test, "test.npz")) - preprocess_mnist_options = { "withlabel": True, "ndim": 1, @@ -173,7 +170,7 @@ def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb trainer.run() # only save the model in the master node - if args.host == "algo-1": + if args.host == env.hosts[0]: serializers.save_npz(os.path.join(env.model_dir, "model.npz"), model) diff --git a/tests/data/chainer_mnist/mnist.py b/tests/data/chainer_mnist/mnist.py index c31a0167db..dd98f504bc 100644 --- a/tests/data/chainer_mnist/mnist.py +++ b/tests/data/chainer_mnist/mnist.py @@ -42,7 +42,7 @@ def __call__(self, x): def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb_format): - images = raw["x"] + images = raw["x"][-100:] if ndim == 2: images = images.reshape(-1, 28, 28) elif ndim == 3: @@ -55,7 +55,7 @@ def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb images *= scale / 255.0 if withlabel: - labels = raw["y"].astype(label_dtype) + labels = raw["y"][-100:].astype(label_dtype) return tuple_dataset.TupleDataset(images, labels) else: return images diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py index a3840c8aca..5b036cd432 100644 --- a/tests/integ/test_chainer_train.py +++ 
b/tests/integ/test_chainer_train.py @@ -13,39 +13,56 @@ from __future__ import absolute_import import os -import time -import pytest import numpy +import pytest -from sagemaker.chainer.defaults import CHAINER_VERSION from sagemaker.chainer.estimator import Chainer from sagemaker.chainer.model import ChainerModel from sagemaker.utils import unique_name_from_base -import tests.integ from tests.integ import DATA_DIR, PYTHON_VERSION, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @pytest.fixture(scope="module") -def chainer_training_job(sagemaker_session, chainer_full_version): - return _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 1, chainer_full_version) +def chainer_local_training_job(sagemaker_local_session, chainer_full_version): + return _run_mnist_training_job(sagemaker_local_session, "local", 1, chainer_full_version) + + +@pytest.mark.local_mode +def test_distributed_cpu_training(sagemaker_local_session, chainer_full_version): + _run_mnist_training_job(sagemaker_local_session, "local", 2, chainer_full_version) -def test_distributed_cpu_training(sagemaker_session, chainer_full_version): - _run_mnist_training_job(sagemaker_session, "ml.c4.xlarge", 2, chainer_full_version) +@pytest.mark.local_mode +def test_training_with_additional_hyperparameters(sagemaker_local_session, chainer_full_version): + script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") + data_path = os.path.join(DATA_DIR, "chainer_mnist") + chainer = Chainer( + entry_point=script_path, + role="SageMakerRole", + train_instance_count=1, + train_instance_type="local", + framework_version=chainer_full_version, + py_version=PYTHON_VERSION, + sagemaker_session=sagemaker_local_session, + hyperparameters={"epochs": 1}, + use_mpi=True, + num_processes=2, + process_slots_per_host=2, + additional_mpi_options="-x NCCL_DEBUG=INFO", + ) -@pytest.mark.skipif( - tests.integ.test_region() in tests.integ.HOSTING_NO_P2_REGIONS - or tests.integ.test_region() in tests.integ.TRAINING_NO_P2_REGIONS, - reason="no ml.p2 instances in these regions", -) -def test_distributed_gpu_training(sagemaker_session, chainer_full_version): - _run_mnist_training_job(sagemaker_session, "ml.p2.xlarge", 2, chainer_full_version) + train_input = "file://" + os.path.join(data_path, "train") + test_input = "file://" + os.path.join(data_path, "test") + chainer.fit({"train": train_input, "test": test_input}) -def test_training_with_additional_hyperparameters(sagemaker_session, chainer_full_version): + +@pytest.mark.canary_quick +@pytest.mark.regional_testing +def test_attach_deploy(sagemaker_session, chainer_full_version): with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") data_path = os.path.join(DATA_DIR, "chainer_mnist") @@ -53,112 +70,81 @@ def test_training_with_additional_hyperparameters(sagemaker_session, chainer_ful chainer = Chainer( entry_point=script_path, role="SageMakerRole", - train_instance_count=1, - train_instance_type="ml.c4.xlarge", framework_version=chainer_full_version, py_version=PYTHON_VERSION, + train_instance_count=1, + train_instance_type="ml.c4.xlarge", sagemaker_session=sagemaker_session, hyperparameters={"epochs": 1}, - use_mpi=True, - num_processes=2, - process_slots_per_host=2, - additional_mpi_options="-x NCCL_DEBUG=INFO", ) - train_input = chainer.sagemaker_session.upload_data( + train_input = sagemaker_session.upload_data( path=os.path.join(data_path, "train"), 
key_prefix="integ-test-data/chainer_mnist/train" ) - test_input = chainer.sagemaker_session.upload_data( + + test_input = sagemaker_session.upload_data( path=os.path.join(data_path, "test"), key_prefix="integ-test-data/chainer_mnist/test" ) job_name = unique_name_from_base("test-chainer-training") - chainer.fit({"train": train_input, "test": test_input}, job_name=job_name) - return chainer.latest_training_job.name - + chainer.fit({"train": train_input, "test": test_input}, wait=False, job_name=job_name) -@pytest.mark.canary_quick -@pytest.mark.regional_testing -def test_attach_deploy(chainer_training_job, sagemaker_session): endpoint_name = unique_name_from_base("test-chainer-attach-deploy") with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): - estimator = Chainer.attach(chainer_training_job, sagemaker_session=sagemaker_session) + estimator = Chainer.attach( + chainer.latest_training_job.name, sagemaker_session=sagemaker_session + ) predictor = estimator.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name) _predict_and_assert(predictor) -def test_deploy_model(chainer_training_job, sagemaker_session): - endpoint_name = unique_name_from_base("test-chainer-deploy-model") - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): - desc = sagemaker_session.sagemaker_client.describe_training_job( - TrainingJobName=chainer_training_job - ) - model_data = desc["ModelArtifacts"]["S3ModelArtifacts"] - script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") - model = ChainerModel( - model_data, - "SageMakerRole", - entry_point=script_path, - sagemaker_session=sagemaker_session, - ) - predictor = model.deploy(1, "ml.m4.xlarge", endpoint_name=endpoint_name) - _predict_and_assert(predictor) - +@pytest.mark.local_mode +def test_deploy_model(chainer_local_training_job, sagemaker_local_session): + script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") -def test_async_fit(sagemaker_session): - with timeout(minutes=5): - training_job_name = _run_mnist_training_job( - sagemaker_session, "ml.c4.xlarge", 1, chainer_full_version=CHAINER_VERSION, wait=False - ) + model = ChainerModel( + chainer_local_training_job.model_data, + "SageMakerRole", + entry_point=script_path, + sagemaker_session=sagemaker_local_session, + ) - print("Waiting to re-attach to the training job: %s" % training_job_name) - time.sleep(20) - - endpoint_name = unique_name_from_base("test-chainer-async-fit") - with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): - print("Re-attaching now to: %s" % training_job_name) - estimator = Chainer.attach( - training_job_name=training_job_name, sagemaker_session=sagemaker_session - ) - predictor = estimator.deploy(1, "ml.c4.xlarge", endpoint_name=endpoint_name) + predictor = model.deploy(1, "local") + try: _predict_and_assert(predictor) + finally: + predictor.delete_endpoint() def _run_mnist_training_job( sagemaker_session, instance_type, instance_count, chainer_full_version, wait=True ): - with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): - - script_path = ( - os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") - if instance_type == 1 - else os.path.join(DATA_DIR, "chainer_mnist", "distributed_mnist.py") - ) - - data_path = os.path.join(DATA_DIR, "chainer_mnist") - - chainer = Chainer( - entry_point=script_path, - role="SageMakerRole", - framework_version=chainer_full_version, - py_version=PYTHON_VERSION, - train_instance_count=instance_count, - train_instance_type=instance_type, - 
sagemaker_session=sagemaker_session,
-        hyperparameters={"epochs": 1},
-    )
-
-    train_input = chainer.sagemaker_session.upload_data(
-        path=os.path.join(data_path, "train"), key_prefix="integ-test-data/chainer_mnist/train"
-    )
-    test_input = chainer.sagemaker_session.upload_data(
-        path=os.path.join(data_path, "test"), key_prefix="integ-test-data/chainer_mnist/test"
-    )
-
-    job_name = unique_name_from_base("test-chainer-training")
-    chainer.fit({"train": train_input, "test": test_input}, wait=wait, job_name=job_name)
-    return chainer.latest_training_job.name
+    script_path = (
+        os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")
+        if instance_count == 1
+        else os.path.join(DATA_DIR, "chainer_mnist", "distributed_mnist.py")
+    )
+
+    data_path = os.path.join(DATA_DIR, "chainer_mnist")
+
+    chainer = Chainer(
+        entry_point=script_path,
+        role="SageMakerRole",
+        framework_version=chainer_full_version,
+        py_version=PYTHON_VERSION,
+        train_instance_count=instance_count,
+        train_instance_type=instance_type,
+        sagemaker_session=sagemaker_session,
+        hyperparameters={"epochs": 1},
+    )
+
+    train_input = "file://" + os.path.join(data_path, "train")
+    test_input = "file://" + os.path.join(data_path, "test")
+
+    job_name = unique_name_from_base("test-chainer-training")
+    chainer.fit({"train": train_input, "test": test_input}, wait=wait, job_name=job_name)
+    return chainer

 def _predict_and_assert(predictor):

From 4ebb088ef84bef059e785b779e72a23e724f7622 Mon Sep 17 00:00:00 2001
From: Karim Nakad
Date: Mon, 22 Jul 2019 15:36:39 -0700
Subject: [PATCH 32/35] change: enable line-too-long Pylint check (#948)

---
 .pylintrc | 6 +-
 src/sagemaker/algorithm.py | 2 +-
 src/sagemaker/amazon/linear_learner.py | 7 +-
 src/sagemaker/analytics.py | 10 +-
 src/sagemaker/chainer/estimator.py | 5 +-
 src/sagemaker/estimator.py | 27 +-
 src/sagemaker/fw_utils.py | 10 +-
 src/sagemaker/local/entities.py | 12 +-
 src/sagemaker/local/image.py | 33 +-
 src/sagemaker/local/local_session.py | 5 +-
 src/sagemaker/model.py | 6 +-
 src/sagemaker/mxnet/estimator.py | 33 +-
 src/sagemaker/parameter.py | 4 +-
 src/sagemaker/pipeline.py | 6 +-
 src/sagemaker/predictor.py | 6 +-
 src/sagemaker/pytorch/defaults.py | 3 +-
 src/sagemaker/pytorch/estimator.py | 5 +-
 src/sagemaker/session.py | 562 ++++++++++++++-----------
 src/sagemaker/sklearn/estimator.py | 3 +-
 src/sagemaker/sparkml/model.py | 3 +-
 src/sagemaker/tensorflow/__init__.py | 3 +-
 src/sagemaker/tensorflow/estimator.py | 161 ++++---
 src/sagemaker/tensorflow/predictor.py | 5 +-
 src/sagemaker/transformer.py | 8 +-
 src/sagemaker/tuner.py | 24 +-
 src/sagemaker/vpc_utils.py | 3 +-
 src/sagemaker/workflow/airflow.py | 40 +-
 tests/unit/test_session.py | 2 +-
 28 files changed, 556 insertions(+), 438 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index 4989c1a22c..db7c5d79fb 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -83,7 +83,6 @@ disable=
         too-many-arguments,
         invalid-name,
         too-many-instance-attributes,
-        line-too-long, # We let Flake8 take care of this # TODO: Fix these and stop relying on flake8
         len-as-condition, # TODO: Enable this check once pylint 2.4.0 is released and consumed due to the fix in https://github.com/PyCQA/pylint/issues/2684
         import-error, # Since we run Pylint before any of our builds in tox, this will always fail
         protected-access, # TODO: Fix access
@@ -228,8 +227,9 @@ max-nested-blocks=5
 # Maximum number of characters on a single line.
 max-line-length=100
-# Regexp for a line that is allowed to be longer than the limit.
-ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+# Regexp for a line that is allowed to be longer than the limit. Can only be a single regex.
+# The following matches any semblance of a url of any sort.
+ignore-long-lines=^\s*.*(# )?(<?https?:\/\/)?[-a-zA-Z0-9@:%%._\+~#=<>]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%%_\+.~#?&\/\/=<>]*\S*>?$
 # Allow the body of an if to be on the same line as the test if there is no
 # else.
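The new ``ignore-long-lines`` value above is dense, so here is a small sketch of what it is meant
to permit (commentary only, not part of the patch). It assumes the pattern as reconstructed above,
with the ``%%`` ConfigParser escapes unescaped to ``%`` for Python::

    import re

    # A line is exempt from line-too-long when it is essentially a
    # (possibly commented, possibly angle-bracketed) URL of some sort.
    URL_LINE = re.compile(
        r"^\s*.*(# )?(<?https?:\/\/)?[-a-zA-Z0-9@:%._\+~#=<>]{1,256}"
        r"\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_\+.~#?&\/\/=<>]*\S*>?$"
    )

    assert URL_LINE.match("    # https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html")
    assert URL_LINE.match("see https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators")
    assert not URL_LINE.match("x = some_long_call(argument_one, argument_two)")  # no dotted URL-ish token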
diff --git a/src/sagemaker/algorithm.py b/src/sagemaker/algorithm.py
index 649d144c9d..fae19cbd24 100644
--- a/src/sagemaker/algorithm.py
+++ b/src/sagemaker/algorithm.py
@@ -128,7 +128,7 @@ def __init__(
             (default: None). If specified, the estimator will create a channel pointing to
             the model so the training job can download it. This model can be a
             'model.tar.gz' from a previous training job, or
-            other artifacts coming from a different source. More
+            other artifacts coming from a different source. More
             information:
             https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
diff --git a/src/sagemaker/amazon/linear_learner.py b/src/sagemaker/amazon/linear_learner.py
index e33caea804..e2d18185d9 100644
--- a/src/sagemaker/amazon/linear_learner.py
+++ b/src/sagemaker/amazon/linear_learner.py
@@ -83,8 +83,8 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
             "softmax_loss",
             "auto",
         ),
-        '"logistic", "squared_loss", "absolute_loss", "hinge_loss", "eps_insensitive_squared_loss", '
-        '"eps_insensitive_absolute_loss", "quantile_loss", "huber_loss", "softmax_loss" or "auto"',
+        '"logistic", "squared_loss", "absolute_loss", "hinge_loss", "eps_insensitive_squared_loss",'
+        ' "eps_insensitive_absolute_loss", "quantile_loss", "huber_loss", "softmax_loss" or "auto"',
         str,
     )
     wd = hp("wd", ge(0), "A float greater-than or equal to 0", float)
@@ -369,7 +369,8 @@ def __init__(
             num_classes is None or num_classes < 3
         ):
             raise ValueError(
-                "For predictor_type 'multiclass_classifier', 'num_classes' should be set to a value greater than 2."
+                "For predictor_type 'multiclass_classifier', 'num_classes' should be set to a "
+                "value greater than 2."
             )

     def create_model(self, vpc_config_override=VPC_CONFIG_DEFAULT):
diff --git a/src/sagemaker/analytics.py b/src/sagemaker/analytics.py
index e32792606b..ec4020dfdf 100644
--- a/src/sagemaker/analytics.py
+++ b/src/sagemaker/analytics.py
@@ -176,7 +176,7 @@ def description(self, force_refresh=False):
         if force_refresh:
             self.clear_cache()
         if not self._tuning_job_describe_result:
-            self._tuning_job_describe_result = self._sage_client.describe_hyper_parameter_tuning_job(
+            self._tuning_job_describe_result = self._sage_client.describe_hyper_parameter_tuning_job(  # noqa: E501 # pylint: disable=line-too-long
                 HyperParameterTuningJobName=self.name
             )
         return self._tuning_job_describe_result
@@ -288,10 +288,12 @@ def _determine_timeinterval(self):
         description = self._sage_client.describe_training_job(TrainingJobName=self.name)
         start_time = self._start_time or description[u"TrainingStartTime"]  # datetime object
         # Incrementing end time by 1 min since CloudWatch drops seconds before finding the logs.
-        # This results in logs being searched in the time range in which the correct log line was not present.
+        # This results in logs being searched in the time range in which the correct log line was
+        # not present.
         # Example - Log time - 2018-10-22 08:25:55
-        # Here calculated end time would also be 2018-10-22 08:25:55 (without 1 min addition)
-        # CW will consider end time as 2018-10-22 08:25 and will not be able to search the correct log. 
+ # Here calculated end time would also be 2018-10-22 08:25:55 (without 1 min addition) + # CW will consider end time as 2018-10-22 08:25 and will not be able to search the + # correct log. end_time = self._end_time or description.get( u"TrainingEndTime", datetime.datetime.utcnow() ) + datetime.timedelta(minutes=1) diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index 6ae6fd6992..ab70de1985 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -177,8 +177,9 @@ def create_model( the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - entry_point (str): Path (absolute or relative) to the local Python source file which should be executed - as the entry point to training. If not specified, the training entry point is used. + entry_point (str): Path (absolute or relative) to the local Python source file which + should be executed as the entry point to training. If not specified, the training + entry point is used. source_dir (str): Path (absolute or relative) to a directory with any other serving source code dependencies aside from the entry point file. If not specified, the model source directory from training is used. diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index bbe06a77e8..f3caa2e8bb 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -279,15 +279,13 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None): * (str) the S3 location where training data is saved. - * (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple channels for - training data, you can specify a dict mapping channel - names to strings or :func:`~sagemaker.session.s3_input` - objects. - - * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide - additional information as well as the path to the training - dataset. See :func:`sagemaker.session.s3_input` for full - details. + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) If using multiple + channels for training data, you can specify a dict mapping channel names to + strings or :func:`~sagemaker.session.s3_input` objects. + + * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can + provide additional information as well as the path to the training dataset. + See :func:`sagemaker.session.s3_input` for full details. wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only @@ -990,9 +988,9 @@ def create_model( ): """Create a model to deploy. - The serializer, deserializer, content_type, and accept arguments are only used to define a default - RealTimePredictor. They are ignored if an explicit predictor class is passed in. Other arguments - are passed through to the Model class. + The serializer, deserializer, content_type, and accept arguments are only used to define a + default RealTimePredictor. They are ignored if an explicit predictor class is passed in. + Other arguments are passed through to the Model class. Args: role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, @@ -1562,8 +1560,9 @@ def transformer( worker per vCPU. volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML compute instance (default: None). 
-            entry_point (str): Path (absolute or relative) to the local Python source file which should be executed
-                as the entry point to training. If not specified, the training entry point is used.
+            entry_point (str): Path (absolute or relative) to the local Python source file which
+                should be executed as the entry point to training. If not specified, the training
+                entry point is used.

         Returns:
             sagemaker.transformer.Transformer: a ``Transformer`` object that can be used to start a
diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py
index b23deec8fd..493151dec0 100644
--- a/src/sagemaker/fw_utils.py
+++ b/src/sagemaker/fw_utils.py
@@ -185,8 +185,9 @@ def create_image_uri(
     else:
         family = instance_type.split(".")[1]

-    # For some frameworks, we have optimized images for specific families, e.g c5 or p3. In those cases,
-    # we use the family name in the image tag. In other cases, we use 'cpu' or 'gpu'.
+    # For some frameworks, we have optimized images for specific families, e.g. c5 or p3.
+    # In those cases, we use the family name in the image tag. In other cases, we use
+    # 'cpu' or 'gpu'.
     if family in optimized_families:
         device_type = family
     elif family[0] in ["g", "p"]:
@@ -340,6 +341,7 @@ def _list_files_to_compress(script, directory):

 def framework_name_from_image(image_name):
+    # noinspection LongLine
     """Extract the framework and Python version from the image name.

     Args:
@@ -357,7 +359,7 @@
         tuple: A tuple containing:
             str: The framework name
             str: The Python version
             str: The image tag
             str: If the image is script mode
-    """
+    """
     sagemaker_pattern = re.compile(ECR_URI_PATTERN)
     sagemaker_match = sagemaker_pattern.match(image_name)
     if sagemaker_match is None:
@@ -365,7 +367,7 @@
     # extract framework, python version and image tag
     # We must support both the legacy and current image name format.
     name_pattern = re.compile(
-        r"^(?:sagemaker(?:-rl)?-)?(tensorflow|mxnet|chainer|pytorch|scikit-learn)(?:-)?(scriptmode|training)?:(.*)-(.*?)-(py2|py3)$"  # noqa: E501
+        r"^(?:sagemaker(?:-rl)?-)?(tensorflow|mxnet|chainer|pytorch|scikit-learn)(?:-)?(scriptmode|training)?:(.*)-(.*?)-(py2|py3)$"  # noqa: E501 # pylint: disable=line-too-long
     )
     legacy_name_pattern = re.compile(r"^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$")
diff --git a/src/sagemaker/local/entities.py b/src/sagemaker/local/entities.py
index ba7a832943..7d03fd2cd4 100644
--- a/src/sagemaker/local/entities.py
+++ b/src/sagemaker/local/entities.py
@@ -256,7 +256,8 @@ def _get_container_environment(self, **kwargs):
         # we only do 1 max concurrent transform in Local Mode
         if "MaxConcurrentTransforms" in kwargs and int(kwargs["MaxConcurrentTransforms"]) > 1:
             logger.warning(
-                "Local Mode only supports 1 ConcurrentTransform. Setting MaxConcurrentTransforms to 1"
+                "Local Mode only supports 1 ConcurrentTransform. Setting MaxConcurrentTransforms "
+                "to 1"
             )
             environment["SAGEMAKER_MAX_CONCURRENT_TRANSFORMS"] = "1"
@@ -287,8 +288,9 @@ def _get_required_defaults(self, **kwargs):

     def _get_working_directory(self):
         """Placeholder docstring"""
-        # Root dir to use for intermediate data location. To make things simple we will write here regardless
-        # of the final destination. At the end the files will either be moved or uploaded to S3 and deleted.
+        # Root dir to use for intermediate data location. To make things simple we will write here
+        # regardless of the final destination. At the end the files will either be moved or
+        # uploaded to S3 and deleted. 
root_dir = get_config_value("local.container_root", self.local_session.config)
         if root_dir:
             root_dir = os.path.abspath(root_dir)
@@ -313,8 +315,8 @@ def _prepare_data_transformation(self, input_data, batch_strategy):

     def _perform_batch_inference(self, input_data, output_data, **kwargs):
         # Transform the input data to feed the serving container. We need to first gather the files
-        # from S3 or Local FileSystem. Split them as required (Line, RecordIO, None) and finally batch them
-        # according to the batch strategy and limit the request size.
+        # from S3 or Local FileSystem. Split them as required (Line, RecordIO, None) and finally
+        # batch them according to the batch strategy and limit the request size.
         """
         Args:
diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py
index d1335d6c6e..5a42a10f4c 100644
--- a/src/sagemaker/local/image.py
+++ b/src/sagemaker/local/image.py
@@ -81,8 +81,8 @@ def __init__(self, instance_type, instance_count, image, sagemaker_session=None)
         self.instance_type = instance_type
         self.instance_count = instance_count
         self.image = image
-        # Since we are using a single docker network, Generate a random suffix to attach to the container names.
-        # This way multiple jobs can run in parallel.
+        # Since we are using a single docker network, generate a random suffix to attach to the
+        # container names. This way multiple jobs can run in parallel.
         suffix = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(5))
         self.hosts = [
             "{}-{}-{}".format(CONTAINER_PREFIX, i, suffix)
@@ -160,8 +160,8 @@ def train(self, input_data_config, output_data_config, hyperparameters, job_name
         dirs_to_delete = [data_dir, shared_dir]
         self._cleanup(dirs_to_delete)

-        # Print our Job Complete line to have a similar experience to training on SageMaker where you
-        # see this line at the end.
+        # Print our Job Complete line to have a similar experience to training on SageMaker where
+        # you see this line at the end.
         print("===== Job Complete =====")
         return artifacts
@@ -702,10 +702,10 @@ def _delete_tree(path):
     try:
         shutil.rmtree(path)
     except OSError as exc:
-        # on Linux, when docker writes to any mounted volume, it uses the container's user. In most cases
-        # this is root. When the container exits and we try to delete them we can't because root owns those
-        # files. We expect this to happen, so we handle EACCESS. Any other error we will raise the
-        # exception up.
+        # on Linux, when docker writes to any mounted volume, it uses the container's user. In most
+        # cases this is root. When the container exits and we try to delete them we can't because
+        # root owns those files. We expect this to happen, so we handle EACCES. Any other error
+        # we will raise the exception up.
         if exc.errno == errno.EACCES:
             logger.warning("Failed to delete: %s Please remove it manually.", path)
         else:
@@ -724,13 +724,14 @@ def _aws_credentials(session):
         secret_key = creds.secret_key
         token = creds.token

-        # The presence of a token indicates the credentials are short-lived and as such are risky to be used as they
-        # might expire while running.
+        # The presence of a token indicates the credentials are short-lived and as such are risky
+        # to be used as they might expire while running.
         # Long-lived credentials are available either through
         # 1. boto session
-        # 2. 
EC2 Metadata Service (SageMaker Notebook instances or EC2 instances with roles attached them)
-        # Short-lived credentials available via boto session are permitted to support running on machines with no
-        # EC2 Metadata Service but a warning is provided about their danger
+        # 2. EC2 Metadata Service (SageMaker Notebook instances or EC2 instances with roles
+        #    attached to them)
+        # Short-lived credentials available via boto session are permitted to support running on
+        # machines with no EC2 Metadata Service but a warning is provided about their danger
         if token is None:
             logger.info("Using the long-lived AWS credentials found in session")
             return [
@@ -739,7 +740,8 @@ def _aws_credentials(session):
             ]
         if not _aws_credentials_available_in_metadata_service():
             logger.warning(
-                "Using the short-lived AWS credentials found in session. They might expire while running."
+                "Using the short-lived AWS credentials found in session. They might expire while "
+                "running."
             )
             return [
                 "AWS_ACCESS_KEY_ID=%s" % (str(access_key)),
@@ -747,7 +749,8 @@
                 "AWS_SESSION_TOKEN=%s" % (str(token)),
             ]
         logger.info(
-            "No AWS credentials found in session but credentials from EC2 Metadata Service are available."
+            "No AWS credentials found in session but credentials from EC2 Metadata Service are "
+            "available."
         )
         return None
     except Exception as e:  # pylint: disable=broad-except
diff --git a/src/sagemaker/local/local_session.py b/src/sagemaker/local/local_session.py
index a1aa62919b..64495f3fde 100644
--- a/src/sagemaker/local/local_session.py
+++ b/src/sagemaker/local/local_session.py
@@ -82,9 +82,8 @@ def create_training_job(
             OutputDataConfig(dict): Identifies the location where you want to save the results of
                 model training.
             ResourceConfig(dict): Identifies the resources to use for local model training.
-            HyperParameters(dict) [optional]: Specifies these algorithm-specific parameters to influence the
-                quality of
-                the final model.
+            HyperParameters(dict) [optional]: Specifies these algorithm-specific parameters to
+                influence the quality of the final model.
             **kwargs:

         Returns:
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
index c9e3469a74..920cfa201e 100644
--- a/src/sagemaker/model.py
+++ b/src/sagemaker/model.py
@@ -406,9 +406,9 @@ def deploy(
                 this model completes (default: True).

         Returns:
-            callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on
-                the created endpoint name, if ``self.predictor_cls`` is not
-                None. Otherwise, return None.
+            callable[string, sagemaker.session.Session] or None: Invocation of
+                ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
+                is not None. Otherwise, return None.
         """
         if not self.sagemaker_session:
             if instance_type in ("local", "local_gpu"):
diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py
index e74a1cffa2..3a4ebe4bca 100644
--- a/src/sagemaker/mxnet/estimator.py
+++ b/src/sagemaker/mxnet/estimator.py
@@ -86,17 +86,15 @@ def __init__(
                 your model training code. List of supported versions
                 https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators.
                 If not specified, this will default to 1.2.1.
-            image_name (str): If specified, the estimator will use this image for training and hosting, instead of
-                selecting the appropriate SageMaker official image based on
-                framework_version and py_version. It can be an ECR url or
-                dockerhub image and tag. 
- - Examples: - 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 - custom-image:latest. - - distributions (dict): A dictionary with information on how to run distributed training - (default: None). + image_name (str): If specified, the estimator will use this image for training and + hosting, instead of selecting the appropriate SageMaker official image based on + framework_version and py_version. It can be an ECR url or dockerhub image and tag. + Examples: + 123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0 + custom-image:latest. + + distributions (dict): A dictionary with information on how to run distributed + training (default: None). distributions: **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. @@ -157,8 +155,9 @@ def create_model( the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - entry_point (str): Path (absolute or relative) to the local Python source file which should be executed - as the entry point to training. If not specified, the training entry point is used. + entry_point (str): Path (absolute or relative) to the local Python source file which + should be executed as the entry point to training. If not specified, the training + entry point is used. source_dir (str): Path (absolute or relative) to a directory with any other serving source code dependencies aside from the entry point file. If not specified, the model source directory from training is used. @@ -216,10 +215,10 @@ class constructor init_params["py_version"] = py_version - # We switched image tagging scheme from regular image version (e.g. '1.0') to more expressive - # containing framework version, device type and python version (e.g. '0.12-gpu-py2'). - # For backward compatibility map deprecated image tag '1.0' to a '0.12' framework version - # otherwise extract framework version from the tag itself. + # We switched image tagging scheme from regular image version (e.g. '1.0') to more + # expressive containing framework version, device type and python version + # (e.g. '0.12-gpu-py2'). For backward compatibility map deprecated image tag '1.0' to a + # '0.12' framework version otherwise extract framework version from the tag itself. init_params["framework_version"] = ( "0.12" if tag == "1.0" else framework_version_from_tag(tag) ) diff --git a/src/sagemaker/parameter.py b/src/sagemaker/parameter.py index e254809334..2f6da618fc 100644 --- a/src/sagemaker/parameter.py +++ b/src/sagemaker/parameter.py @@ -140,8 +140,8 @@ def as_json_range(self, name): name (str): The name of the hyperparameter. Returns: - dict[str, list[str]]: A dictionary that contains the name and values of the hyperparameter, - where the values are serialized as JSON. + dict[str, list[str]]: A dictionary that contains the name and values of the + hyperparameter, where the values are serialized as JSON. """ return {"Name": name, "Values": [json.dumps(v) for v in self.values]} diff --git a/src/sagemaker/pipeline.py b/src/sagemaker/pipeline.py index 9e2a24ae41..47f148a20d 100644 --- a/src/sagemaker/pipeline.py +++ b/src/sagemaker/pipeline.py @@ -112,9 +112,9 @@ def deploy( model completes (default: True). Returns: - callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on - the created endpoint name, if ``self.predictor_cls`` is not - None. Otherwise, return None. 
+ callable[string, sagemaker.session.Session] or None: Invocation of + ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls`` + is not None. Otherwise, return None. """ if not self.sagemaker_session: self.sagemaker_session = Session() diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index 3dd6c48e2a..fa59d835f8 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -205,7 +205,8 @@ def __call__(self, data): Returns: object: Sequence of bytes to be used for the request body. """ - # For inputs which represent multiple "rows", the result should be newline-separated CSV rows + # For inputs which represent multiple "rows", the result should be newline-separated CSV + # rows if _is_mutable_sequence_like(data) and len(data) > 0 and _is_sequence_like(data[0]): return "\n".join([_CsvSerializer._serialize_row(row) for row in data]) return _CsvSerializer._serialize_row(data) @@ -418,7 +419,8 @@ def __call__(self, data): object: Serialized data used for the request. """ if isinstance(data, dict): - # convert each value in dict from a numpy array to a list if necessary, so they can be json serialized + # convert each value in dict from a numpy array to a list if necessary, so they can be + # json serialized return json.dumps({k: _ndarray_to_list(v) for k, v in six.iteritems(data)}) # files and buffers diff --git a/src/sagemaker/pytorch/defaults.py b/src/sagemaker/pytorch/defaults.py index 372424b6bd..714f8390c4 100644 --- a/src/sagemaker/pytorch/defaults.py +++ b/src/sagemaker/pytorch/defaults.py @@ -15,7 +15,8 @@ PYTORCH_VERSION = "0.4" """Default PyTorch version for when the framework version is not specified. -The latest PyTorch version is 1.1.0, but the default version is no longer updated so as to not break existing workflows. +The latest PyTorch version is 1.1.0, but the default version is no longer updated so as to not +break existing workflows. """ PYTHON_VERSION = "py3" diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index 74c45b8c6e..ee84c7002a 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -130,8 +130,9 @@ def create_model( the model. Default: use subnets and security groups from this Estimator. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - entry_point (str): Path (absolute or relative) to the local Python source file which should be executed - as the entry point to training. If not specified, the training entry point is used. + entry_point (str): Path (absolute or relative) to the local Python source file which + should be executed as the entry point to training. If not specified, the training + entry point is used. source_dir (str): Path (absolute or relative) to a directory with any other serving source code dependencies aside from the entry point file. If not specified, the model source directory from training is used. diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index 470297cbbd..1e35e258d8 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -61,27 +61,30 @@ class LogState(object): class Session(object): # pylint: disable=too-many-public-methods """Manage interactions with the Amazon SageMaker APIs and any other AWS services needed. - This class provides convenient methods for manipulating entities and resources that Amazon SageMaker uses, - such as training jobs, endpoints, and input datasets in S3. 
+ This class provides convenient methods for manipulating entities and resources that Amazon + SageMaker uses, such as training jobs, endpoints, and input datasets in S3. AWS service calls are delegated to an underlying Boto3 session, which by default - is initialized using the AWS configuration chain. When you make an Amazon SageMaker API call that - accesses an S3 bucket location and one is not specified, the ``Session`` creates a default bucket based on - a naming convention which includes the current AWS account ID. + is initialized using the AWS configuration chain. When you make an Amazon SageMaker API call + that accesses an S3 bucket location and one is not specified, the ``Session`` creates a default + bucket based on a naming convention which includes the current AWS account ID. """ def __init__(self, boto_session=None, sagemaker_client=None, sagemaker_runtime_client=None): """Initialize a SageMaker ``Session``. Args: - boto_session (boto3.session.Session): The underlying Boto3 session which AWS service calls - are delegated to (default: None). If not provided, one is created with default AWS configuration chain. - sagemaker_client (boto3.SageMaker.Client): Client which makes Amazon SageMaker service calls other - than ``InvokeEndpoint`` (default: None). Estimators created using this ``Session`` use this client. - If not provided, one will be created using this instance's ``boto_session``. - sagemaker_runtime_client (boto3.SageMakerRuntime.Client): Client which makes ``InvokeEndpoint`` - calls to Amazon SageMaker (default: None). Predictors created using this ``Session`` use this client. - If not provided, one will be created using this instance's ``boto_session``. + boto_session (boto3.session.Session): The underlying Boto3 session which AWS service + calls are delegated to (default: None). If not provided, one is created with + default AWS configuration chain. + sagemaker_client (boto3.SageMaker.Client): Client which makes Amazon SageMaker service + calls other than ``InvokeEndpoint`` (default: None). Estimators created using this + ``Session`` use this client. If not provided, one will be created using this + instance's ``boto_session``. + sagemaker_runtime_client (boto3.SageMakerRuntime.Client): Client which makes + ``InvokeEndpoint`` calls to Amazon SageMaker (default: None). Predictors created + using this ``Session`` use this client. If not provided, one will be created using + this instance's ``boto_session``. """ self._default_bucket = None @@ -130,8 +133,8 @@ def boto_region_name(self): def upload_data(self, path, bucket=None, key_prefix="data", extra_args=None): """Upload local file or directory to S3. - If a single file is specified for upload, the resulting S3 object key is ``{key_prefix}/{filename}`` - (filename does not include the local path, if any specified). + If a single file is specified for upload, the resulting S3 object key is + ``{key_prefix}/{filename}`` (filename does not include the local path, if any specified). If a directory is specified for upload, the API uploads all content, recursively, preserving relative structure of subdirectories. The resulting object key names are: @@ -140,19 +143,21 @@ def upload_data(self, path, bucket=None, key_prefix="data", extra_args=None): Args: path (str): Path (absolute or relative) of local file or directory to upload. bucket (str): Name of the S3 Bucket to upload to (default: None). If not specified, the - default bucket of the ``Session`` is used (if default bucket does not exist, the ``Session`` - creates it). 
-            key_prefix (str): Optional S3 object key name prefix (default: 'data'). S3 uses the prefix to
-                create a directory structure for the bucket content that it display in the S3 console.
-            extra_args (dict): Optional extra arguments that may be passed to the upload operation. Similar to
-                ExtraArgs parameter in S3 upload_file function. Please refer to the ExtraArgs parameter
-                documentation here:
+                default bucket of the ``Session`` is used (if default bucket does not exist, the
+                ``Session`` creates it).
+            key_prefix (str): Optional S3 object key name prefix (default: 'data'). S3 uses the
+                prefix to create a directory structure for the bucket content that it displays in
+                the S3 console.
+            extra_args (dict): Optional extra arguments that may be passed to the upload operation.
+                Similar to ExtraArgs parameter in S3 upload_file function. Please refer to the
+                ExtraArgs parameter documentation here:
             https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html#the-extraargs-parameter

         Returns:
-            str: The S3 URI of the uploaded file(s). If a file is specified in the path argument, the URI format is:
-                ``s3://{bucket name}/{key_prefix}/{original_file_name}``.
-                If a directory is specified in the path argument, the URI format is ``s3://{bucket name}/{key_prefix}``.
+            str: The S3 URI of the uploaded file(s). If a file is specified in the path argument,
+                the URI format is: ``s3://{bucket name}/{key_prefix}/{original_file_name}``.
+                If a directory is specified in the path argument, the URI format is
+                ``s3://{bucket name}/{key_prefix}``.
         """
         # Generate a tuple for each file that we want to upload of the form (local_path, s3_key).
         files = []
@@ -180,8 +185,8 @@ def upload_data(self, path, bucket=None, key_prefix="data", extra_args=None):
         s3_uri = "s3://{}/{}".format(bucket, key_prefix)
         # If a specific file was used as input (instead of a directory), we return the full S3 key
-        # of the uploaded object. This prevents unintentionally using other files under the same prefix
-        # during training.
+        # of the uploaded object. This prevents unintentionally using other files under the same
+        # prefix during training.
         if key_suffix:
             s3_uri = "{}/{}".format(s3_uri, key_suffix)
         return s3_uri
@@ -190,7 +195,8 @@ def default_bucket(self):
         """Return the name of the default bucket to use in relevant Amazon SageMaker interactions.

         Returns:
-            str: The name of the default bucket, which is of the form: ``sagemaker-{region}-{AWS account ID}``.
+            str: The name of the default bucket, which is of the form:
+                ``sagemaker-{region}-{AWS account ID}``.
         """
         if self._default_bucket:
             return self._default_bucket
@@ -220,7 +226,8 @@ def default_bucket(self):
             elif (
                 error_code == "OperationAborted" and "conflicting conditional operation" in message
             ):
-                # If this bucket is already being concurrently created, we don't need to create it again.
+                # If this bucket is already being concurrently created, we don't need to create it
+                # again.
                 pass
             elif error_code == "TooManyBuckets":
                 # Succeed if the default bucket exists
@@ -254,49 +261,51 @@ def train(  # noqa: C901
         Args:
             input_mode (str): The input mode that the algorithm supports. Valid modes:
                 * 'File' - Amazon SageMaker copies the training dataset from the S3 location to
-                    a directory in the Docker container.
-                * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe.
-
-            input_config (list): A list of Channel objects. Each channel is a named input source. 
Please refer to - the format details described: - https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - You must grant sufficient permissions to this role. + a directory in the Docker container. + * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a + Unix-named pipe. + + input_config (list): A list of Channel objects. Each channel is a named input source. + Please refer to the format details described: + https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training + jobs and APIs that create Amazon SageMaker endpoints use this role to access + training data and model artifacts. You must grant sufficient permissions to this + role. job_name (str): Name of the training job being created. - output_config (dict): The S3 URI where you want to store the training results and optional KMS key ID. + output_config (dict): The S3 URI where you want to store the training results and + optional KMS key ID. resource_config (dict): Contains values for ResourceConfig: - * instance_count (int): Number of EC2 instances to use for training. - The key in resource_config is 'InstanceCount'. - * instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - The key in resource_config is 'InstanceType'. + The key in resource_config is 'InstanceCount'. + * instance_type (str): Type of EC2 instance to use for training, for example, + 'ml.c4.xlarge'. The key in resource_config is 'InstanceType'. vpc_config (dict): Contains values for VpcConfig: - * subnets (list[str]): List of subnet ids. - The key in vpc_config is 'Subnets'. + The key in vpc_config is 'Subnets'. * security_group_ids (list[str]): List of security group ids. - The key in vpc_config is 'SecurityGroupIds'. - - hyperparameters (dict): Hyperparameters for model training. The hyperparameters are made accessible as - a dict[str, str] to the training code on SageMaker. For convenience, this accepts other types for - keys and values, but ``str()`` will be called to convert them before training. - stop_condition (dict): Defines when training shall finish. Contains entries that can be understood by the - service like ``MaxRuntimeInSeconds``. + The key in vpc_config is 'SecurityGroupIds'. + + hyperparameters (dict): Hyperparameters for model training. The hyperparameters are + made accessible as a dict[str, str] to the training code on SageMaker. For + convenience, this accepts other types for keys and values, but ``str()`` will be + called to convert them before training. + stop_condition (dict): Defines when training shall finish. Contains entries that can + be understood by the service like ``MaxRuntimeInSeconds``. tags (list[dict]): List of tags for labeling a training job. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. - metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the - training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for - the regular expression used to extract the metric from the logs. 
+ metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) + used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for + the name of the metric, and 'Regex' for the regular expression used to extract the + metric from the logs. enable_network_isolation (bool): Whether to request for the training job to run with network isolation or not. image (str): Docker image containing training code. algorithm_arn (str): Algorithm Arn from Marketplace. - encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers is - encrypted for the training job (default: ``False``). + encrypt_inter_container_traffic (bool): Specifies whether traffic between training + containers is encrypted for the training job (default: ``False``). Returns: str: ARN of the training job, if it is created. @@ -357,14 +366,16 @@ def compile_model( """Create an Amazon SageMaker Neo compilation job. Args: - input_model_config (dict): the trained model and the Amazon S3 location where it is stored. - output_model_config (dict): Identifies the Amazon S3 location where you want Amazon SageMaker Neo to save - the results of compilation job - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker Neo compilation jobs use this - role to access model artifacts. You must grant sufficient permissions to this role. + input_model_config (dict): the trained model and the Amazon S3 location where it is + stored. + output_model_config (dict): Identifies the Amazon S3 location where you want Amazon + SageMaker Neo to save the results of compilation job + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker Neo + compilation jobs use this role to access model artifacts. You must grant + sufficient permissions to this role. job_name (str): Name of the compilation job being created. - stop_condition (dict): Defines when compilation job shall finish. Contains entries that can be understood - by the service like ``MaxRuntimeInSeconds``. + stop_condition (dict): Defines when compilation job shall finish. Contains entries + that can be understood by the service like ``MaxRuntimeInSeconds``. tags (list[dict]): List of tags for labeling a compile model job. For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. @@ -417,40 +428,44 @@ def tune( Args: job_name (str): Name of the tuning job being created. strategy (str): Strategy to be used for hyperparameter estimations. - objective_type (str): The type of the objective metric for evaluating training jobs. This value can be - either 'Minimize' or 'Maximize'. + objective_type (str): The type of the objective metric for evaluating training jobs. + This value can be either 'Minimize' or 'Maximize'. objective_metric_name (str): Name of the metric for evaluating training jobs. - max_jobs (int): Maximum total number of training jobs to start for the hyperparameter tuning job. + max_jobs (int): Maximum total number of training jobs to start for the hyperparameter + tuning job. max_parallel_jobs (int): Maximum number of parallel training jobs to start. - parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can be one of three types: - Continuous, Integer, or Categorical. - static_hyperparameters (dict): Hyperparameters for model training. These hyperparameters remain - unchanged across all of the training jobs for the hyperparameter tuning job. The hyperparameters are - made accessible as a dictionary for the training code on SageMaker. 
+ parameter_ranges (dict): Dictionary of parameter ranges. These parameter ranges can be + one of three types: Continuous, Integer, or Categorical. + static_hyperparameters (dict): Hyperparameters for model training. These + hyperparameters remain unchanged across all of the training jobs for the + hyperparameter tuning job. The hyperparameters are made accessible as a dictionary + for the training code on SageMaker. image (str): Docker image containing training code. input_mode (str): The input mode that the algorithm supports. Valid modes: - * 'File' - Amazon SageMaker copies the training dataset from the S3 location to - a directory in the Docker container. - * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. - - metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) used to evaluate the - training jobs. Each dictionary contains two keys: 'Name' for the name of the metric, and 'Regex' for - the regular expression used to extract the metric from the logs. This should be defined only for - jobs that don't use an Amazon algorithm. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. - You must grant sufficient permissions to this role. - input_config (list): A list of Channel objects. Each channel is a named input source. Please refer to - the format details described: - https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job - output_config (dict): The S3 URI where you want to store the training results and optional KMS key ID. + a directory in the Docker container. + * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a + Unix-named pipe. + + metric_definitions (list[dict]): A list of dictionaries that defines the metric(s) + used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for + the name of the metric, and 'Regex' for the regular expression used to extract the + metric from the logs. This should be defined only for jobs that don't use an + Amazon algorithm. + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker + training jobs and APIs that create Amazon SageMaker endpoints use this role to + access training data and model artifacts. You must grant sufficient permissions + to this role. + input_config (list): A list of Channel objects. Each channel is a named input source. + Please refer to the format details described: + https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job + output_config (dict): The S3 URI where you want to store the training results and + optional KMS key ID. resource_config (dict): Contains values for ResourceConfig: - * instance_count (int): Number of EC2 instances to use for training. - The key in resource_config is 'InstanceCount'. - * instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'. - The key in resource_config is 'InstanceType'. + The key in resource_config is 'InstanceCount'. + * instance_type (str): Type of EC2 instance to use for training, for example, + 'ml.c4.xlarge'. The key in resource_config is 'InstanceType'. stop_condition (dict): When training should finish, e.g. ``MaxRuntimeInSeconds``. tags (list[dict]): List of tags for labeling the tuning job. 
For more, see
@@ -458,16 +473,17 @@ def tune(
             warm_start_config (dict): Configuration defining the type of warm start and
                 other required configurations.
             early_stopping_type (str): Specifies whether early stopping is enabled for the job.
-                Can be either 'Auto' or 'Off'. If set to 'Off', early stopping will not be attempted.
-                If set to 'Auto', early stopping of some training jobs may happen, but is not guaranteed to.
+                Can be either 'Auto' or 'Off'. If set to 'Off', early stopping will not be
+                attempted. If set to 'Auto', early stopping of some training jobs may happen, but
+                is not guaranteed to.
-            encrypt_inter_container_traffic (bool): Specifies whether traffic between training containers
-                is encrypted for the training jobs started for this hyperparameter tuning job (default: ``False``).
+            encrypt_inter_container_traffic (bool): Specifies whether traffic between training
+                containers is encrypted for the training jobs started for this hyperparameter
+                tuning job (default: ``False``).
             vpc_config (dict): Contains values for VpcConfig (default: None):
                 * subnets (list[str]): List of subnet ids.
-                The key in vpc_config is 'Subnets'.
+                    The key in vpc_config is 'Subnets'.
                 * security_group_ids (list[str]): List of security group ids.
-                The key in vpc_config is 'SecurityGroupIds'.
+                    The key in vpc_config is 'SecurityGroupIds'.
         """

         tune_request = {
@@ -576,14 +592,17 @@ def transform(
                 Possible values are 'MULTI_RECORD' and 'SINGLE_RECORD'.
             max_concurrent_transforms (int): The maximum number of HTTP requests to be made to
                 each individual transform container at one time.
-            max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB.
+            max_payload (int): Maximum size of the payload in a single HTTP request to the
+                container in MB.
             env (dict): Environment variables to be set for use during the transform job.
-            input_config (dict): A dictionary describing the input data (and its location) for the job.
+            input_config (dict): A dictionary describing the input data (and its location) for the
+                job.
             output_config (dict): A dictionary describing the output location for the job.
             resource_config (dict): A dictionary describing the resources to complete the job.
-            tags (list[dict]): List of tags for labeling a transform job. For more information,
-                see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
-            data_processing(dict): A dictionary describing config for combining the input data and transformed data.
+            tags (list[dict]): List of tags for labeling a transform job. For more, see
+                https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
+            data_processing(dict): A dictionary describing config for combining the input data and
+                transformed data.
         """
         transform_request = {
             "TransformJobName": job_name,
@@ -628,32 +647,38 @@ def create_model(
         """Create an Amazon SageMaker ``Model``.
         Specify the S3 location of the model artifacts and Docker image containing
         the inference code. Amazon SageMaker uses this information to deploy the
-        model in Amazon SageMaker. This method can also be used to create a Model for an Inference Pipeline
-        if you pass the list of container definitions through the containers parameter.
+        model in Amazon SageMaker. This method can also be used to create a Model for an Inference
+        Pipeline if you pass the list of container definitions through the containers parameter.

         Args:
             name (str): Name of the Amazon SageMaker ``Model`` to create.
-            role (str): An AWS IAM role (either name or full ARN). 
The Amazon SageMaker training jobs and APIs
-                that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
-                You must grant sufficient permissions to this role.
-            container_defs (list[dict[str, str]] or [dict[str, str]]): A single container definition or a list of
-                container definitions which will be invoked sequentially while performing the prediction. If the list
-                contains only one container, then it'll be passed to SageMaker Hosting as the ``PrimaryContainer`` and
-                otherwise, it'll be passed as ``Containers``.You can also specify the return value of
-                ``sagemaker.get_container_def()`` or ``sagemaker.pipeline_container_def()``, which will used to
-                create more advanced container configurations ,including model containers which need artifacts from S3.
+            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training
+                jobs and APIs that create Amazon SageMaker endpoints use this role to access
+                training data and model artifacts. You must grant sufficient permissions to this
+                role.
+            container_defs (list[dict[str, str]] or [dict[str, str]]): A single container
+                definition or a list of container definitions which will be invoked sequentially
+                while performing the prediction. If the list contains only one container, then
+                it'll be passed to SageMaker Hosting as the ``PrimaryContainer`` and otherwise,
+                it'll be passed as ``Containers``. You can also specify the return value of
+                ``sagemaker.get_container_def()`` or ``sagemaker.pipeline_container_def()``,
+                which will be used to create more advanced container configurations, including
+                model containers which need artifacts from S3.
             vpc_config (dict[str, list[str]]): The VpcConfig set on the model (default: None)
                 * 'Subnets' (list[str]): List of subnet ids.
                 * 'SecurityGroupIds' (list[str]): List of security group ids.
             enable_network_isolation (bool): Whether the model requires network isolation or not.
-            primary_container (str or dict[str, str]): Docker image which defines the inference code.
-                You can also specify the return value of ``sagemaker.container_def()``, which is used to create
-                more advanced container configurations, including model containers which need artifacts from S3. This
-                field is deprecated, please use container_defs instead.
-            tags(List[dict[str, str]]): Optional. The list of tags to add to the model. Example:
-                >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
-                For more information about tags, see https://boto3.amazonaws.com/v1/documentation\
-                /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags
+            primary_container (str or dict[str, str]): Docker image which defines the inference
+                code. You can also specify the return value of ``sagemaker.container_def()``,
+                which is used to create more advanced container configurations, including model
+                containers which need artifacts from S3. This field is deprecated, please use
+                container_defs instead.
+            tags(List[dict[str, str]]): Optional. The list of tags to add to the model.
+
+                Example:
+                >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}]
+                For more information about tags, see https://boto3.amazonaws.com/v1/documentation\
+                /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags

         Returns:
@@ -663,7 +688,10 @@
             raise ValueError("Both container_defs and primary_container can not be passed as input")

         if primary_container:
-            msg = "primary_container is going to be deprecated in a future release. Please use container_defs instead." 
+ msg = ( + "primary_container is going to be deprecated in a future release. Please use " + "container_defs instead." + ) warnings.warn(msg, DeprecationWarning) container_defs = primary_container @@ -720,19 +748,21 @@ def create_model_from_job( training_job_name (str): The Amazon SageMaker Training Job name. name (str): The name of the SageMaker ``Model`` to create (default: None). If not specified, the training job name is used. - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, specified either by an IAM role name or - role ARN. If None, the ``RoleArn`` from the SageMaker Training Job will be used. - primary_container_image (str): The Docker image reference (default: None). If None, it defaults to - the Training Image in ``training_job_name``. - model_data_url (str): S3 location of the model data (default: None). If None, defaults to - the ``ModelS3Artifacts`` of ``training_job_name``. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, specified either + by an IAM role name or role ARN. If None, the ``RoleArn`` from the SageMaker + Training Job will be used. + primary_container_image (str): The Docker image reference (default: None). If None, it + defaults to the Training Image in ``training_job_name``. + model_data_url (str): S3 location of the model data (default: None). If None, defaults + to the ``ModelS3Artifacts`` of ``training_job_name``. env (dict[string,string]): Model environment variables (default: {}). - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the + model. Default: use VpcConfig from training job. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - tags(List[dict[str, str]]): Optional. The list of tags to add to the model. For more, see - https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. + tags(List[dict[str, str]]): Optional. The list of tags to add to the model. + For more, see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html. Returns: str: The name of the created ``Model``. @@ -816,21 +846,26 @@ def create_endpoint_config( """Create an Amazon SageMaker endpoint configuration. The endpoint configuration identifies the Amazon SageMaker model (created using the - ``CreateModel`` API) and the hardware configuration on which to deploy the model. Provide this - endpoint configuration to the ``CreateEndpoint`` API, which then launches the hardware and deploys the model. + ``CreateModel`` API) and the hardware configuration on which to deploy the model. Provide + this endpoint configuration to the ``CreateEndpoint`` API, which then launches the + hardware and deploys the model. Args: name (str): Name of the Amazon SageMaker endpoint configuration to create. model_name (str): Name of the Amazon SageMaker ``Model``. - initial_instance_count (int): Minimum number of EC2 instances to launch. The actual number of - active instances for an endpoint at any given time varies due to autoscaling. + initial_instance_count (int): Minimum number of EC2 instances to launch. The actual + number of active instances for an endpoint at any given time varies due to + autoscaling. instance_type (str): Type of EC2 instance to launch, for example, 'ml.c4.xlarge'. - accelerator_type (str): Type of Elastic Inference accelerator to attach to the instance. For example, - 'ml.eia1.medium'. 
For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html - tags(List[dict[str, str]]): Optional. The list of tags to add to the endpoint config. Example: - >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}] - For more information about tags, see https://boto3.amazonaws.com/v1/documentation\ - /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags + accelerator_type (str): Type of Elastic Inference accelerator to attach to the + instance. For example, 'ml.eia1.medium'. + For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + tags(List[dict[str, str]]): Optional. The list of tags to add to the endpoint config. + + Example: + >>> tags = [{'Key': 'tagname', 'Value': 'tagvalue'}] + For more information about tags, see + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags Returns: @@ -862,15 +897,17 @@ def create_endpoint_config( return name def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True): - """Create an Amazon SageMaker ``Endpoint`` according to the endpoint configuration specified in the request. + """Create an Amazon SageMaker ``Endpoint`` according to the endpoint configuration + specified in the request. - Once the ``Endpoint`` is created, client applications can send requests to obtain inferences. - The endpoint configuration is created using the ``CreateEndpointConfig`` API. + Once the ``Endpoint`` is created, client applications can send requests to obtain + inferences. The endpoint configuration is created using the ``CreateEndpointConfig`` API. Args: endpoint_name (str): Name of the Amazon SageMaker ``Endpoint`` being created. config_name (str): Name of the Amazon SageMaker endpoint configuration to deploy. - wait (bool): Whether to wait for the endpoint deployment to complete before returning (default: True). + wait (bool): Whether to wait for the endpoint deployment to complete before returning + (default: True). Returns: str: Name of the Amazon SageMaker ``Endpoint`` created. @@ -887,13 +924,15 @@ def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True): return endpoint_name def update_endpoint(self, endpoint_name, endpoint_config_name): - """ Update an Amazon SageMaker ``Endpoint`` according to the endpoint configuration specified in the request + """ Update an Amazon SageMaker ``Endpoint`` according to the endpoint configuration + specified in the request Raise an error if endpoint with endpoint_name does not exist. Args: endpoint_name (str): Name of the Amazon SageMaker ``Endpoint`` to update. - endpoint_config_name (str): Name of the Amazon SageMaker endpoint configuration to deploy. + endpoint_config_name (str): Name of the Amazon SageMaker endpoint configuration to + deploy. Returns: str: Name of the Amazon SageMaker ``Endpoint`` being updated. @@ -902,9 +941,8 @@ def update_endpoint(self, endpoint_name, endpoint_config_name): lambda: self.sagemaker_client.describe_endpoint(EndpointName=endpoint_name) ): raise ValueError( - 'Endpoint with name "{}" does not exist; please use an existing endpoint name'.format( - endpoint_name - ) + "Endpoint with name '{}' does not exist; please use an " + "existing endpoint name".format(endpoint_name) ) self.sagemaker_client.update_endpoint( @@ -925,7 +963,8 @@ def delete_endpoint_config(self, endpoint_config_name): """Delete an Amazon SageMaker endpoint configuration. Args: - endpoint_config_name (str): Name of the Amazon SageMaker endpoint configuration to delete. 
+            endpoint_config_name (str): Name of the Amazon SageMaker endpoint configuration to
+                delete.
        """
        LOGGER.info("Deleting endpoint configuration with name: %s", endpoint_config_name)
        self.sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
@@ -1066,33 +1105,40 @@ def endpoint_from_job(
    ):
        """Create an ``Endpoint`` using the results of a successful training job.

-        Specify the job name, Docker image containing the inference code, and hardware configuration to deploy
-        the model. Internally the API, creates an Amazon SageMaker model (that describes the model artifacts and
-        the Docker image containing inference code), endpoint configuration (describing the hardware to deploy
-        for hosting the model), and creates an ``Endpoint`` (launches the EC2 instances and deploys the model on them).
-        In response, the API returns the endpoint name to which you can send requests for inferences.
+        Specify the job name, Docker image containing the inference code, and hardware
+        configuration to deploy the model. Internally, the API creates an Amazon SageMaker model
+        (that describes the model artifacts and the Docker image containing inference code),
+        endpoint configuration (describing the hardware to deploy for hosting the model), and
+        creates an ``Endpoint`` (launches the EC2 instances and deploys the model on them). In
+        response, the API returns the endpoint name to which you can send requests for inferences.

        Args:
            job_name (str): Name of the training job to deploy the results of.
-            initial_instance_count (int): Minimum number of EC2 instances to launch. The actual number of
-                active instances for an endpoint at any given time varies due to autoscaling.
+            initial_instance_count (int): Minimum number of EC2 instances to launch. The actual
+                number of active instances for an endpoint at any given time varies due to
+                autoscaling.
            instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction,
                for example, 'ml.c4.xlarge'.
-            deployment_image (str): The Docker image which defines the inference code to be used as the entry point for
-                accepting prediction requests. If not specified, uses the image used for the training job.
-            name (str): Name of the ``Endpoint`` to create. If not specified, uses the training job name.
-            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs
-                that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
-                You must grant sufficient permissions to this role.
-            wait (bool): Whether to wait for the endpoint deployment to complete before returning (default: True).
-            model_environment_vars (dict[str, str]): Environment variables to set on the model container
-                (default: None).
+            deployment_image (str): The Docker image which defines the inference code to be used
+                as the entry point for accepting prediction requests. If not specified, uses the
+                image used for the training job.
+            name (str): Name of the ``Endpoint`` to create. If not specified, uses the training job
+                name.
+            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training
+                jobs and APIs that create Amazon SageMaker endpoints use this role to access
+                training data and model artifacts. You must grant sufficient permissions to this
+                role.
+            wait (bool): Whether to wait for the endpoint deployment to complete before returning
+                (default: True).
+            model_environment_vars (dict[str, str]): Environment variables to set on the model
+                container (default: None).
vpc_config_override (dict[str, list[str]]): Overrides VpcConfig set on the model. Default: use VpcConfig from training job. * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - accelerator_type (str): Type of Elastic Inference accelerator to attach to the instance. For example, - 'ml.eia1.medium'. For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + accelerator_type (str): Type of Elastic Inference accelerator to attach to the + instance. For example, 'ml.eia1.medium'. + For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html Returns: str: Name of the ``Endpoint`` that is created. @@ -1136,22 +1182,27 @@ def endpoint_from_model_data( model_s3_location (str): S3 URI of the model artifacts to use for the endpoint. deployment_image (str): The Docker image which defines the runtime code to be used as the entry point for accepting prediction requests. - initial_instance_count (int): Minimum number of EC2 instances to launch. The actual number of - active instances for an endpoint at any given time varies due to autoscaling. - instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction, e.g. 'ml.c4.xlarge'. - name (str): Name of the ``Endpoint`` to create. If not specified, uses a name generated by - combining the image name with a timestamp. - role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs - that create Amazon SageMaker endpoints use this role to access training data and model artifacts. + initial_instance_count (int): Minimum number of EC2 instances to launch. The actual + number of active instances for an endpoint at any given time varies due to + autoscaling. + instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction, + e.g. 'ml.c4.xlarge'. + name (str): Name of the ``Endpoint`` to create. If not specified, uses a name + generated by combining the image name with a timestamp. + role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training + jobs and APIs that create Amazon SageMaker endpoints use this role to access + training data and model artifacts. You must grant sufficient permissions to this role. - wait (bool): Whether to wait for the endpoint deployment to complete before returning (default: True). - model_environment_vars (dict[str, str]): Environment variables to set on the model container - (default: None). + wait (bool): Whether to wait for the endpoint deployment to complete before returning + (default: True). + model_environment_vars (dict[str, str]): Environment variables to set on the model + container (default: None). model_vpc_config (dict[str, list[str]]): The VpcConfig set on the model (default: None) * 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. - accelerator_type (str): Type of Elastic Inference accelerator to attach to the instance. For example, - 'ml.eia1.medium'. For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + accelerator_type (str): Type of Elastic Inference accelerator to attach to the instance. + For example, 'ml.eia1.medium'. + For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html Returns: str: Name of the ``Endpoint`` that is created. @@ -1200,10 +1251,12 @@ def endpoint_from_production_variants( Args: name (str): The name of the ``Endpoint`` to create. 
production_variants (list[dict[str, str]]): The list of production variants to deploy. - tags (list[dict[str, str]]): A list of key-value pairs for tagging the endpoint (default: None). - kms_key (str): The KMS key that is used to encrypt the data on the storage volume attached - to the instance hosting the endpoint. - wait (bool): Whether to wait for the endpoint deployment to complete before returning (default: True). + tags (list[dict[str, str]]): A list of key-value pairs for tagging the endpoint + (default: None). + kms_key (str): The KMS key that is used to encrypt the data on the storage volume + attached to the instance hosting the endpoint. + wait (bool): Whether to wait for the endpoint deployment to complete before returning + (default: True). Returns: str: The name of the created ``Endpoint``. @@ -1224,8 +1277,8 @@ def endpoint_from_production_variants( def expand_role(self, role): """Expand an IAM role name into an ARN. - If the role is already in the form of an ARN, then the role is simply returned. Otherwise we retrieve the full - ARN and return it. + If the role is already in the form of an ARN, then the role is simply returned. Otherwise + we retrieve the full ARN and return it. Args: role (str): An AWS IAM role (either name or full ARN). @@ -1275,8 +1328,10 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method Args: job_name (str): Name of the training job to display the logs for. - wait (bool): Whether to keep looking for new log entries until the job completes (default: False). - poll (int): The interval in seconds between polling for new log entries and job completion (default: 5). + wait (bool): Whether to keep looking for new log entries until the job completes + (default: False). + poll (int): The interval in seconds between polling for new log entries and job + completion (default: 5). Raises: ValueError: If waiting and the training job fails. @@ -1303,12 +1358,13 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method color_wrap = sagemaker.logs.ColorWrap() - # The loop below implements a state machine that alternates between checking the job status and - # reading whatever is available in the logs at this point. Note, that if we were called with - # wait == False, we never check the job status. + # The loop below implements a state machine that alternates between checking the job status + # and reading whatever is available in the logs at this point. Note, that if we were + # called with wait == False, we never check the job status. # # If wait == TRUE and job is not completed, the initial state is TAILING - # If wait == FALSE, the initial state is COMPLETE (doesn't matter if the job really is complete). + # If wait == FALSE, the initial state is COMPLETE (doesn't matter if the job really is + # complete). # # The state table: # @@ -1320,14 +1376,14 @@ def logs_for_job( # noqa: C901 - suppress complexity warning for this method # COMPLETE Read logs, Exit N/A # # Notes: - # - The JOB_COMPLETE state forces us to do an extra pause and read any items that got to Cloudwatch after - # the job was marked complete. + # - The JOB_COMPLETE state forces us to do an extra pause and read any items that got to + # Cloudwatch after the job was marked complete. last_describe_job_call = time.time() last_description = description while True: if len(stream_names) < instance_count: - # Log streams are created whenever a container starts writing to stdout/err, so this list - # may be dynamic until we have a stream for every instance. 
+                    # Log streams are created whenever a container starts writing to stdout/err, so
+                    # this list may be dynamic until we have a stream for every instance.
                try:
                    streams = client.describe_log_streams(
                        logGroupName=log_group,
@@ -1397,7 +1453,8 @@ def logs_for_job(  # noqa: C901 - suppress complexity warning for this method
            self._check_job_status(job_name, description, "TrainingJobStatus")
            if dot:
                print()
-            # Customers are not billed for hardware provisioning, so billable time is less than total time
+            # Customers are not billed for hardware provisioning, so billable time is less than
+            # total time
            billable_time = (
                description["TrainingEndTime"] - description["TrainingStartTime"]
            ) * instance_count
@@ -1413,8 +1470,8 @@ def container_def(image, model_data_url=None, env=None):
            e.g. SageMaker training job model artifacts (default: None).
        env (dict[str, str]): Environment variables to set inside the container (default: None).
    Returns:
-        dict[str, str]: A complete container definition object usable with the CreateModel API if passed via
-        `PrimaryContainers` field.
+        dict[str, str]: A complete container definition object usable with the CreateModel API if
+        passed via `PrimaryContainers` field.
    """
    if env is None:
        env = {}
@@ -1425,15 +1482,17 @@ def container_def(image, model_data_url=None, env=None):


def pipeline_container_def(models, instance_type=None):
-    """
-    Create a definition for executing a pipeline of containers as part of a SageMaker model.
+    """Create a definition for executing a pipeline of containers as part of a SageMaker model.
+
    Args:
-        models (list[sagemaker.Model]): this will be a list of ``sagemaker.Model`` objects in the order the inference
-            should be invoked.
-        instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge' (default: None).
+        models (list[sagemaker.Model]): this will be a list of ``sagemaker.Model`` objects in the
+            order the inference should be invoked.
+        instance_type (str): The EC2 instance type to deploy this Model to. For example,
+            'ml.p2.xlarge' (default: None).
+
    Returns:
-        list[dict[str, str]]: list of container definition objects usable with with the CreateModel API for inference
-            pipelines if passed via `Containers` field.
+        list[dict[str, str]]: list of container definition objects usable with the
+            CreateModel API for inference pipelines if passed via `Containers` field.
    """
    c_defs = []  # should contain list of container definitions in the same order customer passed
    for model in models:
@@ -1449,17 +1508,22 @@ def production_variant(
    initial_weight=1,
    accelerator_type=None,
):
-    """Create a production variant description suitable for use in a ``ProductionVariant`` list as part of a
-    ``CreateEndpointConfig`` request.
+    """Create a production variant description suitable for use in a ``ProductionVariant`` list as
+    part of a ``CreateEndpointConfig`` request.

    Args:
        model_name (str): The name of the SageMaker model this production variant references.
-        instance_type (str): The EC2 instance type for this production variant. For example, 'ml.c4.8xlarge'.
-        initial_instance_count (int): The initial instance count for this production variant (default: 1).
-        variant_name (string): The ``VariantName`` of this production variant (default: 'AllTraffic').
-        initial_weight (int): The relative ``InitialVariantWeight`` of this production variant (default: 1).
-        accelerator_type (str): Type of Elastic Inference accelerator for this production variant. For example,
-            'ml.eia1.medium'.
For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html + instance_type (str): The EC2 instance type for this production variant. For example, + 'ml.c4.8xlarge'. + initial_instance_count (int): The initial instance count for this production variant + (default: 1). + variant_name (string): The ``VariantName`` of this production variant + (default: 'AllTraffic'). + initial_weight (int): The relative ``InitialVariantWeight`` of this production variant + (default: 1). + accelerator_type (str): Type of Elastic Inference accelerator for this production variant. + For example, 'ml.eia1.medium'. + For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html Returns: dict[str, str]: An SageMaker ``ProductionVariant`` description @@ -1492,7 +1556,10 @@ def get_execution_role(sagemaker_session=None): if ":role/" in arn: return arn - message = "The current AWS identity is not a role: {}, therefore it cannot be used as a SageMaker execution role" + message = ( + "The current AWS identity is not a role: {}, therefore it cannot be used as a " + "SageMaker execution role" + ) raise ValueError(message.format(arn)) @@ -1500,7 +1567,8 @@ class s3_input(object): """Amazon SageMaker channel configurations for S3 data sources. Attributes: - config (dict[str, dict]): A SageMaker ``DataSource`` referencing a SageMaker ``S3DataSource``. + config (dict[str, dict]): A SageMaker ``DataSource`` referencing a SageMaker + ``S3DataSource``. """ def __init__( @@ -1523,27 +1591,32 @@ def __init__( s3_data (str): Defines the location of s3 data to train on. distribution (str): Valid values: 'FullyReplicated', 'ShardedByS3Key' (default: 'FullyReplicated'). - compression (str): Valid values: 'Gzip', None (default: None). This is used only in Pipe input mode. + compression (str): Valid values: 'Gzip', None (default: None). This is used only in + Pipe input mode. content_type (str): MIME type of the input data (default: None). record_wrapping (str): Valid values: 'RecordIO' (default: None). - s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile', 'AugmentedManifestFile'. If 'S3Prefix', - ``s3_data`` defines a prefix of s3 objects to train on. All objects with s3 keys beginning with - ``s3_data`` will be used to train. If 'ManifestFile' or 'AugmentedManifestFile', then ``s3_data`` - defines a single s3 manifest file or augmented manifest file (respectively), listing the s3 data to - train on. Both the ManifestFile and AugmentedManifestFile formats are described in the SageMaker API + s3_data_type (str): Valid values: 'S3Prefix', 'ManifestFile', 'AugmentedManifestFile'. + If 'S3Prefix', ``s3_data`` defines a prefix of s3 objects to train on. All objects + with s3 keys beginning with ``s3_data`` will be used to train. If 'ManifestFile' + or 'AugmentedManifestFile', then ``s3_data`` defines a single s3 manifest file or + augmented manifest file (respectively), listing the s3 data to train on. Both the + ManifestFile and AugmentedManifestFile formats are described in the SageMaker API documentation: https://docs.aws.amazon.com/sagemaker/latest/dg/API_S3DataSource.html - input_mode (str): Optional override for this channel's input mode (default: None). By default, channels will - use the input mode defined on ``sagemaker.estimator.EstimatorBase.input_mode``, but they will ignore - that setting if this parameter is set. - - * None - Amazon SageMaker will use the input mode specified in the ``Estimator``. 
- * 'File' - Amazon SageMaker copies the training dataset from the S3 location to a local directory. - * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a Unix-named pipe. - - attribute_names (list[str]): A list of one or more attribute names to use that are found in a specified - AugmentedManifestFile. - shuffle_config (ShuffleConfig): If specified this configuration enables shuffling on this channel. See the - SageMaker API documentation for more info: + input_mode (str): Optional override for this channel's input mode (default: None). By + default, channels will use the input mode defined on + ``sagemaker.estimator.EstimatorBase.input_mode``, but they will ignore that setting + if this parameter is set. + * None - Amazon SageMaker will use the input mode specified in the + ``Estimator``. + * 'File' - Amazon SageMaker copies the training dataset from the S3 location + to a local directory. + * 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via + a Unix-named pipe. + + attribute_names (list[str]): A list of one or more attribute names to use that are + found in a specified AugmentedManifestFile. + shuffle_config (ShuffleConfig): If specified this configuration enables shuffling on + this channel. See the SageMaker API documentation for more info: https://docs.aws.amazon.com/sagemaker/latest/dg/API_ShuffleConfig.html """ @@ -1573,8 +1646,8 @@ def __init__( class ShuffleConfig(object): """ - Used to configure channel shuffling using a seed. See SageMaker - documentation for more detail: https://docs.aws.amazon.com/sagemaker/latest/dg/API_ShuffleConfig.html + Used to configure channel shuffling using a seed. See SageMaker documentation for + more detail: https://docs.aws.amazon.com/sagemaker/latest/dg/API_ShuffleConfig.html """ def __init__(self, seed): @@ -1587,8 +1660,8 @@ def __init__(self, seed): class ModelContainer(object): - """ - Amazon SageMaker Model configurations for inference pipelines. + """Amazon SageMaker Model configurations for inference pipelines. + Attributes: model_data (str): S3 Model artifact location image (str): Docker image URL in ECR @@ -1596,12 +1669,13 @@ class ModelContainer(object): """ def __init__(self, model_data, image, env=None): - """ - Create a definition of a model which can be part of an Inference Pipeline + """Create a definition of a model which can be part of an Inference Pipeline + Args: model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file. image (str): A Docker image URI. - env (dict[str, str]): Environment variables to run with ``image`` when hosted in SageMaker (default: None). + env (dict[str, str]): Environment variables to run with ``image`` when hosted in + SageMaker (default: None). """ self.model_data = model_data self.image = image diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index 15de26e0b3..45c5ad665f 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -91,7 +91,8 @@ def __init__( **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor. """ - # SciKit-Learn does not support distributed training or training on GPU instance types. Fail fast. + # SciKit-Learn does not support distributed training or training on GPU instance types. + # Fail fast. 
train_instance_type = kwargs.get("train_instance_type") _validate_not_gpu_instance_type(train_instance_type) diff --git a/src/sagemaker/sparkml/model.py b/src/sagemaker/sparkml/model.py index 97d62674c3..98bf5f6c18 100644 --- a/src/sagemaker/sparkml/model.py +++ b/src/sagemaker/sparkml/model.py @@ -85,7 +85,8 @@ def __init__(self, model_data, role=None, spark_version=2.2, sagemaker_session=N please do not pass this variable. **kwargs: """ - # for local mode, sagemaker_session should be passed as None but we need a session to get boto_region_name + # For local mode, sagemaker_session should be passed as None but we need a session to get + # boto_region_name region_name = (sagemaker_session or Session()).boto_region_name image = "{}/{}:{}".format(registry(region_name, framework_name), repo_name, spark_version) super(SparkMLModel, self).__init__( diff --git a/src/sagemaker/tensorflow/__init__.py b/src/sagemaker/tensorflow/__init__.py index b32b2b5eea..194f79689d 100644 --- a/src/sagemaker/tensorflow/__init__.py +++ b/src/sagemaker/tensorflow/__init__.py @@ -17,7 +17,8 @@ import os # Hack to use our local copy of tensorflow_serving.apis, which contains the protobuf-generated -# classes for tensorflow serving. Currently tensorflow_serving_api can only be pip-installed for python 2. +# classes for tensorflow serving. Currently tensorflow_serving_api can only be pip-installed for +# python 2. sys.path.append(os.path.dirname(__file__)) from sagemaker.tensorflow.estimator import ( # noqa: E402, F401 # pylint: disable=wrong-import-position diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index d07dc63961..5a3406a0bd 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -60,7 +60,8 @@ def __init__(self, estimator, logdir=None): Args: estimator (sagemaker.estimator.Framework): A SageMaker ``Estimator``. - logdir (str): Directory for logs (default: None). If not specified, a temporary directory is made. + logdir (str): Directory for logs (default: None). If not specified, a temporary + directory is made. """ threading.Thread.__init__(self) self.event = threading.Event() @@ -216,32 +217,40 @@ def __init__( """Initialize a ``TensorFlow`` estimator. Args: - training_steps (int): Perform this many steps of training. `None`, the default means train forever. - evaluation_steps (int): Perform this many steps of evaluation. `None`, the default means that evaluation - runs until input from eval_input_fn is exhausted (or another exception is raised). - checkpoint_path (str): Identifies S3 location where checkpoint data during model training can be - saved (default: None). For distributed model training, this parameter is required. - py_version (str): Python version you want to use for executing your model training code (default: 'py2'). - framework_version (str): TensorFlow version you want to use for executing your model training code. - List of supported versions https://github.com/aws/sagemaker-python-sdk#tensorflow-sagemaker-estimators. + training_steps (int): Perform this many steps of training. `None`, the default means + train forever. + evaluation_steps (int): Perform this many steps of evaluation. `None`, the default + means that evaluation runs until input from eval_input_fn is exhausted (or another + exception is raised). + checkpoint_path (str): Identifies S3 location where checkpoint data during model + training can be saved (default: None). For distributed model training, this + parameter is required. 
+            py_version (str): Python version you want to use for executing your model training
+                code (default: 'py2').
+            framework_version (str): TensorFlow version you want to use for executing your model
+                training code. List of supported versions
+                https://github.com/aws/sagemaker-python-sdk#tensorflow-sagemaker-estimators.
                If not specified, this will default to 1.11.
-            model_dir (str): S3 location where the checkpoint data and models can be exported to during training
-                (default: None). If not specified a default S3 URI will be generated. It will be passed in the
-                training script as one of the command line arguments.
-            requirements_file (str): Path to a ``requirements.txt`` file (default: ''). The path should be within and
-                relative to ``source_dir``. Details on the format can be found in the
-                `Pip User Guide `_.
-            image_name (str): If specified, the estimator will use this image for training and hosting, instead of
-                selecting the appropriate SageMaker official image based on framework_version and py_version. It can
-                be an ECR url or dockerhub image and tag.
+            model_dir (str): S3 location where the checkpoint data and models can be exported to
+                during training (default: None). If not specified a default S3 URI will be
+                generated. It will be passed in the training script as one of the command line
+                arguments.
+            requirements_file (str): Path to a ``requirements.txt`` file (default: ''). The path
+                should be within and relative to ``source_dir``. Details on the format can be
+                found in the Pip User Guide:
+                <https://pip.pypa.io/en/stable/reference/pip_install/#requirements-file-format>
+            image_name (str): If specified, the estimator will use this image for training and
+                hosting, instead of selecting the appropriate SageMaker official image based on
+                framework_version and py_version. It can be an ECR url or dockerhub image and tag.
                Examples:
                    123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
                    custom-image:latest.
-            script_mode (bool): If set to True will the estimator will use the Script Mode containers (default: False).
-                This will be ignored if py_version is set to 'py3'.
+            script_mode (bool): If set to True, the estimator will use the Script Mode
+                containers (default: False). This will be ignored if py_version is set to 'py3'.
            distributions (dict): A dictionary with information on how to run distributed training
-                (default: None). Currently we support distributed training with parameter servers and MPI.
+                (default: None). Currently we support distributed training with parameter servers
+                and MPI.
                To enable parameter server use the following setup:

                .. code:: python
@@ -371,21 +380,22 @@ def fit(self, inputs=None, wait=True, logs=True, job_name=None, run_tensorboard_
            This can be one of three types:

            * (str) - the S3 location where training data is saved.
-            * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for
-                training data, you can specify a dict mapping channel names
+            * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
+                channels for training data, you can specify a dict mapping channel names
                to strings or :func:`~sagemaker.session.s3_input` objects.
-            * (sagemaker.session.s3_input) - channel configuration for S3 data sources that can provide
-                additional information as well as the path to the training dataset.
+            * (sagemaker.session.s3_input) - channel configuration for S3 data sources that
+                can provide additional information as well as the path to the training dataset.
                See :func:`sagemaker.session.s3_input` for full details.
wait (bool): Whether the call should wait until the job completes (default: True). logs (bool): Whether to show the logs produced by the job. Only meaningful when wait is True (default: True). - job_name (str): Training job name. If not specified, the estimator generates a default job name, - based on the training image name and current timestamp. - run_tensorboard_locally (bool): Whether to execute TensorBoard in a different process with - downloaded checkpoint information (default: False). This is an experimental feature, and requires - TensorBoard and AWS CLI to be installed. It terminates TensorBoard when execution ends. + job_name (str): Training job name. If not specified, the estimator generates a default + job name, based on the training image name and current timestamp. + run_tensorboard_locally (bool): Whether to execute TensorBoard in a different process + with downloaded checkpoint information (default: False). This is an experimental + feature, and requires TensorBoard and AWS CLI to be installed. It terminates + TensorBoard when execution ends. """ def fit_super(): @@ -427,7 +437,8 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na job_details, model_channel_name ) - # Move some of the tensorflow specific init params from hyperparameters into the main init params. + # Move some of the tensorflow specific init params from hyperparameters into the main init + # params. for argument in ("checkpoint_path", "training_steps", "evaluation_steps", "model_dir"): value = init_params["hyperparameters"].pop(argument, None) if value is not None: @@ -446,10 +457,10 @@ def _prepare_init_params_from_job_description(cls, job_details, model_channel_na init_params["py_version"] = py_version - # We switched image tagging scheme from regular image version (e.g. '1.0') to more expressive - # containing framework version, device type and python version (e.g. '1.5-gpu-py2'). - # For backward compatibility map deprecated image tag '1.0' to a '1.4' framework version - # otherwise extract framework version from the tag itself. + # We switched image tagging scheme from regular image version (e.g. '1.0') to more + # expressive containing framework version, device type and python version + # (e.g. '1.5-gpu-py2'). For backward compatibility map deprecated image tag '1.0' to a + # '1.4' framework version otherwise extract framework version from the tag itself. init_params["framework_version"] = ( "1.4" if tag == "1.0" else fw.framework_version_from_tag(tag) ) @@ -478,11 +489,13 @@ def create_model( deploying to a SageMaker endpoint, or starting SageMaker Batch Transform jobs. Args: - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. - vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also + used during transform jobs. If not specified, the role from the Estimator will be + used. + model_server_workers (int): Optional. The number of worker processes used by the + inference server. If None, server will use one worker per vCPU. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the + model. Default: use subnets and security groups from this Estimator. 
* 'Subnets' (list[str]): List of subnet ids. * 'SecurityGroupIds' (list[str]): List of security group ids. @@ -490,22 +503,25 @@ def create_model( If not specified, the model will be configured to use the default SageMaker model server. If 'tensorflow-serving', the model will be configured to use the SageMaker Tensorflow Serving container. - entry_point (str): Path (absolute or relative) to the local Python source file which should be executed - as the entry point to training. If not specified and ``endpoint_type`` is 'tensorflow-serving', - no entry point is used. If ``endpoint_type`` is also ``None``, then the training entry point is used. + entry_point (str): Path (absolute or relative) to the local Python source file which + should be executed as the entry point to training. If not specified and + ``endpoint_type`` is 'tensorflow-serving', no entry point is used. If + ``endpoint_type`` is also ``None``, then the training entry point is used. source_dir (str): Path (absolute or relative) to a directory with any other serving source code dependencies aside from the entry point file. If not specified and - ``endpoint_type`` is 'tensorflow-serving', no source_dir is used. If ``endpoint_type`` is also ``None``, - then the model source directory from training is used. + ``endpoint_type`` is 'tensorflow-serving', no source_dir is used. If + ``endpoint_type`` is also ``None``, then the model source directory from training + is used. dependencies (list[str]): A list of paths to directories (absolute or relative) with any additional libraries that will be exported to the container. - If not specified and ``endpoint_type`` is 'tensorflow-serving', ``dependencies`` is set to ``None``. + If not specified and ``endpoint_type`` is 'tensorflow-serving', ``dependencies`` is + set to ``None``. If ``endpoint_type`` is also ``None``, then the dependencies from training are used. Returns: - sagemaker.tensorflow.model.TensorFlowModel or sagemaker.tensorflow.serving.Model: A ``Model`` object. - See :class:`~sagemaker.tensorflow.serving.Model` or :class:`~sagemaker.tensorflow.model.TensorFlowModel` - for full details. + sagemaker.tensorflow.model.TensorFlowModel or sagemaker.tensorflow.serving.Model: A + ``Model`` object. See :class:`~sagemaker.tensorflow.serving.Model` or + :class:`~sagemaker.tensorflow.model.TensorFlowModel` for full details. """ role = role or self.role @@ -670,29 +686,35 @@ def transformer( endpoint_type=None, entry_point=None, ): - """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It reuses the - SageMaker Session and base job name used by the Estimator. + """Return a ``Transformer`` that uses a SageMaker Model based on the training job. It + reuses the SageMaker Session and base job name used by the Estimator. Args: instance_count (int): Number of EC2 instances to use. instance_type (str): Type of EC2 instance to use, for example, 'ml.c4.xlarge'. - strategy (str): The strategy used to decide how to batch records in a single request (default: None). - Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. - assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' or 'None'. - output_path (str): S3 location for saving the transform result. If not specified, results are stored to - a default bucket. - output_kms_key (str): Optional. KMS key ID for encrypting the transform output (default: None). - accept (str): The content type accepted by the endpoint deployed during the transform job. 
- env (dict): Environment variables to be set for use during the transform job (default: None). + strategy (str): The strategy used to decide how to batch records in a single request + (default: None). Valid values: 'MULTI_RECORD' and 'SINGLE_RECORD'. + assemble_with (str): How the output is assembled (default: None). Valid values: 'Line' + or 'None'. + output_path (str): S3 location for saving the transform result. If not specified, + results are stored to a default bucket. + output_kms_key (str): Optional. KMS key ID for encrypting the transform output + (default: None). + accept (str): The content type accepted by the endpoint deployed during the transform + job. + env (dict): Environment variables to be set for use during the transform job + (default: None). max_concurrent_transforms (int): The maximum number of HTTP requests to be made to each individual transform container at one time. - max_payload (int): Maximum size of the payload in a single HTTP request to the container in MB. - tags (list[dict]): List of tags for labeling a transform job. If none specified, then the tags used for - the training job are used for the transform job. - role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during - transform jobs. If not specified, the role from the Estimator will be used. - model_server_workers (int): Optional. The number of worker processes used by the inference server. - If None, server will use one worker per vCPU. + max_payload (int): Maximum size of the payload in a single HTTP request to the + container in MB. + tags (list[dict]): List of tags for labeling a transform job. If none specified, then + the tags used for the training job are used for the transform job. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also + used during transform jobs. If not specified, the role from the Estimator will be + used. + model_server_workers (int): Optional. The number of worker processes used by the + inference server. If None, server will use one worker per vCPU. volume_kms_key (str): Optional. KMS key ID for encrypting the volume attached to the ML compute instance (default: None). endpoint_type (str): Optional. Selects the software stack used by the inference server. @@ -700,9 +722,10 @@ def transformer( SageMaker model server. If 'tensorflow-serving', the model will be configured to use the SageMaker Tensorflow Serving container. - entry_point (str): Path (absolute or relative) to the local Python source file which should be executed - as the entry point to training. If not specified and ``endpoint_type`` is 'tensorflow-serving', - no entry point is used. If ``endpoint_type`` is also ``None``, then the training entry point is used. + entry_point (str): Path (absolute or relative) to the local Python source file which + should be executed as the entry point to training. If not specified and + ``endpoint_type`` is 'tensorflow-serving', no entry point is used. If + ``endpoint_type`` is also ``None``, then the training entry point is used. """ role = role or self.role diff --git a/src/sagemaker/tensorflow/predictor.py b/src/sagemaker/tensorflow/predictor.py index f47f6860ae..ae7277e9ac 100644 --- a/src/sagemaker/tensorflow/predictor.py +++ b/src/sagemaker/tensorflow/predictor.py @@ -46,8 +46,9 @@ def __init__(self): self.content_type = CONTENT_TYPE_OCTET_STREAM def __call__(self, data): - # isinstance does not work here because a same protobuf message can be imported from a different module. 
-        # for example sagemaker.tensorflow.tensorflow_serving.regression_pb2 and tensorflow_serving.apis.regression_pb2
+        # isinstance does not work here because the same protobuf message can be imported from a
+        # different module. For example sagemaker.tensorflow.tensorflow_serving.regression_pb2 and
+        # tensorflow_serving.apis.regression_pb2
        """
        Args:
            data:
diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py
index a0f570ecc3..ce442e0986 100644
--- a/src/sagemaker/transformer.py
+++ b/src/sagemaker/transformer.py
@@ -126,11 +126,11 @@ def transform(
            data_type (str): What the S3 location defines (default: 'S3Prefix').
                Valid values:

-                * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as
-                    inputs for the transform job.
+                * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix
+                    will be used as inputs for the transform job.

-                * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as
-                    an input for the transform job.
+                * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3
+                    object to use as an input for the transform job.

            content_type (str): MIME type of the input data (default: None).
            compression_type (str): Compression type of the input data, if compressed
                (default: None). Valid values: 'Gzip', None.
diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py
index 9531bd4524..debbc5c013 100644
--- a/src/sagemaker/tuner.py
+++ b/src/sagemaker/tuner.py
@@ -69,7 +69,8 @@ class WarmStartConfig(object):
    ``HyperparameterTuner``, with type and parents for warm start.

    Examples:
-        >>> warm_start_config = WarmStartConfig(type=WarmStartTypes.TransferLearning, parents={"p1","p2"})
+        >>> warm_start_config = WarmStartConfig(
+        >>>     type=WarmStartTypes.TransferLearning, parents={"p1","p2"})
        >>> warm_start_config.type
        "TransferLearning"
        >>> warm_start_config.parents
@@ -491,7 +492,7 @@ def best_training_job(self):
        """
        self._ensure_last_tuning_job()

-        tuning_job_describe_result = self.estimator.sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(  # noqa: E501
+        tuning_job_describe_result = self.estimator.sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(  # noqa: E501 # pylint: disable=line-too-long
            HyperParameterTuningJobName=self.latest_tuning_job.name
        )
@@ -669,7 +670,8 @@ def hyperparameter_ranges(self):
        parameter_ranges = []
        for parameter_name, parameter in self._hyperparameter_ranges.items():
            if parameter is not None and parameter.__name__ == range_type:
-                # Categorical parameters needed to be serialized as JSON for our framework containers
+                # Categorical parameters need to be serialized as JSON for our framework
+                # containers
                if isinstance(parameter, CategoricalParameter) and isinstance(
                    self.estimator, Framework
                ):
@@ -701,9 +703,10 @@ def _validate_parameter_ranges(self):
        for _, value in kls.__dict__.items():
            if isinstance(value, hp):
                try:
-                    # The hyperparam names may not be the same as the class attribute that holds them,
-                    # for instance: local_lloyd_init_method is called local_init_method. We need to map these
-                    # and pass the correct name to the constructor.
+                    # The hyperparam names may not be the same as the class attribute that
+                    # holds them, for instance: local_lloyd_init_method is called
+                    # local_init_method. We need to map these and pass the correct name to
+                    # the constructor.
parameter_range = self._hyperparameter_ranges[value.name] if isinstance(parameter_range, ParameterRange): @@ -739,7 +742,8 @@ def transfer_learning_tuner(self, additional_parents=None, estimator=None): Examples: >>> parent_tuner = HyperparameterTuner.attach(tuning_job_name="parent-job-1") - >>> transfer_learning_tuner = parent_tuner.transfer_learning_tuner(additional_parents={"parent-job-2"}) + >>> transfer_learning_tuner = parent_tuner.transfer_learning_tuner( + >>> additional_parents={"parent-job-2"}) Later On: >>> transfer_learning_tuner.fit(inputs={}) @@ -772,7 +776,7 @@ def identical_dataset_and_algorithm_tuner(self, additional_parents=None): Examples: >>> parent_tuner = HyperparameterTuner.attach(tuning_job_name="parent-job-1") >>> identical_dataset_algo_tuner = parent_tuner.identical_dataset_and_algorithm_tuner( - >>> additional_parents={"parent-job-2"}) + >>> additional_parents={"parent-job-2"}) Later On: >>> identical_dataset_algo_tuner.fit(inputs={}) @@ -896,8 +900,8 @@ def wait(self): def create_identical_dataset_and_algorithm_tuner( parent, additional_parents=None, sagemaker_session=None ): - """Creates a new tuner by copying the request fields from the provided parent to the new instance of - ``HyperparameterTuner`` followed by addition of warm start configuration + """Creates a new tuner by copying the request fields from the provided parent to the new + instance of ``HyperparameterTuner`` followed by addition of warm start configuration with the type as "IdenticalDataAndAlgorithm" and ``parents`` as the union of provided list of ``additional_parents`` and the ``parent``. diff --git a/src/sagemaker/vpc_utils.py b/src/sagemaker/vpc_utils.py index c569e20c1c..0382e06b05 100644 --- a/src/sagemaker/vpc_utils.py +++ b/src/sagemaker/vpc_utils.py @@ -18,7 +18,8 @@ VPC_CONFIG_KEY = "VpcConfig" # A global constant value for methods which can optionally override VpcConfig -# Using the default implies that VpcConfig should be reused from an existing Estimator or Training Job +# Using the default implies that VpcConfig should be reused from an existing Estimator or +# Training Job VPC_CONFIG_DEFAULT = "VPC_CONFIG_DEFAULT" diff --git a/src/sagemaker/workflow/airflow.py b/src/sagemaker/workflow/airflow.py index 21173723a8..9b74580f19 100644 --- a/src/sagemaker/workflow/airflow.py +++ b/src/sagemaker/workflow/airflow.py @@ -188,12 +188,12 @@ def training_config(estimator, inputs=None, job_name=None, mini_batch_size=None) method of the associated estimator, as this can take any of the following forms: * (str) - The S3 location where training data is saved. - * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names to + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple + channels for training data, you can specify a dict mapping channel names to strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide - additional information about the training dataset. See + * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can + provide additional information about the training dataset. See :func:`sagemaker.session.s3_input` for full details. * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of @@ -235,12 +235,12 @@ def tuning_config(tuner, inputs, job_name=None): * (str) - The S3 location where training data is saved. 
- * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple channels for - training data, you can specify a dict mapping channel names to + * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple + channels for training data, you can specify a dict mapping channel names to strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can provide - additional information about the training dataset. See + * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can + provide additional information about the training dataset. See :func:`sagemaker.session.s3_input` for full details. * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of @@ -359,8 +359,8 @@ def update_estimator_from_task(estimator, task_id, task_type): ) # need to strip the double quotes in json to get the string job_name = ( - "{{ ti.xcom_pull(task_ids='%s')['Tuning']['TrainingJobDefinition']['StaticHyperParameters']" - "['sagemaker_job_name'].strip('%s') }}" % (task_id, '"') + "{{ ti.xcom_pull(task_ids='%s')['Tuning']['TrainingJobDefinition']" + "['StaticHyperParameters']['sagemaker_job_name'].strip('%s') }}" % (task_id, '"') ) else: raise ValueError("task_type must be either 'training', 'tuning' or None.") @@ -508,8 +508,8 @@ def model_config_from_estimator( * 'SecurityGroupIds' (list[str]): List of security group ids. Returns: - dict: Model config that can be directly used by SageMakerModelOperator in Airflow. It can also be part - of the config used by SageMakerEndpointOperator in Airflow. + dict: Model config that can be directly used by SageMakerModelOperator in Airflow. It can + also be part of the config used by SageMakerEndpointOperator in Airflow. """ update_estimator_from_task(estimator, task_id, task_type) if isinstance(estimator, sagemaker.estimator.Estimator): @@ -552,11 +552,11 @@ def transform_config( data_type (str): What the S3 location defines (default: 'S3Prefix'). Valid values: - * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as - inputs for the transform job. + * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will + be used as inputs for the transform job. - * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as - an input for the transform job. + * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object + to use as an input for the transform job. content_type (str): MIME type of the input data (default: None). compression_type (str): Compression type of the input data, if compressed (default: None). Valid values: 'Gzip', None. @@ -661,11 +661,11 @@ def transform_config_from_estimator( data_type (str): What the S3 location defines (default: 'S3Prefix'). Valid values: - * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will be used as - inputs for the transform job. + * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will + be used as inputs for the transform job. - * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object to use as - an input for the transform job. + * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object + to use as an input for the transform job. content_type (str): MIME type of the input data (default: None). 
            compression_type (str): Compression type of the input data, if compressed
                (default: None). Valid values: 'Gzip', None.
diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py
index 714da580c8..2fe4c416fb 100644
--- a/tests/unit/test_session.py
+++ b/tests/unit/test_session.py
@@ -1174,7 +1174,7 @@ def test_update_endpoint_non_existing_endpoint(sagemaker_session):
        {"Error": {"Code": "ValidationException", "Message": "Could not find entity"}}, "foo"
    )
    expected_error_message = (
-        'Endpoint with name "non-existing-endpoint" does not exist; '
+        "Endpoint with name 'non-existing-endpoint' does not exist; "
        "please use an existing endpoint name"
    )
    sagemaker_session.sagemaker_client.describe_endpoint = Mock(side_effect=error)

From d3892fb1c1050b98c33afff6ac603600f42f9f65 Mon Sep 17 00:00:00 2001
From: Karim Nakad
Date: Mon, 22 Jul 2019 15:43:59 -0700
Subject: [PATCH 33/35] doc: add instructions for setting up Cloud9 environment. (#949)

Added instructions that allow for a low-cost ~10min environment setup.
---
 CONTRIBUTING.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 11a3062395..a114d13319 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -19,6 +19,23 @@ reported the issue. Please try to include as much information as you can. Detail
 * Any modifications you've made relevant to the bug
 * A description of your environment or deployment

+## Setting up your development environment [optional, but recommended]
+
+* Set up the Cloud9 environment:
+  * Instance type: You'll need at least 4 GB of RAM to avoid running into memory issues. We recommend at least a t3.medium to run the unit tests. Larger hosts will reduce the chance of encountering resource limits.
+  * Follow the instructions at [Creating a Cloud9 EC2 Environment](https://docs.aws.amazon.com/cloud9/latest/user-guide/create-environment.html#create-environment-main) to set up a Cloud9 EC2 environment
+* Expand the storage of the EC2 instance from 10GB to 20GB
+  * Because you'll need a minimum of 11GB of disk storage on the EC2 instance to run the package's unit tests, you'll need to expand your EC2 volume size. We recommend at least 20GB. A larger volume will reduce the chance of encountering resource limits.
+  * Follow the instructions at [Modifying an EBS Volume Using Elastic Volumes (Console)](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/requesting-ebs-volume-modifications.html#modify-ebs-volume) to increase the EBS volume size associated with the newly created EC2 instance.
+  * Wait 5-10min for the new EBS volume increase to take effect.
+  * Allow EC2 to claim the additional space by stopping and then starting your EC2 host.
+* Create a fork of this package on GitHub. You should end up with a fork at `https://github.com/<username>/sagemaker-python-sdk`
+  * Follow the instructions at [Fork a repo](https://help.github.com/en/articles/fork-a-repo) to fork a GitHub repository.
+* In the Cloud9 UI, pull down this package by clicking on "Clone from Github" or running the following command in the Cloud9 terminal: `git clone https://github.com/<username>/sagemaker-python-sdk` where `<username>` is your github username.
+* Install tox using `pip install tox` +* Install coverage using `pip install .[test]` +* cd into the sagemaker-python-sdk package: `cd sagemaker-python-sdk` or `cd /environment/sagemaker-python-sdk` +* Run the following tox command and verify that all unit tests pass: `tox tests/unit` ## Contributing via Pull Requests Contributions via pull requests are much appreciated. From 6545cf84ff5d9f73cfeea422b0b74ce2b32b36fd Mon Sep 17 00:00:00 2001 From: ci Date: Tue, 23 Jul 2019 17:17:03 +0000 Subject: [PATCH 34/35] prepare release v1.34.1 --- CHANGELOG.md | 18 ++++++++++++++++++ VERSION | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4efaf4f302..e33e7c8de8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## v1.34.1 (2019-07-23) + +### Bug fixes and other changes + + * enable line-too-long Pylint check + * improving Chainer integ tests + * update TensorFlow script mode dependency list + * improve documentation of some functions + * update PyTorch version + * allow serving script to be defined for deploy() and transformer() with frameworks + * format and add missing docstring placeholders + * add MXNet 1.4.1 support + +### Documentation changes + + * add instructions for setting up Cloud9 environment. + * update using_tensorflow topic + ## v1.34.0 (2019-07-18) ### Features diff --git a/VERSION b/VERSION index 1a572d8c8e..a95a46d9fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.34.1.dev0 +1.34.1 From dfa9644ce0a7bc91791cbf9e2df00192d7ee9f4b Mon Sep 17 00:00:00 2001 From: ci Date: Tue, 23 Jul 2019 17:39:01 +0000 Subject: [PATCH 35/35] update development version to v1.34.2.dev0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index a95a46d9fa..b25a8ecb76 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.34.1 +1.34.2.dev0
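
Taken together, the session helpers documented in the patches above compose into a small deployment workflow. The sketch below is illustrative only and is not part of the patch series: it assumes the `sagemaker` package at roughly the v1.34.x API surface shown in these diffs, and the model name, endpoint name, image URI, S3 path, and role ARN are hypothetical placeholders.

.. code:: python

    import sagemaker
    from sagemaker.session import Session

    session = Session()

    # Hypothetical placeholders -- substitute your own image, model artifact, and role.
    image = "123.dkr.ecr.us-west-2.amazonaws.com/my-image:latest"
    role = "arn:aws:iam::111122223333:role/MySageMakerRole"

    # container_def() builds the PrimaryContainer dict described in its docstring.
    session.create_model(
        name="my-model",
        role=role,
        container_defs=sagemaker.container_def(
            image, model_data_url="s3://my-bucket/model.tar.gz"
        ),
    )

    # production_variant() builds one entry of the ProductionVariants list for
    # CreateEndpointConfig; endpoint_from_production_variants() then creates the
    # endpoint config and the endpoint, returning the endpoint name.
    variant = sagemaker.production_variant(
        model_name="my-model",
        instance_type="ml.c4.xlarge",
        initial_instance_count=1,
    )
    endpoint_name = session.endpoint_from_production_variants(
        name="my-endpoint", production_variants=[variant]
    )

Note that running this sketch would create billable AWS resources (a model, an endpoint configuration, and a hosted endpoint); it is meant only to show how the documented pieces relate.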