Skip to content

Commit 459995b

Browse files
authored
Merge branch 'zwei' into add-string-deserializer
2 parents 6730bea + fe68d4e commit 459995b

File tree

94 files changed

+1148
-1009
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+1148
-1009
lines changed

src/sagemaker/algorithm.py

+23-23
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ def __init__(
3535
self,
3636
algorithm_arn,
3737
role,
38-
train_instance_count,
39-
train_instance_type,
40-
train_volume_size=30,
41-
train_volume_kms_key=None,
42-
train_max_run=24 * 60 * 60,
38+
instance_count,
39+
instance_type,
40+
volume_size=30,
41+
volume_kms_key=None,
42+
max_run=24 * 60 * 60,
4343
input_mode="File",
4444
output_path=None,
4545
output_kms_key=None,
@@ -65,15 +65,15 @@ def __init__(
6565
access training data and model artifacts. After the endpoint
6666
is created, the inference code might use the IAM role, if it
6767
needs to access an AWS resource.
68-
train_instance_count (int): Number of Amazon EC2 instances to
69-
use for training. train_instance_type (str): Type of EC2
68+
instance_count (int): Number of Amazon EC2 instances to
69+
use for training.
instance_type (str): Type of EC2
7070
instance to use for training, for example, 'ml.c4.xlarge'.
71-
train_volume_size (int): Size in GB of the EBS volume to use for
71+
volume_size (int): Size in GB of the EBS volume to use for
7272
storing input data during training (default: 30). Must be large enough to store
7373
training data if File Mode is used (which is the default).
74-
train_volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached
74+
volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached
7575
to the training instance (default: None).
76-
train_max_run (int): Timeout in seconds for training (default: 24 * 60 * 60).
76+
max_run (int): Timeout in seconds for training (default: 24 * 60 * 60).
7777
After this amount of time Amazon SageMaker terminates the
7878
job regardless of its current status.
7979
input_mode (str): The input mode that the algorithm supports
@@ -131,11 +131,11 @@ def __init__(
131131
self.algorithm_arn = algorithm_arn
132132
super(AlgorithmEstimator, self).__init__(
133133
role,
134-
train_instance_count,
135-
train_instance_type,
136-
train_volume_size,
137-
train_volume_kms_key,
138-
train_max_run,
134+
instance_count,
135+
instance_type,
136+
volume_size,
137+
volume_kms_key,
138+
max_run,
139139
input_mode,
140140
output_path,
141141
output_kms_key,
@@ -167,30 +167,30 @@ def validate_train_spec(self):
167167

168168
# Check that the input mode provided is compatible with the training input modes for the
169169
# algorithm.
170-
train_input_modes = self._algorithm_training_input_modes(train_spec["TrainingChannels"])
171-
if self.input_mode not in train_input_modes:
170+
input_modes = self._algorithm_training_input_modes(train_spec["TrainingChannels"])
171+
if self.input_mode not in input_modes:
172172
raise ValueError(
173173
"Invalid input mode: %s. %s only supports: %s"
174-
% (self.input_mode, algorithm_name, train_input_modes)
174+
% (self.input_mode, algorithm_name, input_modes)
175175
)
176176

177177
# Check that the training instance type is compatible with the algorithm.
178178
supported_instances = train_spec["SupportedTrainingInstanceTypes"]
179-
if self.train_instance_type not in supported_instances:
179+
if self.instance_type not in supported_instances:
180180
raise ValueError(
181-
"Invalid train_instance_type: %s. %s supports the following instance types: %s"
182-
% (self.train_instance_type, algorithm_name, supported_instances)
181+
"Invalid instance_type: %s. %s supports the following instance types: %s"
182+
% (self.instance_type, algorithm_name, supported_instances)
183183
)
184184

185185
# Verify if distributed training is supported by the algorithm
186186
if (
187-
self.train_instance_count > 1
187+
self.instance_count > 1
188188
and "SupportsDistributedTraining" in train_spec
189189
and not train_spec["SupportsDistributedTraining"]
190190
):
191191
raise ValueError(
192192
"Distributed training is not supported by %s. "
193-
"Please set train_instance_count=1" % algorithm_name
193+
"Please set instance_count=1" % algorithm_name
194194
)
195195

196196
def set_hyperparameters(self, **kwargs):

src/sagemaker/amazon/README.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ Please find an example code snippet for illustration:
3434
.. code:: python
3535
3636
from sagemaker import PCA
37-
pca_estimator = PCA(role='SageMakerRole', train_instance_count=1, train_instance_type='ml.m4.xlarge', num_components=3)
37+
pca_estimator = PCA(role='SageMakerRole', instance_count=1, instance_type='ml.m4.xlarge', num_components=3)
3838
3939
import numpy as np
4040
records = pca_estimator.record_set(np.arange(10).reshape(2,5))

src/sagemaker/amazon/amazon_estimator.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase):
5252
def __init__(
5353
self,
5454
role,
55-
train_instance_count,
56-
train_instance_type,
55+
instance_count,
56+
instance_type,
5757
data_location=None,
5858
enable_network_isolation=False,
5959
**kwargs
@@ -66,9 +66,9 @@ def __init__(
6666
endpoints use this role to access training data and model
6767
artifacts. After the endpoint is created, the inference code
6868
might use the IAM role, if it needs to access an AWS resource.
69-
train_instance_count (int): Number of Amazon EC2 instances to use
69+
instance_count (int): Number of Amazon EC2 instances to use
7070
for training.
71-
train_instance_type (str): Type of EC2 instance to use for training,
71+
instance_type (str): Type of EC2 instance to use for training,
7272
for example, 'ml.c4.xlarge'.
7373
data_location (str or None): The s3 prefix to upload RecordSet
7474
objects to, expressed as an S3 url. For example
@@ -89,8 +89,8 @@ def __init__(
8989
"""
9090
super(AmazonAlgorithmEstimatorBase, self).__init__(
9191
role,
92-
train_instance_count,
93-
train_instance_type,
92+
instance_count,
93+
instance_type,
9494
enable_network_isolation=enable_network_isolation,
9595
**kwargs
9696
)
@@ -266,7 +266,7 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):
266266
the list of objects created and also stored in S3.
267267
268268
The number of S3 objects created is controlled by the
269-
``train_instance_count`` property on this Estimator. One S3 object is
269+
``instance_count`` property on this Estimator. One S3 object is
270270
created per training instance.
271271
272272
Args:
@@ -291,7 +291,7 @@ def record_set(self, train, labels=None, channel="train", encrypt=False):
291291
key_prefix = key_prefix.lstrip("/")
292292
logger.debug("Uploading to bucket %s and key_prefix %s", bucket, key_prefix)
293293
manifest_s3_file = upload_numpy_to_s3_shards(
294-
self.train_instance_count, s3, bucket, key_prefix, train, labels, encrypt
294+
self.instance_count, s3, bucket, key_prefix, train, labels, encrypt
295295
)
296296
logger.debug("Created manifest file %s", manifest_s3_file)
297297
return RecordSet(

src/sagemaker/amazon/factorization_machines.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ class FactorizationMachines(AmazonAlgorithmEstimatorBase):
7777
def __init__(
7878
self,
7979
role,
80-
train_instance_count,
81-
train_instance_type,
80+
instance_count,
81+
instance_type,
8282
num_factors,
8383
predictor_type,
8484
epochs=None,
@@ -150,9 +150,9 @@ def __init__(
150150
endpoints use this role to access training data and model
151151
artifacts. After the endpoint is created, the inference code
152152
might use the IAM role, if accessing AWS resource.
153-
train_instance_count (int): Number of Amazon EC2 instances to use
153+
instance_count (int): Number of Amazon EC2 instances to use
154154
for training.
155-
train_instance_type (str): Type of EC2 instance to use for training,
155+
instance_type (str): Type of EC2 instance to use for training,
156156
for example, 'ml.c4.xlarge'.
157157
num_factors (int): Dimensionality of factorization.
158158
predictor_type (str): Type of predictor 'binary_classifier' or
@@ -212,9 +212,7 @@ def __init__(
212212
:class:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase` and
213213
:class:`~sagemaker.estimator.EstimatorBase`.
214214
"""
215-
super(FactorizationMachines, self).__init__(
216-
role, train_instance_count, train_instance_type, **kwargs
217-
)
215+
super(FactorizationMachines, self).__init__(role, instance_count, instance_type, **kwargs)
218216

219217
self.num_factors = num_factors
220218
self.predictor_type = predictor_type

src/sagemaker/amazon/ipinsights.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ class IPInsights(AmazonAlgorithmEstimatorBase):
5353
def __init__(
5454
self,
5555
role,
56-
train_instance_count,
57-
train_instance_type,
56+
instance_count,
57+
instance_type,
5858
num_entity_vectors,
5959
vector_dim,
6060
batch_metrics_publish_interval=None,
@@ -94,9 +94,9 @@ def __init__(
9494
endpoints use this role to access training data and model
9595
artifacts. After the endpoint is created, the inference code
9696
might use the IAM role, if accessing AWS resource.
97-
train_instance_count (int): Number of Amazon EC2 instances to use
97+
instance_count (int): Number of Amazon EC2 instances to use
9898
for training.
99-
train_instance_type (str): Type of EC2 instance to use for training,
99+
instance_type (str): Type of EC2 instance to use for training,
100100
for example, 'ml.m5.xlarge'.
101101
num_entity_vectors (int): Required. The number of embeddings to
102102
train for entities accessing online resources. We recommend 2x
@@ -126,7 +126,7 @@ def __init__(
126126
:class:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase` and
127127
:class:`~sagemaker.estimator.EstimatorBase`.
128128
"""
129-
super(IPInsights, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
129+
super(IPInsights, self).__init__(role, instance_count, instance_type, **kwargs)
130130
self.num_entity_vectors = num_entity_vectors
131131
self.vector_dim = vector_dim
132132
self.batch_metrics_publish_interval = batch_metrics_publish_interval

src/sagemaker/amazon/kmeans.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ class KMeans(AmazonAlgorithmEstimatorBase):
5151
def __init__(
5252
self,
5353
role,
54-
train_instance_count,
55-
train_instance_type,
54+
instance_count,
55+
instance_type,
5656
k,
5757
init_method=None,
5858
max_iterations=None,
@@ -103,9 +103,9 @@ def __init__(
103103
endpoints use this role to access training data and model
104104
artifacts. After the endpoint is created, the inference code
105105
might use the IAM role, if accessing AWS resource.
106-
train_instance_count (int): Number of Amazon EC2 instances to use
106+
instance_count (int): Number of Amazon EC2 instances to use
107107
for training.
108-
train_instance_type (str): Type of EC2 instance to use for training,
108+
instance_type (str): Type of EC2 instance to use for training,
109109
for example, 'ml.c4.xlarge'.
110110
k (int): The number of clusters to produce.
111111
init_method (str): How to initialize cluster locations. One of
@@ -142,7 +142,7 @@ def __init__(
142142
:class:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase` and
143143
:class:`~sagemaker.estimator.EstimatorBase`.
144144
"""
145-
super(KMeans, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
145+
super(KMeans, self).__init__(role, instance_count, instance_type, **kwargs)
146146
self.k = k
147147
self.init_method = init_method
148148
self.max_iterations = max_iterations

src/sagemaker/amazon/knn.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ class KNN(AmazonAlgorithmEstimatorBase):
6363
def __init__(
6464
self,
6565
role,
66-
train_instance_count,
67-
train_instance_type,
66+
instance_count,
67+
instance_type,
6868
k,
6969
sample_size,
7070
predictor_type,
@@ -105,8 +105,8 @@ def __init__(
105105
endpoints use this role to access training data and model
106106
artifacts. After the endpoint is created, the inference code
107107
might use the IAM role, if accessing AWS resource.
108-
train_instance_count:
109-
train_instance_type (str): Type of EC2 instance to use for training,
108+
instance_count (int): Number of Amazon EC2 instances to use for training.
109+
instance_type (str): Type of EC2 instance to use for training,
110110
for example, 'ml.c4.xlarge'.
111111
k (int): Required. Number of nearest neighbors.
112112
sample_size (int): Required. Number of data points to be sampled
@@ -136,7 +136,7 @@ def __init__(
136136
:class:`~sagemaker.estimator.EstimatorBase`.
137137
"""
138138

139-
super(KNN, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
139+
super(KNN, self).__init__(role, instance_count, instance_type, **kwargs)
140140
self.k = k
141141
self.sample_size = sample_size
142142
self.predictor_type = predictor_type

src/sagemaker/amazon/lda.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class LDA(AmazonAlgorithmEstimatorBase):
3838
def __init__(
3939
self,
4040
role,
41-
train_instance_type,
41+
instance_type,
4242
num_topics,
4343
alpha0=None,
4444
max_restarts=None,
@@ -92,7 +92,7 @@ def __init__(
9292
endpoints use this role to access training data and model
9393
artifacts. After the endpoint is created, the inference code
9494
might use the IAM role, if accessing AWS resource.
95-
train_instance_type (str): Type of EC2 instance to use for training,
95+
instance_type (str): Type of EC2 instance to use for training,
9696
for example, 'ml.c4.xlarge'.
9797
num_topics (int): The number of topics for LDA to find within the
9898
data.
@@ -114,14 +114,14 @@ def __init__(
114114
:class:`~sagemaker.estimator.EstimatorBase`.
115115
"""
116116
# this algorithm only supports single instance training
117-
if kwargs.pop("train_instance_count", 1) != 1:
117+
if kwargs.pop("instance_count", 1) != 1:
118118
print(
119119
"LDA only supports single instance training. Defaulting to 1 {}.".format(
120-
train_instance_type
120+
instance_type
121121
)
122122
)
123123

124-
super(LDA, self).__init__(role, 1, train_instance_type, **kwargs)
124+
super(LDA, self).__init__(role, 1, instance_type, **kwargs)
125125
self.num_topics = num_topics
126126
self.alpha0 = alpha0
127127
self.max_restarts = max_restarts

src/sagemaker/amazon/linear_learner.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ class LinearLearner(AmazonAlgorithmEstimatorBase):
120120
def __init__(
121121
self,
122122
role,
123-
train_instance_count,
124-
train_instance_type,
123+
instance_count,
124+
instance_type,
125125
predictor_type,
126126
binary_classifier_model_selection_criteria=None,
127127
target_recall=None,
@@ -214,9 +214,9 @@ def __init__(
214214
endpoints use this role to access training data and model
215215
artifacts. After the endpoint is created, the inference code
216216
might use the IAM role, if accessing AWS resource.
217-
train_instance_count (int): Number of Amazon EC2 instances to use
217+
instance_count (int): Number of Amazon EC2 instances to use
218218
for training.
219-
train_instance_type (str): Type of EC2 instance to use for training,
219+
instance_type (str): Type of EC2 instance to use for training,
220220
for example, 'ml.c4.xlarge'.
221221
predictor_type (str): The type of predictor to learn. Either
222222
"binary_classifier" or "multiclass_classifier" or "regressor".
@@ -325,9 +325,7 @@ def __init__(
325325
:class:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase` and
326326
:class:`~sagemaker.estimator.EstimatorBase`.
327327
"""
328-
super(LinearLearner, self).__init__(
329-
role, train_instance_count, train_instance_type, **kwargs
330-
)
328+
super(LinearLearner, self).__init__(role, instance_count, instance_type, **kwargs)
331329
self.predictor_type = predictor_type
332330
self.binary_classifier_model_selection_criteria = binary_classifier_model_selection_criteria
333331
self.target_recall = target_recall
@@ -418,7 +416,7 @@ def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
418416

419417
# mini_batch_size can't be greater than number of records or training job fails
420418
default_mini_batch_size = min(
421-
self.DEFAULT_MINI_BATCH_SIZE, max(1, int(num_records / self.train_instance_count))
419+
self.DEFAULT_MINI_BATCH_SIZE, max(1, int(num_records / self.instance_count))
422420
)
423421
mini_batch_size = mini_batch_size or default_mini_batch_size
424422
super(LinearLearner, self)._prepare_for_training(

src/sagemaker/amazon/ntm.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ class NTM(AmazonAlgorithmEstimatorBase):
5959
def __init__(
6060
self,
6161
role,
62-
train_instance_count,
63-
train_instance_type,
62+
instance_count,
63+
instance_type,
6464
num_topics,
6565
encoder_layers=None,
6666
epochs=None,
@@ -113,8 +113,8 @@ def __init__(
113113
endpoints use this role to access training data and model
114114
artifacts. After the endpoint is created, the inference code
115115
might use the IAM role, if accessing AWS resource.
116-
train_instance_count:
117-
train_instance_type (str): Type of EC2 instance to use for training,
116+
instance_count (int): Number of Amazon EC2 instances to use for training.
117+
instance_type (str): Type of EC2 instance to use for training,
118118
for example, 'ml.c4.xlarge'.
119119
num_topics (int): Required. The number of topics for NTM to find
120120
within the data.
@@ -147,7 +147,7 @@ def __init__(
147147
:class:`~sagemaker.estimator.EstimatorBase`.
148148
"""
149149

150-
super(NTM, self).__init__(role, train_instance_count, train_instance_type, **kwargs)
150+
super(NTM, self).__init__(role, instance_count, instance_type, **kwargs)
151151
self.num_topics = num_topics
152152
self.encoder_layers = encoder_layers
153153
self.epochs = epochs

0 commit comments

Comments
 (0)