
Commit d254128

Merge remote-tracking branch 'public/master' into pytorch-release
2 parents: aa99ba9 + d0b7384

138 files changed: +8558 additions, -2447 deletions


.flake8

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+[flake8]
+application_import_names = sagemaker, tests
+import-order-style = google

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -24,3 +24,4 @@ doc/_templates
 venv/
 *~
 .pytest_cache/
+*.swp

CHANGELOG.rst

Lines changed: 33 additions & 1 deletion
@@ -2,13 +2,45 @@
 CHANGELOG
 =========

+1.4.1
+=====
+
+* bug-fix: Local Mode: Fix for non Framework containers
+
+1.4.0
+=====
+
+* bug-fix: Remove __all__ and add noqa in __init__
+* bug-fix: Estimators: Change max_iterations hyperparameter key for KMeans
+* bug-fix: Estimators: Remove unused argument job_details for ``EstimatorBase.attach()``
+* bug-fix: Local Mode: Show logs in Jupyter notebooks
+* feature: HyperparameterTuner: Add support for hyperparameter tuning jobs
+* feature: Analytics: Add functions for metrics in Training and Hyperparameter Tuning jobs
+* feature: Estimators: add support for tagging training jobs
+
+1.3.0
+=====
+
+* feature: Add chainer
+
+1.2.5
+=====
+
+* bug-fix: Change module names to string type in __all__
+* feature: Save training output files in local mode
+* bug-fix: tensorflow-serving-api: SageMaker does not conflict with tensorflow-serving-api module version
+* feature: Local Mode: add support for local training data using file://
+* feature: Updated TensorFlow Serving api protobuf files
+* bug-fix: No longer poll for logs from stopped training jobs
+
 1.2.4
 =====

 * feature: Estimators: add support for Amazon Random Cut Forest algorithm

 1.2.3
-=========
+=====

 * bug-fix: Fix local mode not using the right s3 bucket

 1.2.2

README.rst

Lines changed: 27 additions & 1395 deletions
Large diffs are not rendered by default.

doc/conf.py

Lines changed: 14 additions & 1 deletion
@@ -1,4 +1,17 @@
-# -*- coding: utf-8 -*-
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
 import os
 import sys
 from datetime import datetime

setup.py

Lines changed: 18 additions & 6 deletions
@@ -1,7 +1,19 @@
-import os
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
 from glob import glob
-from os.path import basename
-from os.path import splitext
+import os

 from setuptools import setup, find_packages

@@ -11,11 +23,11 @@ def read(fname):


 setup(name="sagemaker",
-      version="1.3.dev",
+      version="1.5.dev",
       description="Open source library for training and deploying models on Amazon SageMaker.",
       packages=find_packages('src'),
       package_dir={'': 'src'},
-      py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
+      py_modules=[os.path.splitext(os.path.basename(path))[0] for path in glob('src/*.py')],
       long_description=read('README.rst'),
       author="Amazon Web Services",
       url='https://github.com/aws/sagemaker-python-sdk/',
@@ -37,7 +49,7 @@ def read(fname):

       extras_require={
          'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist',
-                  'mock', 'tensorflow>=1.3.0', 'contextlib2', 'awslogs']},
+                  'mock', 'tensorflow>=1.3.0', 'contextlib2', 'awslogs', 'pandas']},

       entry_points={
          'console_scripts': ['sagemaker=sagemaker.cli.main:main'],

src/sagemaker/__init__.py

Lines changed: 20 additions & 27 deletions
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -12,31 +12,24 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import

-from sagemaker import estimator
-from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor
-from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor
-from sagemaker.amazon.lda import LDA, LDAModel, LDAPredictor
-from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor
-from sagemaker.amazon.factorization_machines import FactorizationMachines, FactorizationMachinesModel
-from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor
-from sagemaker.amazon.ntm import NTM, NTMModel, NTMPredictor
-from sagemaker.amazon.randomcutforest import RandomCutForest, RandomCutForestModel, RandomCutForestPredictor
+from sagemaker import estimator  # noqa: F401
+from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor  # noqa: F401
+from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor  # noqa: F401
+from sagemaker.amazon.lda import LDA, LDAModel, LDAPredictor  # noqa: F401
+from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor  # noqa: F401
+from sagemaker.amazon.factorization_machines import FactorizationMachines, FactorizationMachinesModel  # noqa: F401
+from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor  # noqa: F401
+from sagemaker.amazon.ntm import NTM, NTMModel, NTMPredictor  # noqa: F401
+from sagemaker.amazon.randomcutforest import (RandomCutForest, RandomCutForestModel,  # noqa: F401
+                                              RandomCutForestPredictor)

-from sagemaker.local.local_session import LocalSession
+from sagemaker.analytics import TrainingJobAnalytics, HyperparameterTuningJobAnalytics  # noqa: F401
+from sagemaker.local.local_session import LocalSession  # noqa: F401

-from sagemaker.model import Model
-from sagemaker.predictor import RealTimePredictor
-from sagemaker.session import Session
-from sagemaker.session import container_def
-from sagemaker.session import production_variant
-from sagemaker.session import s3_input
-from sagemaker.session import get_execution_role
-
-
-__all__ = [estimator, KMeans, KMeansModel, KMeansPredictor, PCA, PCAModel, PCAPredictor, LinearLearner,
-           LinearLearnerModel, LinearLearnerPredictor,
-           LDA, LDAModel, LDAPredictor,
-           FactorizationMachines, FactorizationMachinesModel, FactorizationMachinesPredictor,
-           RandomCutForest, RandomCutForestModel, RandomCutForestPredictor,
-           Model, NTM, NTMModel, NTMPredictor, RealTimePredictor, Session, LocalSession,
-           container_def, s3_input, production_variant, get_execution_role]
+from sagemaker.model import Model  # noqa: F401
+from sagemaker.predictor import RealTimePredictor  # noqa: F401
+from sagemaker.session import Session  # noqa: F401
+from sagemaker.session import container_def  # noqa: F401
+from sagemaker.session import production_variant  # noqa: F401
+from sagemaker.session import s3_input  # noqa: F401
+from sagemaker.session import get_execution_role  # noqa: F401

src/sagemaker/amazon/README.rst

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+
+===================================
+AWS SageMaker Estimators and Models
+===================================
+
+Amazon SageMaker provides several built-in machine learning algorithms that you can use for a variety of problem types.
+
+The full list of algorithms is available on the AWS website: https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html
+
+SageMaker Python SDK includes Estimator wrappers for the AWS K-means, Principal Components Analysis (PCA), Linear Learner, Factorization Machines, Latent Dirichlet Allocation (LDA), Neural Topic Model (NTM) and Random Cut Forest algorithms.
+
+Definition and usage
+~~~~~~~~~~~~~~~~~~~~
+Estimators that wrap Amazon's built-in algorithms define their hyperparameters with defaults. When no default is possible, you need to provide the value during construction, e.g.:
+
+- ``KMeans`` Estimator requires parameter ``k`` to define the number of clusters
+- ``PCA`` Estimator requires parameter ``num_components`` to define the number of principal components
+
+Interaction is identical to that of any other Estimator; there are only additional details about how data is specified.
+
+Input data format
+^^^^^^^^^^^^^^^^^
+Please note that Amazon's built-in algorithms work best with the protobuf ``recordIO`` format.
+The data is expected to be available in an S3 location and, depending on the algorithm, can be split across multiple data channels.
+
+This package offers support for preparing data in the required format and uploading it to S3.
+The provided class ``RecordSet`` captures the necessary details, such as S3 location, number of records, and data channel, and is expected as an input parameter when calling ``fit()``.
+
+The function ``record_set`` is available on algorithm objects to make this simple.
+It takes a 2D numpy array as input, uploads the data to S3, and returns a ``RecordSet`` object. By default it uses the ``train`` data channel and no labels, but both can be specified when called.
+
+Please find an example code snippet for illustration:
+
+.. code:: python
+
+    from sagemaker import PCA
+    pca_estimator = PCA(role='SageMakerRole', train_instance_count=1, train_instance_type='ml.m4.xlarge', num_components=3)
+
+    import numpy as np
+    records = pca_estimator.record_set(np.arange(10).reshape(2, 5))
+
+    pca_estimator.fit(records)
+
+
+Predictions support
+~~~~~~~~~~~~~~~~~~~
+Calling inference on deployed Amazon built-in algorithms requires a specific input format. By default, this library creates a predictor that lets you pass plain numpy data.
+The data is converted to the ``application/x-recordio-protobuf`` input format, and the response is deserialized from protobuf and returned as the result of the ``predict`` call.

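For illustration, here is a minimal sketch of the prediction flow described above: deploying the fitted estimator and calling ``predict`` with numpy data. The instance settings and the endpoint clean-up call are assumptions for the example, not part of this commit.

.. code:: python

    import numpy as np

    # Deploy the PCA estimator fitted above to a real-time endpoint
    # (instance count/type are illustrative values).
    pca_predictor = pca_estimator.deploy(initial_instance_count=1,
                                         instance_type='ml.m4.xlarge')

    # The default predictor serializes the numpy array to
    # application/x-recordio-protobuf and deserializes the protobuf
    # response back into Record objects.
    result = pca_predictor.predict(np.arange(10).reshape(2, 5))
    print(result)

    # Tear down the endpoint when finished (clean-up step assumed here).
    pca_estimator.sagemaker_session.delete_endpoint(pca_predictor.endpoint)
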
src/sagemaker/amazon/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of

src/sagemaker/amazon/amazon_estimator.py

Lines changed: 55 additions & 12 deletions
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -10,14 +10,16 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
 import json
 import logging
 import tempfile
 from six.moves.urllib.parse import urlparse
 from sagemaker.amazon import validation
 from sagemaker.amazon.hyperparameter import Hyperparameter as hp  # noqa
 from sagemaker.amazon.common import write_numpy_to_dense_tensor
-from sagemaker.estimator import EstimatorBase
+from sagemaker.estimator import EstimatorBase, _TrainingJob
 from sagemaker.session import s3_input
 from sagemaker.utils import sagemaker_timestamp

@@ -90,11 +92,38 @@ def _prepare_init_params_from_job_description(cls, job_details):
         del init_params['image']
         return init_params

-    def fit(self, records, mini_batch_size=None, **kwargs):
+    def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
+        """Set hyperparameters needed for training.
+
+        Args:
+            * records (:class:`~RecordSet`): The records to train this ``Estimator`` on.
+            * mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
+                default value will be used.
+            * job_name (str): Name of the training job to be created. If not specified, one is generated,
+                using the base name given to the constructor if applicable.
+        """
+        super(AmazonAlgorithmEstimatorBase, self)._prepare_for_training(job_name=job_name)
+
+        feature_dim = None
+
+        if isinstance(records, list):
+            for record in records:
+                if record.channel == 'train':
+                    feature_dim = record.feature_dim
+                    break
+            if feature_dim is None:
+                raise ValueError('Must provide train channel.')
+        else:
+            feature_dim = records.feature_dim
+
+        self.feature_dim = feature_dim
+        self.mini_batch_size = mini_batch_size
+
+    def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None):
         """Fit this Estimator on serialized Record objects, stored in S3.

         ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of
-        s3 data files to train this ``Estimator`` on.
+        S3 data files to train this ``Estimator`` on.

         Training data is expected to be encoded as dense or sparse vectors in the "values" feature
         on each Record. If the data is labeled, the label is expected to be encoded as a list of
@@ -108,15 +137,19 @@ def fit(self, records, mini_batch_size=None, **kwargs):

         Args:
             records (:class:`~RecordSet`): The records to train this ``Estimator`` on
-            mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a
+            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
                 default value will be used.
+            wait (bool): Whether the call should wait until the job completes (default: True).
+            logs (bool): Whether to show the logs produced by the job.
+                Only meaningful when wait is True (default: True).
+            job_name (str): Training job name. If not specified, the estimator generates a default job name,
+                based on the training image name and current timestamp.
         """
-        self.feature_dim = records.feature_dim
-        self.mini_batch_size = mini_batch_size
+        self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)

-        data = {records.channel: s3_input(records.s3_data, distribution='ShardedByS3Key',
-                                          s3_data_type=records.s3_data_type)}
-        super(AmazonAlgorithmEstimatorBase, self).fit(data, **kwargs)
+        self.latest_training_job = _TrainingJob.start_new(self, records)
+        if wait:
+            self.latest_training_job.wait(logs=logs)

     def record_set(self, train, labels=None, channel="train"):
         """Build a :class:`~RecordSet` from a numpy :class:`~ndarray` matrix and label vector.
@@ -178,6 +211,14 @@ def __repr__(self):
         """Return an unambiguous representation of this RecordSet"""
         return str((RecordSet, self.__dict__))

+    def data_channel(self):
+        """Return a dictionary to represent the training data in a channel for use with ``fit()``"""
+        return {self.channel: self.records_s3_input()}
+
+    def records_s3_input(self):
+        """Return a s3_input to represent the training data"""
+        return s3_input(self.s3_data, distribution='ShardedByS3Key', s3_data_type=self.s3_data_type)
+

 def _build_shards(num_shards, array):
     if num_shards < 1:
@@ -234,14 +275,16 @@ def registry(region_name, algorithm=None):
             "us-east-1": "382416733822",
             "us-east-2": "404615174143",
             "us-west-2": "174872318107",
-            "eu-west-1": "438346466558"
+            "eu-west-1": "438346466558",
+            "ap-northeast-1": "351501993468"
         }[region_name]
     elif algorithm in ["lda"]:
         account_id = {
             "us-east-1": "766337827248",
             "us-east-2": "999911452149",
             "us-west-2": "266724342769",
-            "eu-west-1": "999678624901"
+            "eu-west-1": "999678624901",
+            "ap-northeast-1": "258307448986"
         }[region_name]
     else:
         raise ValueError("Algorithm class:{} doesn't have mapping to account_id with images".format(algorithm))
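
As a rough usage sketch of the reworked training path above (the estimator arguments, job name, and data values are illustrative assumptions, not taken from this commit):

.. code:: python

    import numpy as np
    from sagemaker import KMeans

    # Any built-in algorithm estimator; role and instance settings are placeholders.
    kmeans = KMeans(role='SageMakerRole', train_instance_count=1,
                    train_instance_type='ml.c4.xlarge', k=10)

    # record_set() uploads the array to S3 and returns a RecordSet.
    records = kmeans.record_set(np.random.rand(100, 5).astype('float32'))

    # fit() now takes wait/logs/job_name explicitly instead of **kwargs;
    # wait=False returns as soon as the training job has started.
    kmeans.fit(records, mini_batch_size=50, wait=False, job_name='kmeans-example-job')

    # The new RecordSet helper exposes the channel dict used for training.
    print(records.data_channel())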

src/sagemaker/amazon/common.py

Lines changed: 3 additions & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -10,6 +10,8 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
 import io
 import struct
 import sys

src/sagemaker/amazon/factorization_machines.py

Lines changed: 3 additions & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"). You
 # may not use this file except in compliance with the License. A copy of
@@ -10,6 +10,8 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
 from sagemaker.amazon.amazon_estimator import AmazonAlgorithmEstimatorBase, registry
 from sagemaker.amazon.common import numpy_to_record_serializer, record_deserializer
 from sagemaker.amazon.hyperparameter import Hyperparameter as hp  # noqa
