Skip to content

Commit e93eff6

Browse files
jesterhazy authored and winstonaws committed
add sagemaker cli (#32)
* add sagemaker cli
* remove unnecessary close
* address PR comments
* tidy up imports
* fix imports, flake8 errors
* improve help message for bucket-name
* remove default role name
* fix log-level and py3 tests, add copyright
* update cli example scripts
1 parent 2e0ed8f commit e93eff6

19 files changed

+838
-1
lines changed

examples/cli/host/data/model.json

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
{
2+
"nodes": [
3+
{
4+
"op": "null",
5+
"name": "data",
6+
"inputs": []
7+
},
8+
{
9+
"op": "null",
10+
"name": "sequential0_dense0_weight",
11+
"attr": {
12+
"__dtype__": "0",
13+
"__lr_mult__": "1.0",
14+
"__shape__": "(128, 0)",
15+
"__wd_mult__": "1.0"
16+
},
17+
"inputs": []
18+
},
19+
{
20+
"op": "null",
21+
"name": "sequential0_dense0_bias",
22+
"attr": {
23+
"__dtype__": "0",
24+
"__init__": "zeros",
25+
"__lr_mult__": "1.0",
26+
"__shape__": "(128,)",
27+
"__wd_mult__": "1.0"
28+
},
29+
"inputs": []
30+
},
31+
{
32+
"op": "FullyConnected",
33+
"name": "sequential0_dense0_fwd",
34+
"attr": {"num_hidden": "128"},
35+
"inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]]
36+
},
37+
{
38+
"op": "Activation",
39+
"name": "sequential0_dense0_relu_fwd",
40+
"attr": {"act_type": "relu"},
41+
"inputs": [[3, 0, 0]]
42+
},
43+
{
44+
"op": "null",
45+
"name": "sequential0_dense1_weight",
46+
"attr": {
47+
"__dtype__": "0",
48+
"__lr_mult__": "1.0",
49+
"__shape__": "(64, 0)",
50+
"__wd_mult__": "1.0"
51+
},
52+
"inputs": []
53+
},
54+
{
55+
"op": "null",
56+
"name": "sequential0_dense1_bias",
57+
"attr": {
58+
"__dtype__": "0",
59+
"__init__": "zeros",
60+
"__lr_mult__": "1.0",
61+
"__shape__": "(64,)",
62+
"__wd_mult__": "1.0"
63+
},
64+
"inputs": []
65+
},
66+
{
67+
"op": "FullyConnected",
68+
"name": "sequential0_dense1_fwd",
69+
"attr": {"num_hidden": "64"},
70+
"inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]]
71+
},
72+
{
73+
"op": "Activation",
74+
"name": "sequential0_dense1_relu_fwd",
75+
"attr": {"act_type": "relu"},
76+
"inputs": [[7, 0, 0]]
77+
},
78+
{
79+
"op": "null",
80+
"name": "sequential0_dense2_weight",
81+
"attr": {
82+
"__dtype__": "0",
83+
"__lr_mult__": "1.0",
84+
"__shape__": "(10, 0)",
85+
"__wd_mult__": "1.0"
86+
},
87+
"inputs": []
88+
},
89+
{
90+
"op": "null",
91+
"name": "sequential0_dense2_bias",
92+
"attr": {
93+
"__dtype__": "0",
94+
"__init__": "zeros",
95+
"__lr_mult__": "1.0",
96+
"__shape__": "(10,)",
97+
"__wd_mult__": "1.0"
98+
},
99+
"inputs": []
100+
},
101+
{
102+
"op": "FullyConnected",
103+
"name": "sequential0_dense2_fwd",
104+
"attr": {"num_hidden": "10"},
105+
"inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]]
106+
}
107+
],
108+
"arg_nodes": [0, 1, 2, 5, 6, 9, 10],
109+
"node_row_ptr": [
110+
0,
111+
1,
112+
2,
113+
3,
114+
4,
115+
5,
116+
6,
117+
7,
118+
8,
119+
9,
120+
10,
121+
11,
122+
12
123+
],
124+
"heads": [[11, 0, 0]],
125+
"attrs": {"mxnet_version": ["int", 1100]}
126+
}

examples/cli/host/data/model.params

428 KB
Binary file not shown.
+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
2+
3+
sagemaker mxnet host --role-name <your-sagemaker-execution-role>

examples/cli/host/script.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from __future__ import print_function
2+
3+
import json
4+
import mxnet as mx
5+
from mxnet import gluon
6+
7+
8+
def model_fn(model_dir):
    """
    Build the Gluon network used for hosting. Called once when the hosting service starts.

    :param model_dir: The directory where model files are stored.
    :return: a model (in this case a Gluon network)
    """
    symbol_path = '%s/model.json' % model_dir
    params_path = '%s/model.params' % model_dir

    # Wrap the stored symbol graph in a softmax so the block outputs normalized scores.
    network_outputs = mx.symbol.softmax(data=mx.sym.load(symbol_path), name='softmax_label')
    network_inputs = mx.sym.var('data')

    block = gluon.SymbolBlock(network_outputs, network_inputs, gluon.ParameterDict('model_'))
    block.load_params(params_path, ctx=mx.cpu())
    return block
22+
23+
24+
def transform_fn(net, data, input_content_type, output_content_type):
    """
    Run one inference request through the Gluon model. Called once per request.

    :param net: The Gluon model.
    :param data: The request payload.
    :param input_content_type: The request content type.
    :param output_content_type: The (desired) response content type.
    :return: response payload and content type.
    """
    # Content types could select different (de)serializers, but this example
    # simply treats both the request and the response as JSON.
    batch = mx.nd.array(json.loads(data))
    predicted_classes = mx.nd.argmax(net(batch), axis=1)
    return json.dumps(predicted_classes.asnumpy().tolist()), output_content_type
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from mxnet import gluon
2+
3+
4+
def download_training_data():
    """Instantiate the MNIST train and test datasets rooted at ./data/training."""
    # Train split first, then the held-out split, both into the same directory.
    for is_train_split in (True, False):
        gluon.data.vision.MNIST('./data/training', train=is_train_split)


if __name__ == "__main__":
    download_training_data()
+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"batch_size": 100,
3+
"epochs": 10,
4+
"learning_rate": 0.1,
5+
"momentum": 0.9,
6+
"log_interval": 100
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
2+
3+
python ./download_training_data.py
4+
sagemaker mxnet train --role-name <your-sagemaker-execution-role>

examples/cli/train/script.py

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import logging
2+
import time
3+
4+
import mxnet as mx
5+
import numpy as np
6+
from mxnet import gluon, autograd
7+
from mxnet.gluon import nn
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def train(channel_input_dirs, hyperparameters, **kwargs):
    """
    Train the example MNIST network on CPU and return it.

    :param channel_input_dirs: mapping of channel name to data directory; only
        the 'training' entry is read.
    :param hyperparameters: mapping that may contain 'batch_size', 'epochs',
        'learning_rate', 'momentum' and 'log_interval'; missing entries fall
        back to the defaults below. A 'log_interval' of 0 or less disables the
        per-batch progress log.
    :param kwargs: any further arguments supplied by the caller (unused here).
    :return: the trained Gluon network.
    """
    # SageMaker passes num_cpus, num_gpus and other args we can use to tailor training to
    # the current container environment, but here we just use simple cpu context.
    ctx = mx.cpu()

    # retrieve the hyperparameters we set in notebook (with some defaults)
    batch_size = hyperparameters.get('batch_size', 100)
    epochs = hyperparameters.get('epochs', 10)
    learning_rate = hyperparameters.get('learning_rate', 0.1)
    momentum = hyperparameters.get('momentum', 0.9)
    log_interval = hyperparameters.get('log_interval', 100)

    training_data = channel_input_dirs['training']

    # load training and validation data
    # we use the gluon.data.vision.MNIST class because of its built in mnist pre-processing logic,
    # but point it at the location where SageMaker placed the data files, so it doesn't download them again.
    train_data = get_train_data(training_data, batch_size)
    val_data = get_val_data(training_data, batch_size)

    # define the network
    net = define_network()

    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate, 'momentum': momentum})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for epoch in range(epochs):
        # reset data iterator and metric at beginning of epoch.
        metric.reset()
        btic = time.time()
        for i, (data, label) in enumerate(train_data):
            # Copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording computation graph with record() section.
            # Recorded graphs can then be differentiated with backward.
            with autograd.record():
                output = net(data)
                batch_loss = loss(output, label)
            batch_loss.backward()
            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # update metric at last.
            metric.update([label], [output])

            # Guard the modulo so log_interval == 0 disables logging instead of
            # raising ZeroDivisionError.
            if log_interval > 0 and i > 0 and i % log_interval == 0:
                name, acc = metric.get()
                elapsed = time.time() - btic
                # A coarse clock can report zero elapsed time; avoid dividing by it.
                speed = batch_size / elapsed if elapsed > 0 else float('inf')
                logger.info('[Epoch %d Batch %d] Training: %s=%f, %f samples/s',
                            epoch, i, name, acc, speed)

            btic = time.time()

        name, acc = metric.get()
        logger.info('[Epoch %d] Training: %s=%f', epoch, name, acc)

        name, val_acc = test(ctx, net, val_data)
        logger.info('[Epoch %d] Validation: %s=%f', epoch, name, val_acc)

    return net
76+
77+
78+
def save(net, model_dir):
    """
    Write the network's symbol graph and parameters into model_dir.

    :param net: the network to save.
    :param model_dir: directory that receives model.json and model.params.
    """
    symbol_file = '%s/model.json' % model_dir
    params_file = '%s/model.params' % model_dir

    # Calling the network on a symbolic 'data' variable yields the symbol to save.
    net(mx.sym.var('data')).save(symbol_file)
    net.collect_params().save(params_file)
83+
84+
85+
def define_network():
    """Return a sequential network of three dense layers (128, 64 and 10 units)."""
    network = nn.Sequential()
    with network.name_scope():
        # Two ReLU hidden layers followed by a 10-unit output layer without activation.
        layers = [
            nn.Dense(128, activation='relu'),
            nn.Dense(64, activation='relu'),
            nn.Dense(10),
        ]
        for layer in layers:
            network.add(layer)
    return network
92+
93+
94+
def input_transformer(data, label):
    """
    Flatten a sample to one dimension and scale it by 1/255 as float32.

    :param data: the sample to transform.
    :param label: the sample's label, returned unchanged.
    :return: tuple of (transformed data, label).
    """
    flattened = data.reshape((-1,))
    scaled = flattened.astype(np.float32) / 255
    return scaled, label
97+
98+
99+
def get_train_data(data_dir, batch_size):
    """
    Build the shuffled loader over the MNIST training split.

    :param data_dir: directory holding the MNIST files.
    :param batch_size: number of samples per batch; an incomplete last batch is discarded.
    :return: a gluon DataLoader.
    """
    train_set = gluon.data.vision.MNIST(data_dir, train=True, transform=input_transformer)
    return gluon.data.DataLoader(train_set, batch_size=batch_size,
                                 shuffle=True, last_batch='discard')
103+
104+
105+
def get_val_data(data_dir, batch_size):
    """
    Build the unshuffled loader over the MNIST test split, used for validation.

    :param data_dir: directory holding the MNIST files.
    :param batch_size: number of samples per batch.
    :return: a gluon DataLoader.
    """
    validation_set = gluon.data.vision.MNIST(data_dir, train=False, transform=input_transformer)
    return gluon.data.DataLoader(validation_set, batch_size=batch_size, shuffle=False)
109+
110+
111+
def test(ctx, net, val_data):
    """
    Measure the network's accuracy over a validation loader.

    :param ctx: context the batches are copied to before evaluation.
    :param net: the network to evaluate.
    :param val_data: iterable yielding (data, label) batches.
    :return: the result of Accuracy.get() after all batches were seen.
    """
    accuracy = mx.metric.Accuracy()
    for batch, target in val_data:
        batch = batch.as_in_context(ctx)
        target = target.as_in_context(ctx)
        accuracy.update([target], [net(batch)])
    return accuracy.get()

setup.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import os
2-
from setuptools import setup, find_packages
32
from glob import glob
43
from os.path import basename
54
from os.path import splitext
65

6+
from setuptools import setup, find_packages
7+
78

89
def read(fname):
    """
    Return the text of a file located next to this setup script.

    :param fname: file name, resolved relative to this file's directory.
    :return: the file's full contents as a string.
    """
    # Use a context manager so the handle is closed deterministically instead
    # of being left for the garbage collector.
    with open(os.path.join(os.path.dirname(__file__), fname)) as handle:
        return handle.read()
@@ -36,4 +37,8 @@ def read(fname):
3637
extras_require={
3738
'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist',
3839
'mock', 'tensorflow>=1.3.0', 'contextlib2']},
40+
41+
entry_points={
42+
'console_scripts': ['sagemaker=sagemaker.cli.main:main'],
43+
}
3944
)

src/sagemaker/cli/__init__.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.

0 commit comments

Comments
 (0)