sync (#63) #64

Closed
wants to merge 1 commit into from
1 change: 1 addition & 0 deletions .gitignore
@@ -20,3 +20,4 @@ examples/tensorflow/distributed_mnist/data
 doc/_build
 **/.DS_Store
 venv/
+*~
189 changes: 110 additions & 79 deletions README.rst

Large diffs are not rendered by default.

126 changes: 126 additions & 0 deletions examples/cli/host/data/model.json
@@ -0,0 +1,126 @@
{
  "nodes": [
    {
      "op": "null",
      "name": "data",
      "inputs": []
    },
    {
      "op": "null",
      "name": "sequential0_dense0_weight",
      "attr": {
        "__dtype__": "0",
        "__lr_mult__": "1.0",
        "__shape__": "(128, 0)",
        "__wd_mult__": "1.0"
      },
      "inputs": []
    },
    {
      "op": "null",
      "name": "sequential0_dense0_bias",
      "attr": {
        "__dtype__": "0",
        "__init__": "zeros",
        "__lr_mult__": "1.0",
        "__shape__": "(128,)",
        "__wd_mult__": "1.0"
      },
      "inputs": []
    },
    {
      "op": "FullyConnected",
      "name": "sequential0_dense0_fwd",
      "attr": {"num_hidden": "128"},
      "inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]]
    },
    {
      "op": "Activation",
      "name": "sequential0_dense0_relu_fwd",
      "attr": {"act_type": "relu"},
      "inputs": [[3, 0, 0]]
    },
    {
      "op": "null",
      "name": "sequential0_dense1_weight",
      "attr": {
        "__dtype__": "0",
        "__lr_mult__": "1.0",
        "__shape__": "(64, 0)",
        "__wd_mult__": "1.0"
      },
      "inputs": []
    },
    {
      "op": "null",
      "name": "sequential0_dense1_bias",
      "attr": {
        "__dtype__": "0",
        "__init__": "zeros",
        "__lr_mult__": "1.0",
        "__shape__": "(64,)",
        "__wd_mult__": "1.0"
      },
      "inputs": []
    },
    {
      "op": "FullyConnected",
      "name": "sequential0_dense1_fwd",
      "attr": {"num_hidden": "64"},
      "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]]
    },
    {
      "op": "Activation",
      "name": "sequential0_dense1_relu_fwd",
      "attr": {"act_type": "relu"},
      "inputs": [[7, 0, 0]]
    },
    {
      "op": "null",
      "name": "sequential0_dense2_weight",
      "attr": {
        "__dtype__": "0",
        "__lr_mult__": "1.0",
        "__shape__": "(10, 0)",
        "__wd_mult__": "1.0"
      },
      "inputs": []
    },
    {
      "op": "null",
      "name": "sequential0_dense2_bias",
      "attr": {
        "__dtype__": "0",
        "__init__": "zeros",
        "__lr_mult__": "1.0",
        "__shape__": "(10,)",
        "__wd_mult__": "1.0"
      },
      "inputs": []
    },
    {
      "op": "FullyConnected",
      "name": "sequential0_dense2_fwd",
      "attr": {"num_hidden": "10"},
      "inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]]
    }
  ],
  "arg_nodes": [0, 1, 2, 5, 6, 9, 10],
  "node_row_ptr": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
  "heads": [[11, 0, 0]],
  "attrs": {"mxnet_version": ["int", 1100]}
}
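This file is the serialized MXNet symbol graph for the three-layer MLP that define_network() builds in examples/cli/train/script.py: Dense(128, relu), Dense(64, relu), Dense(10). The 0 entries in the stored weight shapes, e.g. "(128, 0)", are deferred input dimensions. As a hedged sketch (not part of this diff; the file path is an assumption), the graph can be inspected with MXNet 1.x like so:

import mxnet as mx

sym = mx.sym.load('model.json')    # reconstruct the symbol graph
print(sym.list_arguments())        # 'data' plus each layer's weight and bias

# The deferred 0 axes are resolved from the input shape: 784 for a
# flattened 28x28 MNIST image.
arg_shapes, out_shapes, aux_shapes = sym.infer_shape(data=(1, 784))
print(out_shapes)                  # [(1, 10)] -- ten output classes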
Binary file added examples/cli/host/data/model.params
Binary file not shown.
3 changes: 3 additions & 0 deletions examples/cli/host/run_hosting_example.sh
@@ -0,0 +1,3 @@
#!/bin/bash

sagemaker mxnet host --role-name <your-sagemaker-execution-role>
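(The sagemaker command is the console script this PR registers in setup.py under entry_points; see the setup.py hunk below. The <your-sagemaker-execution-role> placeholder is intentionally left unfilled and should be replaced with the name of an IAM role that SageMaker can assume.)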
41 changes: 41 additions & 0 deletions examples/cli/host/script.py
@@ -0,0 +1,41 @@
from __future__ import print_function

import json
import mxnet as mx
from mxnet import gluon


def model_fn(model_dir):
    """
    Load the Gluon model. Called once when the hosting service starts.

    :param model_dir: The directory where model files are stored.
    :return: a model (in this case a Gluon network)
    """
    symbol = mx.sym.load('%s/model.json' % model_dir)
    outputs = mx.symbol.softmax(data=symbol, name='softmax_label')
    inputs = mx.sym.var('data')
    param_dict = gluon.ParameterDict('model_')
    net = gluon.SymbolBlock(outputs, inputs, param_dict)
    net.load_params('%s/model.params' % model_dir, ctx=mx.cpu())
    return net


def transform_fn(net, data, input_content_type, output_content_type):
    """
    Transform a request using the Gluon model. Called once per request.

    :param net: The Gluon model.
    :param data: The request payload.
    :param input_content_type: The request content type.
    :param output_content_type: The (desired) response content type.
    :return: response payload and content type.
    """
    # we can use content types to vary input/output handling, but
    # here we just assume json for both
    parsed = json.loads(data)
    nda = mx.nd.array(parsed)
    output = net(nda)
    prediction = mx.nd.argmax(output, axis=1)
    response_body = json.dumps(prediction.asnumpy().tolist())
    return response_body, output_content_type
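A hedged local smoke test for these handlers (not part of the diff), assuming script.py is importable from the working directory and that the model.json and model.params produced by the training example sit in ./data:

import json

from script import model_fn, transform_fn   # hypothetical local import

net = model_fn('./data')
payload = json.dumps([[0.0] * 784])         # one flattened, zeroed MNIST image
body, content_type = transform_fn(net, payload,
                                  'application/json', 'application/json')
print(body)   # a one-element list with the predicted class index, e.g. [5.0]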
4 binary files not shown (file names not rendered in this view).
10 changes: 10 additions & 0 deletions examples/cli/train/download_training_data.py
@@ -0,0 +1,10 @@
from mxnet import gluon


def download_training_data():
    gluon.data.vision.MNIST('./data/training', train=True)
    gluon.data.vision.MNIST('./data/training', train=False)


if __name__ == "__main__":
    download_training_data()
7 changes: 7 additions & 0 deletions examples/cli/train/hyperparameters.json
@@ -0,0 +1,7 @@
{
  "batch_size": 100,
  "epochs": 10,
  "learning_rate": 0.1,
  "momentum": 0.9,
  "log_interval": 100
}
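These keys mirror the hyperparameters.get(...) defaults in examples/cli/train/script.py, so values can be tuned here without editing the training code; any key omitted from this file simply falls back to the script's default.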
4 changes: 4 additions & 0 deletions examples/cli/train/run_training_example.sh
@@ -0,0 +1,4 @@
#!/bin/bash

python ./download_training_data.py
sagemaker mxnet train --role-name <your-sagemaker-execution-role>
118 changes: 118 additions & 0 deletions examples/cli/train/script.py
@@ -0,0 +1,118 @@
import logging
import time

import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from mxnet.gluon import nn

logger = logging.getLogger(__name__)


def train(channel_input_dirs, hyperparameters, **kwargs):
    # SageMaker passes num_cpus, num_gpus and other args we can use to tailor
    # training to the current container environment, but here we just use a
    # simple CPU context.
    ctx = mx.cpu()

    # retrieve the hyperparameters we set (with some defaults)
    batch_size = hyperparameters.get('batch_size', 100)
    epochs = hyperparameters.get('epochs', 10)
    learning_rate = hyperparameters.get('learning_rate', 0.1)
    momentum = hyperparameters.get('momentum', 0.9)
    log_interval = hyperparameters.get('log_interval', 100)

    training_data = channel_input_dirs['training']

    # Load training and validation data. We use the gluon.data.vision.MNIST
    # class because of its built-in MNIST pre-processing logic, but point it at
    # the location where SageMaker placed the data files, so it doesn't
    # download them again.
    train_data = get_train_data(training_data, batch_size)
    val_data = get_val_data(training_data, batch_size)

    # define the network
    net = define_network()

    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate, 'momentum': momentum})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for epoch in range(epochs):
        # reset the metric at the beginning of each epoch
        metric.reset()
        btic = time.time()
        for i, (data, label) in enumerate(train_data):
            # copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording the computation graph with record().
            # Recorded graphs can then be differentiated with backward().
            with autograd.record():
                output = net(data)
                L = loss(output, label)
                L.backward()
            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # update the metric with this batch's results
            metric.update([label], [output])

            if i % log_interval == 0 and i > 0:
                name, acc = metric.get()
                logger.info('[Epoch %d Batch %d] Training: %s=%f, %f samples/s' %
                            (epoch, i, name, acc, batch_size / (time.time() - btic)))

            btic = time.time()

        name, acc = metric.get()
        logger.info('[Epoch %d] Training: %s=%f' % (epoch, name, acc))

        name, val_acc = test(ctx, net, val_data)
        logger.info('[Epoch %d] Validation: %s=%f' % (epoch, name, val_acc))

    return net


def save(net, model_dir):
    # save the model: export the symbolic graph and the trained parameters
    y = net(mx.sym.var('data'))
    y.save('%s/model.json' % model_dir)
    net.collect_params().save('%s/model.params' % model_dir)


def define_network():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(128, activation='relu'))
        net.add(nn.Dense(64, activation='relu'))
        net.add(nn.Dense(10))
    return net


def input_transformer(data, label):
    # flatten the 28x28 image and scale pixel values to [0, 1]
    data = data.reshape((-1,)).astype(np.float32) / 255
    return data, label


def get_train_data(data_dir, batch_size):
    return gluon.data.DataLoader(
        gluon.data.vision.MNIST(data_dir, train=True, transform=input_transformer),
        batch_size=batch_size, shuffle=True, last_batch='discard')


def get_val_data(data_dir, batch_size):
    return gluon.data.DataLoader(
        gluon.data.vision.MNIST(data_dir, train=False, transform=input_transformer),
        batch_size=batch_size, shuffle=False)


def test(ctx, net, val_data):
    metric = mx.metric.Accuracy()
    for data, label in val_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        metric.update([label], [output])
    return metric.get()
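A hedged sketch (not part of the PR) of driving this script locally, assuming download_training_data.py has already populated ./data/training and that this module is importable as script:

import json

from script import train, save   # hypothetical local import

with open('hyperparameters.json') as f:
    hyperparameters = json.load(f)

net = train(channel_input_dirs={'training': './data/training'},
            hyperparameters=hyperparameters)
save(net, '.')   # writes ./model.json and ./model.params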
7 changes: 6 additions & 1 deletion setup.py
@@ -1,9 +1,10 @@
 import os
-from setuptools import setup, find_packages
 from glob import glob
 from os.path import basename
 from os.path import splitext
+
+from setuptools import setup, find_packages
 
 
 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
@@ -36,4 +37,8 @@ def read(fname):
     extras_require={
         'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist',
                  'mock', 'tensorflow>=1.3.0', 'contextlib2']},
+
+    entry_points={
+        'console_scripts': ['sagemaker=sagemaker.cli.main:main'],
+    }
 )
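The new entry_points block is what backs the sagemaker command used by both example shell scripts: installing the package (e.g. pip install .) creates a sagemaker executable that dispatches to sagemaker.cli.main:main.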
4 changes: 2 additions & 2 deletions src/sagemaker/amazon/amazon_estimator.py
@@ -28,8 +28,8 @@ class AmazonAlgorithmEstimatorBase(EstimatorBase):
     """Base class for Amazon first-party Estimator implementations. This class isn't intended
     to be instantiated directly."""
 
-    feature_dim = hp('feature_dim', (validation.isint, validation.gt(0)))
-    mini_batch_size = hp('mini_batch_size', (validation.isint, validation.gt(0)))
+    feature_dim = hp('feature_dim', validation.gt(0), data_type=int)
+    mini_batch_size = hp('mini_batch_size', validation.gt(0), data_type=int)
 
     def __init__(self, role, train_instance_count, train_instance_type, data_location=None, **kwargs):
         """Initialize an AmazonAlgorithmEstimatorBase.
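For context: the change replaces a tuple of validators (with an explicit isint check) by a single range validator plus a data_type that performs the integer coercion. Below is a hedged, self-contained sketch of the descriptor pattern this implies; the class is a hypothetical stand-in, not the SDK's actual Hyperparameter implementation:

class hp(object):
    # minimal data-descriptor sketch: coerce with data_type, then validate
    def __init__(self, name, validate=lambda _: True, data_type=str):
        self.name = name
        self.validate = validate
        self.data_type = data_type

    def __set__(self, obj, value):
        value = self.data_type(value)       # e.g. int('5') -> 5
        if not self.validate(value):
            raise ValueError('invalid value for %s: %s' % (self.name, value))
        obj.__dict__[self.name] = value

    def __get__(self, obj, objtype=None):
        return self if obj is None else obj.__dict__.get(self.name)


def gt(minimum):
    # mirrors validation.gt from the diff
    return lambda value: value > minimum


class Example(object):
    feature_dim = hp('feature_dim', gt(0), data_type=int)


e = Example()
e.feature_dim = '784'   # coerced to int 784, passes gt(0)
# e.feature_dim = 0     # would raise ValueError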