Commit 9784c43

Merge branch 'master' into warning_msg
2 parents: 77a6523 + 28c23bc

8 files changed: +117 -35 lines

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
@@ -1,5 +1,23 @@
 # Changelog
 
+## v1.19.0 (2019-04-30)
+
+### Features
+
+ * add document embedding support to Object2Vec algorithm
+
+## v1.18.19 (2019-04-30)
+
+### Bug fixes and other changes
+
+ * skip p2/p3 tests in eu-central-1
+
+## v1.18.18 (2019-04-29)
+
+### Bug fixes and other changes
+
+ * add automatic model tuning integ test for TF script mode
+
 ## v1.18.17 (2019-04-25)
 
 ### Bug fixes and other changes

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-1.18.18.dev0
+1.19.1.dev0

src/sagemaker/amazon/object2vec.py

Lines changed: 35 additions & 0 deletions
@@ -21,6 +21,19 @@
 from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
 
 
+def _list_check_subset(valid_super_list):
+    valid_superset = set(valid_super_list)
+
+    def validate(value):
+        if not isinstance(value, str):
+            return False
+
+        val_list = [s.strip() for s in value.split(',')]
+        return set(val_list).issubset(valid_superset)
+
+    return validate
+
+
 class Object2Vec(AmazonAlgorithmEstimatorBase):
 
     repo_name = 'object2vec'
@@ -57,6 +70,14 @@ class Object2Vec(AmazonAlgorithmEstimatorBase):
                    'One of "adagrad", "adam", "rmsprop", "sgd", "adadelta"', str)
     learning_rate = hp('learning_rate', (ge(1e-06), le(1.0)),
                        'A float in [1e-06, 1.0]', float)
+
+    negative_sampling_rate = hp('negative_sampling_rate', (ge(0), le(100)), 'An integer in [0, 100]', int)
+    comparator_list = hp('comparator_list', _list_check_subset(["hadamard", "concat", "abs_diff"]),
+                         'Comma-separated of hadamard, concat, abs_diff. E.g. "hadamard,abs_diff"', str)
+    tied_token_embedding_weight = hp('tied_token_embedding_weight', (), 'Either True or False', bool)
+    token_embedding_storage_type = hp('token_embedding_storage_type', isin("dense", "row_sparse"),
+                                      'One of "dense", "row_sparse"', str)
+
     enc0_network = hp('enc0_network', isin("hcnn", "bilstm", "pooled_embedding"),
                       'One of "hcnn", "bilstm", "pooled_embedding"', str)
     enc1_network = hp('enc1_network', isin("hcnn", "bilstm", "pooled_embedding", "enc0"),
@@ -104,6 +125,10 @@ def __init__(self, role, train_instance_count, train_instance_type,
                  output_layer=None,
                  optimizer=None,
                  learning_rate=None,
+                 negative_sampling_rate=None,
+                 comparator_list=None,
+                 tied_token_embedding_weight=None,
+                 token_embedding_storage_type=None,
                  enc0_network=None,
                  enc1_network=None,
                  enc0_cnn_filter_width=None,
@@ -164,6 +189,10 @@ def __init__(self, role, train_instance_count, train_instance_type,
             output_layer(str): Optional. Type of output layer
             optimizer(str): Optional. Type of optimizer for training
             learning_rate(float): Optional. Learning rate for SGD training
+            negative_sampling_rate(int): Optional. Negative sampling rate
+            comparator_list(str): Optional. Customization of comparator operator
+            tied_token_embedding_weight(bool): Optional. Tying of token embedding layer weight
+            token_embedding_storage_type(str): Optional. Type of token embedding storage
             enc0_network(str): Optional. Network model of encoder "enc0"
             enc1_network(str): Optional. Network model of encoder "enc1"
             enc0_cnn_filter_width(int): Optional. CNN filter width
@@ -197,6 +226,12 @@ def __init__(self, role, train_instance_count, train_instance_type,
         self.output_layer = output_layer
         self.optimizer = optimizer
         self.learning_rate = learning_rate
+
+        self.negative_sampling_rate = negative_sampling_rate
+        self.comparator_list = comparator_list
+        self.tied_token_embedding_weight = tied_token_embedding_weight
+        self.token_embedding_storage_type = token_embedding_storage_type
+
         self.enc0_network = enc0_network
         self.enc1_network = enc1_network
         self.enc0_cnn_filter_width = enc0_cnn_filter_width
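
For reference, the new `_list_check_subset` helper returns a closure that the `comparator_list` hyperparameter uses as its validator: the value must be a comma-separated string whose items are all drawn from the allowed operator set. A minimal standalone sketch of that behaviour (plain Python, outside the SDK's `hp` descriptor machinery; the asserts are illustrative, not part of the commit):

# Standalone sketch of the comparator_list validation logic added above.
def _list_check_subset(valid_super_list):
    valid_superset = set(valid_super_list)

    def validate(value):
        # Only comma-separated strings are accepted; anything else fails validation.
        if not isinstance(value, str):
            return False

        val_list = [s.strip() for s in value.split(',')]
        return set(val_list).issubset(valid_superset)

    return validate

validate = _list_check_subset(["hadamard", "concat", "abs_diff"])
assert validate("hadamard,abs_diff")        # subset of the allowed operators
assert validate("hadamard, concat")         # surrounding whitespace is stripped
assert not validate("hadamard,foobar")      # unknown operator is rejected
assert not validate(["hadamard"])           # non-string values are rejected

This is also why the new unit-test cases below expect ValueError for values such as 'hadamard,foobar' and ['foobar'].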

tests/integ/__init__.py

Lines changed: 4 additions & 3 deletions
@@ -24,10 +24,11 @@
 TRANSFORM_DEFAULT_TIMEOUT_MINUTES = 20
 PYTHON_VERSION = 'py' + str(sys.version_info.major)
 
-# 'eu-central-1' has some p2, but no enough for continuous testing
-HOSTING_NO_P2_REGIONS = ['ca-central-1', 'eu-west-2', 'us-west-1', 'eu-central-1']
+# these regions have some p2 and p3 instances, but not enough for continuous testing
+HOSTING_NO_P2_REGIONS = ['ca-central-1', 'eu-central-1', 'eu-west-2', 'us-west-1']
 HOSTING_NO_P3_REGIONS = ['ap-southeast-1', 'ap-southeast-2', 'ap-south-1', 'ca-central-1',
-                         'eu-west-2', 'us-west-1']
+                         'eu-central-1', 'eu-west-2', 'us-west-1']
+
 # EI is currently only supported in the following regions
 # regions were derived from https://aws.amazon.com/machine-learning/elastic-inference/pricing/
 EI_SUPPORTED_REGIONS = ['us-east-1', 'us-east-2', 'us-west-2', 'eu-west-1', 'ap-northeast-1', 'ap-northeast-2']

tests/integ/test_object2vec.py

Lines changed: 4 additions & 0 deletions
@@ -43,6 +43,10 @@ def test_object2vec(sagemaker_session):
                             enc0_vocab_size=45000,
                             enc_dim=16,
                             num_classes=3,
+                            negative_sampling_rate=0,
+                            comparator_list='hadamard,concat,abs_diff',
+                            tied_token_embedding_weight=False,
+                            token_embedding_storage_type='dense',
                             sagemaker_session=sagemaker_session)
 
     record_set = prepare_record_set_from_local_files(data_path, object2vec.data_location,

tests/integ/test_tf_script_mode.py

Lines changed: 36 additions & 27 deletions
@@ -22,9 +22,8 @@
 from sagemaker.tensorflow import TensorFlow
 from six.moves.urllib.parse import urlparse
 from sagemaker.utils import unique_name_from_base
-import tests.integ as integ
-from tests.integ import kms_utils
-import tests.integ.timeout as timeout
+
+import tests.integ
 
 ROLE = 'SageMakerRole'
 
@@ -35,14 +34,18 @@
 TAGS = [{'Key': 'some-key', 'Value': 'some-value'}]
 
 
-@pytest.fixture(scope='session', params=['ml.c5.xlarge', 'ml.p2.xlarge'])
+@pytest.fixture(scope='session', params=[
+    'ml.c5.xlarge',
+    pytest.param('ml.p2.xlarge',
+                 marks=pytest.mark.skipif(
+                     tests.integ.test_region() in tests.integ.HOSTING_NO_P2_REGIONS,
+                     reason='no ml.p2 instances in this region'))])
 def instance_type(request):
     return request.param
 
 
-@pytest.mark.skipif(integ.test_region() in integ.HOSTING_NO_P2_REGIONS,
-                    reason='no ml.p2 instances in these regions')
-@pytest.mark.skipif(integ.PYTHON_VERSION != 'py3', reason="Script Mode tests are only configured to run with Python 3")
+@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py3',
+                    reason="Script Mode tests are only configured to run with Python 3")
 def test_mnist(sagemaker_session, instance_type):
     estimator = TensorFlow(entry_point=SCRIPT,
                            role='SageMakerRole',
@@ -51,26 +54,26 @@ def test_mnist(sagemaker_session, instance_type):
                            sagemaker_session=sagemaker_session,
                            py_version='py3',
                            framework_version=TensorFlow.LATEST_VERSION,
-                           metric_definitions=[{'Name': 'train:global_steps', 'Regex': r'global_step\/sec:\s(.*)'}])
+                           metric_definitions=[
+                               {'Name': 'train:global_steps', 'Regex': r'global_step\/sec:\s(.*)'}])
     inputs = estimator.sagemaker_session.upload_data(
         path=os.path.join(RESOURCE_PATH, 'data'),
         key_prefix='scriptmode/mnist')
 
-    with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
+    with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
         estimator.fit(inputs=inputs, job_name=unique_name_from_base('test-tf-sm-mnist'))
     _assert_s3_files_exist(estimator.model_dir,
                            ['graph.pbtxt', 'model.ckpt-0.index', 'model.ckpt-0.meta'])
     df = estimator.training_job_analytics.dataframe()
-    print(df)
     assert df.size > 0
 
 
 def test_server_side_encryption(sagemaker_session):
-
     boto_session = sagemaker_session.boto_session
-    with kms_utils.bucket_with_encryption(boto_session, ROLE) as (bucket_with_kms, kms_key):
-
-        output_path = os.path.join(bucket_with_kms, 'test-server-side-encryption', time.strftime('%y%m%d-%H%M'))
+    with tests.integ.kms_utils.bucket_with_encryption(boto_session, ROLE) as (
+            bucket_with_kms, kms_key):
+        output_path = os.path.join(bucket_with_kms, 'test-server-side-encryption',
+                                   time.strftime('%y%m%d-%H%M'))
 
         estimator = TensorFlow(entry_point=SCRIPT,
                                role=ROLE,
@@ -88,28 +91,29 @@ def test_server_side_encryption(sagemaker_session):
             path=os.path.join(RESOURCE_PATH, 'data'),
             key_prefix='scriptmode/mnist')
 
-        with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
-            estimator.fit(inputs=inputs, job_name=unique_name_from_base('test-server-side-encryption'))
+        with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
+            estimator.fit(inputs=inputs,
+                          job_name=unique_name_from_base('test-server-side-encryption'))
 
 
 @pytest.mark.canary_quick
-@pytest.mark.skipif(integ.PYTHON_VERSION != 'py3', reason="Script Mode tests are only configured to run with Python 3")
+@pytest.mark.skipif(tests.integ.PYTHON_VERSION != 'py3',
+                    reason="Script Mode tests are only configured to run with Python 3")
 def test_mnist_distributed(sagemaker_session, instance_type):
     estimator = TensorFlow(entry_point=SCRIPT,
                            role=ROLE,
                            train_instance_count=2,
-                           # TODO: change train_instance_type to instance_type once the test is passing consistently
-                           train_instance_type='ml.c5.xlarge',
+                           train_instance_type=instance_type,
                            sagemaker_session=sagemaker_session,
-                           py_version=integ.PYTHON_VERSION,
+                           py_version=tests.integ.PYTHON_VERSION,
                            script_mode=True,
                            framework_version=TensorFlow.LATEST_VERSION,
                            distributions=PARAMETER_SERVER_DISTRIBUTION)
     inputs = estimator.sagemaker_session.upload_data(
         path=os.path.join(RESOURCE_PATH, 'data'),
         key_prefix='scriptmode/distributed_mnist')
 
-    with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
+    with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
         estimator.fit(inputs=inputs, job_name=unique_name_from_base('test-tf-sm-distributed'))
     _assert_s3_files_exist(estimator.model_dir,
                            ['graph.pbtxt', 'model.ckpt-0.index', 'model.ckpt-0.meta'])
@@ -131,22 +135,26 @@ def test_mnist_async(sagemaker_session):
     training_job_name = estimator.latest_training_job.name
     time.sleep(20)
     endpoint_name = training_job_name
-    _assert_training_job_tags_match(sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS)
-    with timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
-        estimator = TensorFlow.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
+    _assert_training_job_tags_match(sagemaker_session.sagemaker_client,
+                                    estimator.latest_training_job.name, TAGS)
+    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        estimator = TensorFlow.attach(training_job_name=training_job_name,
+                                      sagemaker_session=sagemaker_session)
         predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge',
                                      endpoint_name=endpoint_name)
 
         result = predictor.predict(np.zeros(784))
        print('predict result: {}'.format(result))
         _assert_endpoint_tags_match(sagemaker_session.sagemaker_client, predictor.endpoint, TAGS)
-        _assert_model_tags_match(sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS)
+        _assert_model_tags_match(sagemaker_session.sagemaker_client,
+                                 estimator.latest_training_job.name, TAGS)
 
 
 def _assert_s3_files_exist(s3_url, files):
     parsed_url = urlparse(s3_url)
     s3 = boto3.client('s3')
-    contents = s3.list_objects_v2(Bucket=parsed_url.netloc, Prefix=parsed_url.path.lstrip('/'))["Contents"]
+    contents = s3.list_objects_v2(Bucket=parsed_url.netloc, Prefix=parsed_url.path.lstrip('/'))[
+        "Contents"]
     for f in files:
         found = [x['Key'] for x in contents if x['Key'].endswith(f)]
         if not found:
@@ -169,5 +177,6 @@ def _assert_endpoint_tags_match(sagemaker_client, endpoint_name, tags):
 
 
 def _assert_training_job_tags_match(sagemaker_client, training_job_name, tags):
-    training_job_description = sagemaker_client.describe_training_job(TrainingJobName=training_job_name)
+    training_job_description = sagemaker_client.describe_training_job(
+        TrainingJobName=training_job_name)
     _assert_tags_match(sagemaker_client, training_job_description['TrainingJobArn'], tags)
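
The fixture change above moves the p2 skip from the test level into the parameter itself, so the `ml.c5.xlarge` case still runs in regions without p2 capacity. A self-contained sketch of the same `pytest.param(..., marks=...)` pattern; `NO_P2_REGIONS` and `current_region()` here are illustrative stand-ins for `tests.integ.HOSTING_NO_P2_REGIONS` and `tests.integ.test_region()`:

import pytest

# Illustrative stand-ins for the repo's region constants and region lookup.
NO_P2_REGIONS = ['ca-central-1', 'eu-central-1', 'eu-west-2', 'us-west-1']

def current_region():
    return 'eu-central-1'  # hypothetical fixed value for this sketch

@pytest.fixture(params=[
    'ml.c5.xlarge',
    pytest.param('ml.p2.xlarge',
                 marks=pytest.mark.skipif(current_region() in NO_P2_REGIONS,
                                          reason='no ml.p2 instances in this region'))])
def instance_type(request):
    return request.param

def test_uses_instance_type(instance_type):
    # Runs for ml.c5.xlarge everywhere; the ml.p2.xlarge case is skipped
    # in the regions listed above.
    assert instance_type.startswith('ml.')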

tests/unit/test_object2vec.py

Lines changed: 13 additions & 2 deletions
@@ -111,6 +111,10 @@ def test_all_hyperparameters(sagemaker_session):
         output_layer='softmax',
         optimizer='adam',
         learning_rate=0.0001,
+        negative_sampling_rate=1,
+        comparator_list='hadamard, abs_diff',
+        tied_token_embedding_weight=True,
+        token_embedding_storage_type='row_sparse',
         enc0_network='bilstm',
         enc1_network='hcnn',
         enc0_cnn_filter_width=3,
@@ -161,7 +165,11 @@ def test_required_hyper_parameters_value(sagemaker_session, required_hyper_param
     ('optimizer', 0),
     ('enc0_cnn_filter_width', 'string'),
     ('weight_decay', 'string'),
-    ('learning_rate', 'string')
+    ('learning_rate', 'string'),
+    ('negative_sampling_rate', 'some_string'),
+    ('comparator_list', 0),
+    ('comparator_list', ['foobar']),
+    ('token_embedding_storage_type', 123),
 ])
 def test_optional_hyper_parameters_type(sagemaker_session, optional_hyper_parameters, value):
     with pytest.raises(ValueError):
@@ -182,7 +190,10 @@ def test_optional_hyper_parameters_type(sagemaker_session, optional_hyper_parame
     ('weight_decay', 200000),
     ('enc0_cnn_filter_width', 2000),
     ('learning_rate', 0),
-    ('learning_rate', 2)
+    ('learning_rate', 2),
+    ('negative_sampling_rate', -1),
+    ('comparator_list', 'hadamard,foobar'),
+    ('token_embedding_storage_type', 'foobar'),
 ])
 def test_optional_hyper_parameters_value(sagemaker_session, optional_hyper_parameters, value):
     with pytest.raises(ValueError):
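
For context, each new "value" case above is expected to raise ValueError because it violates a constraint declared on the new hyperparameters in object2vec.py. A plain-Python sketch of those checks (illustrative only, not the SDK's `hp` implementation):

# negative_sampling_rate=-1 falls outside the declared [0, 100] range.
assert not (0 <= -1 <= 100)

# comparator_list='hadamard,foobar' contains an operator outside the allowed set.
allowed_comparators = {"hadamard", "concat", "abs_diff"}
items = {s.strip() for s in "hadamard,foobar".split(',')}
assert not items.issubset(allowed_comparators)

# token_embedding_storage_type='foobar' is not one of the permitted values.
assert "foobar" not in ("dense", "row_sparse")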

tox.ini

Lines changed: 6 additions & 2 deletions
@@ -60,21 +60,25 @@ commands =
 deps = .[test]
 
 [testenv:flake8]
-basepython = python
+basepython = python3
+skipdist = true
+skip_install = true
 deps =
     flake8
     flake8-future-import
 commands = flake8
 
 [testenv:pylint]
 basepython = python3
+skipdist = true
+skip_install = true
 deps =
     pylint==2.3.1
 commands =
     python -m pylint --rcfile=.pylintrc -j 0 src/sagemaker
 
 [testenv:twine]
-basepython = python
+basepython = python3
 # twine check was added starting in 1.12.0
 # https://github.com/pypa/twine/blob/master/docs/changelog.rst
 deps =
