
Commit 021b67d

Update byo integ test and tuner test to use sagemaker upload_data method (#341)

1 parent 06f72ec

File tree: 3 files changed, +12 -40 lines

tests/data/dummy_tensor

311 KB (binary file not shown)
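
The fixture replaces the RecordIO-protobuf tensor the tests previously built in memory on every run. As a hedged sketch (not part of this commit), a file like it could be regenerated by mirroring the deleted tensor-building code; the output path and the pickle_args handling here are assumptions inferred from the diff context below.

import gzip
import os
import pickle
import sys

import numpy as np

from sagemaker.amazon.common import write_numpy_to_dense_tensor

# Assumed locations; the tests resolve these via tests.integ.DATA_DIR.
data_path = os.path.join('tests', 'data', 'one_p_mnist', 'mnist.pkl.gz')
output_path = os.path.join('tests', 'data', 'dummy_tensor')

pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}
with gzip.open(data_path, 'rb') as f:
    train_set, _, _ = pickle.load(f, **pickle_args)

# Take 100 examples for faster execution, as the deleted test code did.
vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

# write_numpy_to_dense_tensor accepts any binary writable, so a file on disk
# works the same way the deleted io.BytesIO buffer did.
with open(output_path, 'wb') as out:
    write_numpy_to_dense_tensor(out, vectors, labels)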

tests/integ/test_byo_estimator.py

Lines changed: 8 additions & 26 deletions

@@ -13,19 +13,15 @@
 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 
-import boto3
-import numpy as np
 import pytest
 
 import sagemaker
 from sagemaker.amazon.amazon_estimator import registry
-from sagemaker.amazon.common import write_numpy_to_dense_tensor
 from sagemaker.estimator import Estimator
 from sagemaker.utils import name_from_base
 from tests.integ import DATA_DIR
@@ -57,6 +53,7 @@ def test_byo_estimator(sagemaker_session, region):
 
     """
     image_name = registry(region) + "/factorization-machines:1"
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
     with timeout(minutes=15):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
@@ -65,19 +62,11 @@ def test_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
@@ -111,6 +100,7 @@ def test_byo_estimator(sagemaker_session, region):
 def test_async_byo_estimator(sagemaker_session, region):
     image_name = registry(region) + "/factorization-machines:1"
     endpoint_name = name_from_base('byo')
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
     training_job_name = ""
 
     with timeout(minutes=5):
@@ -120,19 +110,11 @@ def test_async_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
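
Both hunks in this file make the same swap. Pulled out of the tests, a minimal standalone sketch of the before and after (the session construction and local path are illustrative; the tests receive sagemaker_session as a pytest fixture):

import os

import boto3
import sagemaker

sagemaker_session = sagemaker.Session()  # the tests get this from a fixture
bucket = sagemaker_session.default_bucket()
prefix = 'test_byo_estimator'
key = 'recordio-pb-data'
training_data_path = os.path.join('tests', 'data', 'dummy_tensor')  # assumed path

# Before: hand-roll the upload with boto3 and assemble the S3 URI yourself.
with open(training_data_path, 'rb') as buf:
    boto3.resource('s3').Bucket(bucket).Object(
        os.path.join(prefix, 'train', key)).upload_fileobj(buf)
s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

# After: one call uploads the file under the prefix and returns its S3 URI.
s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                              key_prefix=os.path.join(prefix, 'train', key))

One nuance: for a single file, upload_data keys the object under key_prefix plus the local file name, so the returned URI ends in .../recordio-pb-data/dummy_tensor rather than the hand-built .../recordio-pb-data. Since the training channel accepts an S3 prefix, this does not change test behavior.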

tests/integ/test_tuner.py

Lines changed: 4 additions & 14 deletions

@@ -13,20 +13,18 @@
 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 import time
 
-import boto3
 import numpy as np
 import pytest
 
 from sagemaker import KMeans, LDA, RandomCutForest
 from sagemaker.amazon.amazon_estimator import registry
-from sagemaker.amazon.common import read_records, write_numpy_to_dense_tensor
+from sagemaker.amazon.common import read_records
 from sagemaker.chainer import Chainer
 from sagemaker.estimator import Estimator
 from sagemaker.mxnet.estimator import MXNet
@@ -368,6 +366,7 @@ def test_tuning_byo_estimator(sagemaker_session):
     Default predictor is updated with json serializer and deserializer.
     """
     image_name = registry(sagemaker_session.boto_session.region_name) + '/factorization-machines:1'
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
     with timeout(minutes=15):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
@@ -376,19 +375,10 @@ def test_tuning_byo_estimator(sagemaker_session):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
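
The tuner test keeps its read_records import because it is used elsewhere in the file; the same helper also gives a quick way to sanity-check the new fixture. A small sketch, assuming the fixture was built from the 100-example slice the deleted code used:

import os

from sagemaker.amazon.common import read_records

# read_records eagerly parses Amazon Record protobufs from a binary stream.
with open(os.path.join('tests', 'data', 'dummy_tensor'), 'rb') as f:  # assumed path
    records = read_records(f)

assert len(records) == 100  # holds only if the fixture mirrors the old 100-example slice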
