@@ -13,19 +13,15 @@
 from __future__ import absolute_import
 
 import gzip
-import io
 import json
 import os
 import pickle
 import sys
 
-import boto3
-import numpy as np
 import pytest
 
 import sagemaker
 from sagemaker.amazon.amazon_estimator import registry
-from sagemaker.amazon.common import write_numpy_to_dense_tensor
 from sagemaker.estimator import Estimator
 from sagemaker.utils import name_from_base
 from tests.integ import DATA_DIR
@@ -57,6 +53,7 @@ def test_byo_estimator(sagemaker_session, region):
 
     """
     image_name = registry(region) + "/factorization-machines:1"
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
 
     with timeout(minutes=15):
         data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
@@ -65,19 +62,11 @@ def test_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
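
For context on the deleted block: it rebuilt the RecordIO-protobuf payload in-process on every run, taking 100 MNIST examples and streaming them through write_numpy_to_dense_tensor into an in-memory buffer. The checked-in dummy_tensor fixture replaces that work; the diff doesn't show how the fixture was produced, but a one-off script reusing the removed calls would look roughly like this sketch (filenames are illustrative, and it assumes Python 3 for the pickle encoding):

import gzip
import io
import pickle

import numpy as np

from sagemaker.amazon.common import write_numpy_to_dense_tensor

# Mirror the removed in-test preparation: 100 examples as float32 vectors,
# labels binarized exactly as the old code did (1.0 where the digit is 0).
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, _, _ = pickle.load(f, encoding='latin1')

vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0,
                  1.0, 0.0).astype('float32')

# Serialize to RecordIO-protobuf and save the buffer as a reusable fixture.
buf = io.BytesIO()
write_numpy_to_dense_tensor(buf, vectors, labels)
with open('dummy_tensor', 'wb') as out:
    out.write(buf.getvalue())

This also explains the import hunk above: with the fixture checked in, the tests no longer need io, boto3, numpy, or write_numpy_to_dense_tensor.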
@@ -111,6 +100,7 @@ def test_byo_estimator(sagemaker_session, region):
 def test_async_byo_estimator(sagemaker_session, region):
     image_name = registry(region) + "/factorization-machines:1"
     endpoint_name = name_from_base('byo')
+    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
     training_job_name = ""
 
     with timeout(minutes=5):
@@ -120,19 +110,11 @@ def test_async_byo_estimator(sagemaker_session, region):
         with gzip.open(data_path, 'rb') as f:
             train_set, _, _ = pickle.load(f, **pickle_args)
 
-        # take 100 examples for faster execution
-        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
-        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')
-
-        buf = io.BytesIO()
-        write_numpy_to_dense_tensor(buf, vectors, labels)
-        buf.seek(0)
-
-        bucket = sagemaker_session.default_bucket()
         prefix = 'test_byo_estimator'
         key = 'recordio-pb-data'
-        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
-        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
+
+        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
+                                                      key_prefix=os.path.join(prefix, 'train', key))
 
         estimator = Estimator(image_name=image_name,
                               role='SageMakerRole', train_instance_count=1,
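
A note on the replacement call, as I understand the SDK: Session.upload_data targets the session's default bucket when none is given, stores a single file under {key_prefix}/{filename}, and returns the resulting S3 URI, which is why the hand-rolled boto3 upload and the manual 's3://...'.format(...) string are no longer needed. A minimal sketch of what the new line resolves to (the bucket name is illustrative; default buckets follow the sagemaker-{region}-{account} pattern):

s3_train_data = sagemaker_session.upload_data(
    path=training_data_path,  # local file: <DATA_DIR>/dummy_tensor
    key_prefix='test_byo_estimator/train/recordio-pb-data')
# e.g. 's3://sagemaker-us-west-2-123456789012/test_byo_estimator/train/recordio-pb-data/dummy_tensor'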