@@ -88,7 +88,6 @@ def test_sklearn_processor_with_required_parameters(
88
88
exists_mock , isfile_mock , botocore_resolver , sagemaker_session , sklearn_version
89
89
):
90
90
botocore_resolver .return_value .construct_endpoint .return_value = {"hostname" : ECR_HOSTNAME }
91
-
92
91
processor = SKLearnProcessor (
93
92
role = ROLE ,
94
93
instance_type = "ml.m4.xlarge" ,
@@ -99,34 +98,31 @@ def test_sklearn_processor_with_required_parameters(
99
98
100
99
processor .run (code = "/local/path/to/processing_code.py" )
101
100
102
- expected_args = _get_expected_args_modular_code (processor ._current_job_name )
101
+ expected_args = _get_expected_args (processor ._current_job_name )
103
102
104
103
sklearn_image_uri = (
105
104
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-py3"
106
105
).format (sklearn_version )
107
106
expected_args ["app_specification" ]["ImageUri" ] = sklearn_image_uri
108
-
109
107
sagemaker_session .process .assert_called_with (** expected_args )
110
108
111
109
112
110
@patch ("sagemaker.utils._botocore_resolver" )
113
111
@patch ("os.path.exists" , return_value = True )
114
112
@patch ("os.path.isfile" , return_value = True )
115
113
def test_sklearn_with_all_parameters (
116
- exists_mock , isfile_mock , botocore_resolver , sklearn_version , sagemaker_session , uploaded_code
114
+ exists_mock , isfile_mock , botocore_resolver , sklearn_version , sagemaker_session
117
115
):
118
116
botocore_resolver .return_value .construct_endpoint .return_value = {"hostname" : ECR_HOSTNAME }
119
117
120
118
processor = SKLearnProcessor (
121
119
role = ROLE ,
122
120
framework_version = sklearn_version ,
123
- command = ["Rscript" ],
124
121
instance_type = "ml.m4.xlarge" ,
125
122
instance_count = 1 ,
126
123
volume_size_in_gb = 100 ,
127
124
volume_kms_key = "arn:aws:kms:us-west-2:012345678901:key/volume-kms-key" ,
128
125
output_kms_key = "arn:aws:kms:us-west-2:012345678901:key/output-kms-key" ,
129
- code_location = MOCKED_S3_URI ,
130
126
max_runtime_in_seconds = 3600 ,
131
127
base_job_name = "my_sklearn_processor" ,
132
128
env = {"my_env_variable" : "my_env_variable_value" },
@@ -140,21 +136,18 @@ def test_sklearn_with_all_parameters(
140
136
sagemaker_session = sagemaker_session ,
141
137
)
142
138
143
- with patch ("sagemaker.estimator.tar_and_upload_dir" , return_value = uploaded_code ):
144
- processor .run (
145
- code = "processing_code.py" ,
146
- source_dir = "/local/path/to/source_dir" ,
147
- dependencies = ["/local/path/to/dep_01" ],
148
- inputs = _get_data_inputs_all_parameters (),
149
- outputs = _get_data_outputs_all_parameters (),
150
- arguments = ["--drop-columns" , "'SelfEmployed'" ],
151
- wait = True ,
152
- logs = False ,
153
- job_name = "my_job_name" ,
154
- experiment_config = {"ExperimentName" : "AnExperiment" },
155
- )
139
+ processor .run (
140
+ code = "/local/path/to/processing_code.py" ,
141
+ inputs = _get_data_inputs_all_parameters (),
142
+ outputs = _get_data_outputs_all_parameters (),
143
+ arguments = ["--drop-columns" , "'SelfEmployed'" ],
144
+ wait = True ,
145
+ logs = False ,
146
+ job_name = "my_job_name" ,
147
+ experiment_config = {"ExperimentName" : "AnExperiment" },
148
+ )
156
149
157
- expected_args = _get_expected_args_all_parameters_modular_code (processor ._current_job_name )
150
+ expected_args = _get_expected_args_all_parameters (processor ._current_job_name )
158
151
sklearn_image_uri = (
159
152
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-py3"
160
153
).format (sklearn_version )
@@ -181,21 +174,18 @@ def test_local_mode_disables_local_code_by_default(localsession_mock):
181
174
@patch ("os.path.exists" , return_value = True )
182
175
@patch ("os.path.isfile" , return_value = True )
183
176
def test_sklearn_with_all_parameters_via_run_args (
184
- exists_mock , isfile_mock , botocore_resolver , sklearn_version , sagemaker_session , uploaded_code
177
+ exists_mock , isfile_mock , botocore_resolver , sklearn_version , sagemaker_session
185
178
):
186
179
botocore_resolver .return_value .construct_endpoint .return_value = {"hostname" : ECR_HOSTNAME }
187
- custom_command = ["Rscript" ]
188
180
189
181
processor = SKLearnProcessor (
190
182
role = ROLE ,
191
183
framework_version = sklearn_version ,
192
- command = custom_command ,
193
184
instance_type = "ml.m4.xlarge" ,
194
- instance_count = 2 ,
185
+ instance_count = 1 ,
195
186
volume_size_in_gb = 100 ,
196
187
volume_kms_key = "arn:aws:kms:us-west-2:012345678901:key/volume-kms-key" ,
197
188
output_kms_key = "arn:aws:kms:us-west-2:012345678901:key/output-kms-key" ,
198
- code_location = MOCKED_S3_URI ,
199
189
max_runtime_in_seconds = 3600 ,
200
190
base_job_name = "my_sklearn_processor" ,
201
191
env = {"my_env_variable" : "my_env_variable_value" },
@@ -209,55 +199,37 @@ def test_sklearn_with_all_parameters_via_run_args(
209
199
sagemaker_session = sagemaker_session ,
210
200
)
211
201
212
- with patch ("sagemaker.estimator.tar_and_upload_dir" , return_value = uploaded_code ):
213
- run_args = processor .get_run_args (
214
- code = "processing_code.py" ,
215
- source_dir = "/local/path/to/source_dir" ,
216
- dependencies = ["/local/path/to/dep_01" ],
217
- git_config = None ,
218
- inputs = _get_data_inputs_all_parameters (),
219
- outputs = _get_data_outputs_all_parameters (),
220
- arguments = ["--drop-columns" , "'SelfEmployed'" ],
221
- )
222
-
223
- processor .run (
224
- code = run_args .code ,
225
- inputs = run_args .inputs ,
226
- outputs = run_args .outputs ,
227
- arguments = run_args .arguments ,
228
- wait = True ,
229
- logs = False ,
230
- experiment_config = {"ExperimentName" : "AnExperiment" },
231
- )
202
+ run_args = processor .get_run_args (
203
+ code = "/local/path/to/processing_code.py" ,
204
+ inputs = _get_data_inputs_all_parameters (),
205
+ outputs = _get_data_outputs_all_parameters (),
206
+ arguments = ["--drop-columns" , "'SelfEmployed'" ],
207
+ )
232
208
233
- expected_args = _get_expected_args_all_parameters_modular_code (
234
- processor ._current_job_name ,
235
- instance_count = 2 ,
236
- code_s3_prefix = run_args .code .replace ("/runproc.sh" , "" ),
209
+ processor .run (
210
+ code = run_args .code ,
211
+ inputs = run_args .inputs ,
212
+ outputs = run_args .outputs ,
213
+ arguments = run_args .arguments ,
214
+ wait = True ,
215
+ logs = False ,
216
+ experiment_config = {"ExperimentName" : "AnExperiment" },
237
217
)
218
+
219
+ expected_args = _get_expected_args_all_parameters (processor ._current_job_name )
238
220
sklearn_image_uri = (
239
221
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-py3"
240
222
).format (sklearn_version )
241
223
expected_args ["app_specification" ]["ImageUri" ] = sklearn_image_uri
242
224
243
225
sagemaker_session .process .assert_called_with (** expected_args )
244
226
245
- # Verify the alternate command was applied successfully:
246
- framework_script = processor ._generate_framework_script ("processing_code.py" )
247
- expected_invocation = f"{ ' ' .join (custom_command )} processing_code.py"
248
- assert (
249
- f"\n { expected_invocation } " in framework_script
250
- ), "Framework script should contain customized invocation:\n {}\n \n Got:\n {}" .format (
251
- expected_invocation ,
252
- framework_script ,
253
- )
254
-
255
227
256
228
@patch ("sagemaker.utils._botocore_resolver" )
257
229
@patch ("os.path.exists" , return_value = True )
258
230
@patch ("os.path.isfile" , return_value = True )
259
231
def test_sklearn_with_all_parameters_via_run_args_called_twice (
260
- exists_mock , isfile_mock , botocore_resolver , sklearn_version , sagemaker_session , uploaded_code
232
+ exists_mock , isfile_mock , botocore_resolver , sklearn_version , sagemaker_session
261
233
):
262
234
botocore_resolver .return_value .construct_endpoint .return_value = {"hostname" : ECR_HOSTNAME }
263
235
@@ -269,7 +241,6 @@ def test_sklearn_with_all_parameters_via_run_args_called_twice(
269
241
volume_size_in_gb = 100 ,
270
242
volume_kms_key = "arn:aws:kms:us-west-2:012345678901:key/volume-kms-key" ,
271
243
output_kms_key = "arn:aws:kms:us-west-2:012345678901:key/output-kms-key" ,
272
- code_location = MOCKED_S3_URI ,
273
244
max_runtime_in_seconds = 3600 ,
274
245
base_job_name = "my_sklearn_processor" ,
275
246
env = {"my_env_variable" : "my_env_variable_value" },
@@ -283,27 +254,12 @@ def test_sklearn_with_all_parameters_via_run_args_called_twice(
283
254
sagemaker_session = sagemaker_session ,
284
255
)
285
256
286
- with patch ("sagemaker.estimator.tar_and_upload_dir" , return_value = uploaded_code ):
287
- run_args = processor .get_run_args (
288
- code = "processing_code.py" ,
289
- source_dir = "/local/path/to/source_dir" ,
290
- dependencies = ["/local/path/to/dep_01" ],
291
- git_config = None ,
292
- inputs = _get_data_inputs_all_parameters (),
293
- outputs = _get_data_outputs_all_parameters (),
294
- arguments = ["--drop-columns" , "'SelfEmployed'" ],
295
- )
296
-
297
257
run_args = processor .get_run_args (
298
258
code = "/local/path/to/processing_code.py" ,
299
- source_dir = None ,
300
- dependencies = None ,
301
- git_config = None ,
302
259
inputs = _get_data_inputs_all_parameters (),
303
260
outputs = _get_data_outputs_all_parameters (),
304
261
arguments = ["--drop-columns" , "'SelfEmployed'" ],
305
262
)
306
-
307
263
processor .run (
308
264
code = run_args .code ,
309
265
inputs = run_args .inputs ,
@@ -314,10 +270,8 @@ def test_sklearn_with_all_parameters_via_run_args_called_twice(
314
270
experiment_config = {"ExperimentName" : "AnExperiment" },
315
271
)
316
272
317
- expected_args = _get_expected_args_all_parameters_modular_code (
318
- processor ._current_job_name ,
319
- code_s3_prefix = run_args .code .replace ("/runproc.sh" , "" ),
320
- )
273
+ expected_args = _get_expected_args_all_parameters (processor ._current_job_name )
274
+
321
275
sklearn_image_uri = (
322
276
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-py3"
323
277
).format (sklearn_version )
@@ -853,7 +807,7 @@ def _get_script_processor(sagemaker_session):
853
807
)
854
808
855
809
856
- def _get_expected_args (job_name , code_s3_uri = f "s3://{ BUCKET_NAME } " ):
810
+ def _get_expected_args (job_name , code_s3_uri = "s3://mocked_s3_uri_from_upload_data " ):
857
811
return {
858
812
"inputs" : [
859
813
{
0 commit comments