Skip to content

Commit 86f3594

Browse files
author
Rohit Kumar Srivastava
committed
Fixing TensorFlow (tf) and PyTorch (pt) unit tests
1 parent b176fdc commit 86f3594

File tree

2 files changed

+38
-22
lines changed

2 files changed

+38
-22
lines changed

src/sagemaker/image_uri_config/huggingface-training-compiler.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
},
3737
"4.17.0": {
3838
"version_aliases": {
39-
"pytorch1.10.2": "pytorch1.10.2",
39+
"pytorch1.10": "pytorch1.10.2",
4040
"tensorflow2.6": "tensorflow2.6.3"
4141
},
4242
"pytorch1.10.2": {

tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py

+37-21
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,17 @@ def fixture_sagemaker_session():
7979

8080

8181
def _get_full_gpu_image_uri(
82-
version, base_framework_version, instance_type, training_compiler_config
82+
version,
83+
base_framework_version,
84+
instance_type,
85+
training_compiler_config,
86+
py_version
8387
):
8488
return image_uris.retrieve(
8589
"huggingface",
8690
REGION,
8791
version=version,
88-
py_version="py38",
92+
py_version=py_version,
8993
instance_type=instance_type,
9094
image_scope="training",
9195
base_framework_version=base_framework_version,
@@ -94,10 +98,10 @@ def _get_full_gpu_image_uri(
9498
)
9599

96100

97-
def _create_train_job(version, base_framework_version, instance_type, training_compiler_config):
101+
def _create_train_job(version, base_framework_version, instance_type, training_compiler_config, py_version):
98102
return {
99103
"image_uri": _get_full_gpu_image_uri(
100-
version, base_framework_version, instance_type, training_compiler_config
104+
version, base_framework_version, instance_type, training_compiler_config, py_version
101105
),
102106
"input_mode": "File",
103107
"input_config": [
@@ -155,17 +159,18 @@ def _create_train_job(version, base_framework_version, instance_type, training_c
155159
def test_unsupported_BYOC(
156160
huggingface_training_compiler_version,
157161
huggingface_training_compiler_tensorflow_version,
162+
huggingface_training_compiler_py_version
158163
):
159164
byoc = (
160-
"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
161-
"2.6.3-"
162-
"transformers4.17.0-gpu-"
163-
"py38-cu112-ubuntu20.04"
165+
f"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
166+
f"2.6.3-"
167+
f"transformers4.17.0-gpu-"
168+
f"{huggingface_training_compiler_py_version}-cu112-ubuntu20.04"
164169
)
165170
with pytest.raises(ValueError):
166171
HuggingFace(
167172
image_uri=byoc,
168-
py_version="py38",
173+
py_version=huggingface_training_compiler_py_version,
169174
entry_point=SCRIPT_PATH,
170175
role=ROLE,
171176
instance_count=INSTANCE_COUNT,
@@ -181,10 +186,11 @@ def test_unsupported_cpu_instance(
181186
cpu_instance_type,
182187
huggingface_training_compiler_version,
183188
huggingface_training_compiler_tensorflow_version,
189+
huggingface_training_compiler_py_version
184190
):
185191
with pytest.raises(ValueError):
186192
HuggingFace(
187-
py_version="py38",
193+
py_version=huggingface_training_compiler_py_version,
188194
entry_point=SCRIPT_PATH,
189195
role=ROLE,
190196
instance_count=INSTANCE_COUNT,
@@ -201,10 +207,11 @@ def test_unsupported_gpu_instance(
201207
unsupported_gpu_instance_class,
202208
huggingface_training_compiler_version,
203209
huggingface_training_compiler_tensorflow_version,
210+
huggingface_training_compiler_py_version
204211
):
205212
with pytest.raises(ValueError):
206213
HuggingFace(
207-
py_version="py38",
214+
py_version=huggingface_training_compiler_py_version,
208215
entry_point=SCRIPT_PATH,
209216
role=ROLE,
210217
instance_count=INSTANCE_COUNT,
@@ -218,10 +225,11 @@ def test_unsupported_gpu_instance(
218225

219226
def test_unsupported_framework_version(
220227
huggingface_training_compiler_version,
228+
huggingface_training_compiler_py_version
221229
):
222230
with pytest.raises(ValueError):
223231
HuggingFace(
224-
py_version="py38",
232+
py_version=huggingface_training_compiler_py_version,
225233
entry_point=SCRIPT_PATH,
226234
role=ROLE,
227235
instance_count=INSTANCE_COUNT,
@@ -237,10 +245,11 @@ def test_unsupported_framework_version(
237245

238246
def test_unsupported_framework_mxnet(
239247
huggingface_training_compiler_version,
248+
huggingface_training_compiler_py_version
240249
):
241250
with pytest.raises(ValueError):
242251
HuggingFace(
243-
py_version="py38",
252+
py_version=huggingface_training_compiler_py_version,
244253
entry_point=SCRIPT_PATH,
245254
role=ROLE,
246255
instance_count=INSTANCE_COUNT,
@@ -254,7 +263,7 @@ def test_unsupported_framework_mxnet(
254263

255264
def test_unsupported_python_2(
256265
huggingface_training_compiler_version,
257-
huggingface_training_compiler_tensorflow_version,
266+
huggingface_training_compiler_tensorflow_version
258267
):
259268
with pytest.raises(ValueError):
260269
HuggingFace(
@@ -282,12 +291,13 @@ def test_default_compiler_config(
282291
huggingface_training_compiler_version,
283292
huggingface_training_compiler_tensorflow_version,
284293
instance_class,
294+
huggingface_training_compiler_py_version
285295
):
286296
compiler_config = TrainingCompilerConfig()
287297
instance_type = f"ml.{instance_class}.xlarge"
288298

289299
hf = HuggingFace(
290-
py_version="py38",
300+
py_version=huggingface_training_compiler_py_version,
291301
entry_point=SCRIPT_PATH,
292302
role=ROLE,
293303
sagemaker_session=sagemaker_session,
@@ -313,6 +323,7 @@ def test_default_compiler_config(
313323
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
314324
instance_type,
315325
compiler_config,
326+
huggingface_training_compiler_py_version
316327
)
317328
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
318329
expected_train_args["enable_sagemaker_metrics"] = False
@@ -339,11 +350,12 @@ def test_debug_compiler_config(
339350
sagemaker_session,
340351
huggingface_training_compiler_version,
341352
huggingface_training_compiler_tensorflow_version,
353+
huggingface_training_compiler_py_version
342354
):
343355
compiler_config = TrainingCompilerConfig(debug=True)
344356

345357
hf = HuggingFace(
346-
py_version="py38",
358+
py_version=huggingface_training_compiler_py_version,
347359
entry_point=SCRIPT_PATH,
348360
role=ROLE,
349361
sagemaker_session=sagemaker_session,
@@ -369,6 +381,7 @@ def test_debug_compiler_config(
369381
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
370382
INSTANCE_TYPE,
371383
compiler_config,
384+
huggingface_training_compiler_py_version
372385
)
373386
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
374387
expected_train_args["enable_sagemaker_metrics"] = False
@@ -395,11 +408,12 @@ def test_disable_compiler_config(
395408
sagemaker_session,
396409
huggingface_training_compiler_version,
397410
huggingface_training_compiler_tensorflow_version,
411+
huggingface_training_compiler_py_version
398412
):
399413
compiler_config = TrainingCompilerConfig(enabled=False)
400414

401415
hf = HuggingFace(
402-
py_version="py38",
416+
py_version=huggingface_training_compiler_py_version,
403417
entry_point=SCRIPT_PATH,
404418
role=ROLE,
405419
sagemaker_session=sagemaker_session,
@@ -425,6 +439,7 @@ def test_disable_compiler_config(
425439
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
426440
INSTANCE_TYPE,
427441
compiler_config,
442+
huggingface_training_compiler_py_version
428443
)
429444
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
430445
expected_train_args["enable_sagemaker_metrics"] = False
@@ -448,12 +463,13 @@ def test_attach(
448463
sagemaker_session,
449464
compiler_enabled,
450465
debug_enabled,
466+
huggingface_training_compiler_py_version
451467
):
452468
training_image = (
453-
"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
454-
"2.6.3-"
455-
"transformers4.17.0-gpu-"
456-
"py38-cu112-ubuntu20.04"
469+
f"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
470+
f"2.6.3-"
471+
f"transformers4.17.0-gpu-"
472+
f"{huggingface_training_compiler_py_version}-cu112-ubuntu20.04"
457473
)
458474
returned_job_description = {
459475
"AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},

0 commit comments

Comments
 (0)