Skip to content

Commit 7c5c2d5

Browse files
author
Rohit Kumar Srivastava
committed
Fix TensorFlow (tf) and PyTorch (pt) unit tests
1 parent 1850662 commit 7c5c2d5

File tree

2 files changed

+38
-23
lines changed

2 files changed

+38
-23
lines changed

src/sagemaker/image_uri_config/huggingface-training-compiler.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
},
3737
"4.17.0": {
3838
"version_aliases": {
39-
"pytorch1.10.2": "pytorch1.10.2",
39+
"pytorch1.10": "pytorch1.10.2",
4040
"tensorflow2.6": "tensorflow2.6.3"
4141
},
4242
"pytorch1.10.2": {

tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py

+37 -22
Original file line number | Diff line number | Diff line change
@@ -79,13 +79,17 @@ def fixture_sagemaker_session():
7979

8080

8181
def _get_full_gpu_image_uri(
82-
version, base_framework_version, instance_type, training_compiler_config
82+
version,
83+
base_framework_version,
84+
instance_type,
85+
training_compiler_config,
86+
py_version
8387
):
8488
return image_uris.retrieve(
8589
"huggingface",
8690
REGION,
8791
version=version,
88-
py_version="py38",
92+
py_version=py_version,
8993
instance_type=instance_type,
9094
image_scope="training",
9195
base_framework_version=base_framework_version,
@@ -94,10 +98,10 @@ def _get_full_gpu_image_uri(
9498
)
9599

96100

97-
def _create_train_job(version, base_framework_version, instance_type, training_compiler_config):
101+
def _create_train_job(version, base_framework_version, instance_type, training_compiler_config, py_version):
98102
return {
99103
"image_uri": _get_full_gpu_image_uri(
100-
version, base_framework_version, instance_type, training_compiler_config
104+
version, base_framework_version, instance_type, training_compiler_config, py_version
101105
),
102106
"input_mode": "File",
103107
"input_config": [
@@ -155,17 +159,18 @@ def _create_train_job(version, base_framework_version, instance_type, training_c
155159
def test_unsupported_BYOC(
156160
huggingface_training_compiler_version,
157161
huggingface_training_compiler_tensorflow_version,
162+
huggingface_training_compiler_py_version
158163
):
159164
byoc = (
160-
"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
161-
"2.6.3-"
162-
"transformers4.17.0-gpu-"
163-
"py38-cu112-ubuntu20.04"
165+
f"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
166+
f"2.6.3-"
167+
f"transformers4.17.0-gpu-"
168+
f"{huggingface_training_compiler_py_version}-cu112-ubuntu20.04"
164169
)
165170
with pytest.raises(ValueError):
166171
HuggingFace(
167172
image_uri=byoc,
168-
py_version="py38",
173+
py_version=huggingface_training_compiler_py_version,
169174
entry_point=SCRIPT_PATH,
170175
role=ROLE,
171176
instance_count=INSTANCE_COUNT,
@@ -181,10 +186,11 @@ def test_unsupported_cpu_instance(
181186
cpu_instance_type,
182187
huggingface_training_compiler_version,
183188
huggingface_training_compiler_tensorflow_version,
189+
huggingface_training_compiler_py_version
184190
):
185191
with pytest.raises(ValueError):
186192
HuggingFace(
187-
py_version="py38",
193+
py_version=huggingface_training_compiler_py_version,
188194
entry_point=SCRIPT_PATH,
189195
role=ROLE,
190196
instance_count=INSTANCE_COUNT,
@@ -201,10 +207,11 @@ def test_unsupported_gpu_instance(
201207
unsupported_gpu_instance_class,
202208
huggingface_training_compiler_version,
203209
huggingface_training_compiler_tensorflow_version,
210+
huggingface_training_compiler_py_version
204211
):
205212
with pytest.raises(ValueError):
206213
HuggingFace(
207-
py_version="py38",
214+
py_version=huggingface_training_compiler_py_version,
208215
entry_point=SCRIPT_PATH,
209216
role=ROLE,
210217
instance_count=INSTANCE_COUNT,
@@ -218,10 +225,11 @@ def test_unsupported_gpu_instance(
218225

219226
def test_unsupported_framework_version(
220227
huggingface_training_compiler_version,
228+
huggingface_training_compiler_py_version
221229
):
222230
with pytest.raises(ValueError):
223231
HuggingFace(
224-
py_version="py38",
232+
py_version=huggingface_training_compiler_py_version,
225233
entry_point=SCRIPT_PATH,
226234
role=ROLE,
227235
instance_count=INSTANCE_COUNT,
@@ -237,10 +245,11 @@ def test_unsupported_framework_version(
237245

238246
def test_unsupported_framework_mxnet(
239247
huggingface_training_compiler_version,
248+
huggingface_training_compiler_py_version
240249
):
241250
with pytest.raises(ValueError):
242251
HuggingFace(
243-
py_version="py38",
252+
py_version=huggingface_training_compiler_py_version,
244253
entry_point=SCRIPT_PATH,
245254
role=ROLE,
246255
instance_count=INSTANCE_COUNT,
@@ -253,8 +262,7 @@ def test_unsupported_framework_mxnet(
253262

254263

255264
def test_unsupported_python_2(
256-
huggingface_training_compiler_version,
257-
huggingface_training_compiler_tensorflow_version,
265+
huggingface_training_compiler_version
258266
):
259267
with pytest.raises(ValueError):
260268
HuggingFace(
@@ -282,12 +290,13 @@ def test_default_compiler_config(
282290
huggingface_training_compiler_version,
283291
huggingface_training_compiler_tensorflow_version,
284292
instance_class,
293+
huggingface_training_compiler_py_version
285294
):
286295
compiler_config = TrainingCompilerConfig()
287296
instance_type = f"ml.{instance_class}.xlarge"
288297

289298
hf = HuggingFace(
290-
py_version="py38",
299+
py_version=huggingface_training_compiler_py_version,
291300
entry_point=SCRIPT_PATH,
292301
role=ROLE,
293302
sagemaker_session=sagemaker_session,
@@ -313,6 +322,7 @@ def test_default_compiler_config(
313322
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
314323
instance_type,
315324
compiler_config,
325+
huggingface_training_compiler_py_version
316326
)
317327
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
318328
expected_train_args["enable_sagemaker_metrics"] = False
@@ -339,11 +349,12 @@ def test_debug_compiler_config(
339349
sagemaker_session,
340350
huggingface_training_compiler_version,
341351
huggingface_training_compiler_tensorflow_version,
352+
huggingface_training_compiler_py_version
342353
):
343354
compiler_config = TrainingCompilerConfig(debug=True)
344355

345356
hf = HuggingFace(
346-
py_version="py38",
357+
py_version=huggingface_training_compiler_py_version,
347358
entry_point=SCRIPT_PATH,
348359
role=ROLE,
349360
sagemaker_session=sagemaker_session,
@@ -369,6 +380,7 @@ def test_debug_compiler_config(
369380
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
370381
INSTANCE_TYPE,
371382
compiler_config,
383+
huggingface_training_compiler_py_version
372384
)
373385
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
374386
expected_train_args["enable_sagemaker_metrics"] = False
@@ -395,11 +407,12 @@ def test_disable_compiler_config(
395407
sagemaker_session,
396408
huggingface_training_compiler_version,
397409
huggingface_training_compiler_tensorflow_version,
410+
huggingface_training_compiler_py_version
398411
):
399412
compiler_config = TrainingCompilerConfig(enabled=False)
400413

401414
hf = HuggingFace(
402-
py_version="py38",
415+
py_version=huggingface_training_compiler_py_version,
403416
entry_point=SCRIPT_PATH,
404417
role=ROLE,
405418
sagemaker_session=sagemaker_session,
@@ -425,6 +438,7 @@ def test_disable_compiler_config(
425438
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
426439
INSTANCE_TYPE,
427440
compiler_config,
441+
huggingface_training_compiler_py_version
428442
)
429443
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
430444
expected_train_args["enable_sagemaker_metrics"] = False
@@ -448,12 +462,13 @@ def test_attach(
448462
sagemaker_session,
449463
compiler_enabled,
450464
debug_enabled,
465+
huggingface_training_compiler_py_version
451466
):
452467
training_image = (
453-
"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
454-
"2.6.3-"
455-
"transformers4.17.0-gpu-"
456-
"py38-cu112-ubuntu20.04"
468+
f"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
469+
f"2.6.3-"
470+
f"transformers4.17.0-gpu-"
471+
f"{huggingface_training_compiler_py_version}-cu112-ubuntu20.04"
457472
)
458473
returned_job_description = {
459474
"AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},

0 commit comments

Comments
 (0)