Skip to content

Commit d696653

Browse files
committed
fix: broken unit tests for trcomp
1 parent 8ce021c commit d696653

File tree

4 files changed: 54 additions and 31 deletions.

tests/conftest.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -265,14 +265,21 @@ def huggingface_training_compiler_tensorflow_version(huggingface_training_compil
265265

266266

267267
@pytest.fixture(scope="module")
268-
def huggingface_training_compiler_py_version(huggingface_training_compiler_tensorflow_version):
268+
def huggingface_training_compiler_tensorflow_py_version(
269+
huggingface_training_compiler_tensorflow_version,
270+
):
269271
return (
270272
"py37"
271273
if Version(huggingface_training_compiler_tensorflow_version) < Version("2.6")
272274
else "py38"
273275
)
274276

275277

278+
@pytest.fixture(scope="module")
279+
def huggingface_training_compiler_pytorch_py_version(huggingface_training_compiler_pytorch_version):
280+
return "py38"
281+
282+
276283
@pytest.fixture(scope="module")
277284
def huggingface_pytorch_latest_training_py_version(huggingface_training_pytorch_latest_version):
278285
return (
@@ -545,6 +552,11 @@ def _huggingface_base_fm_version(huggingface_version, base_fw, fixture_prefix):
545552
if len(original_version.split(".")) == 2:
546553
base_fw_version = ".".join(base_fw_version.split(".")[:-1])
547554
versions.append(base_fw_version)
555+
if not versions:
556+
pytest.skip(
557+
f"{fixture_prefix} version {huggingface_version} does not have an"
558+
f" image URI configuration for {base_fw}"
559+
)
548560
return sorted(versions, reverse=True)
549561

550562

tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818

1919
import pytest
2020
from mock import MagicMock, Mock, patch, ANY
21+
from packaging.version import Version
2122

2223
from sagemaker import image_uris
2324
from sagemaker.huggingface import HuggingFace, TrainingCompilerConfig
@@ -96,7 +97,9 @@ def _get_full_gpu_image_uri(
9697
)
9798

9899

99-
def _create_train_job(version, base_framework_version, instance_type, training_compiler_config):
100+
def _create_train_job(
101+
version, base_framework_version, instance_type, training_compiler_config, instance_count=1
102+
):
100103
return {
101104
"image_uri": _get_full_gpu_image_uri(
102105
version, base_framework_version, instance_type, training_compiler_config
@@ -118,7 +121,7 @@ def _create_train_job(version, base_framework_version, instance_type, training_c
118121
"output_config": {"S3OutputPath": "s3://{}/".format(BUCKET_NAME)},
119122
"resource_config": {
120123
"InstanceType": instance_type,
121-
"InstanceCount": 1,
124+
"InstanceCount": instance_count,
122125
"VolumeSizeInGB": 30,
123126
},
124127
"hyperparameters": {
@@ -276,6 +279,8 @@ def test_unsupported_instance_group(
276279
huggingface_training_compiler_version,
277280
huggingface_training_compiler_pytorch_version,
278281
):
282+
if Version(huggingface_training_compiler_pytorch_version) < Version("1.11"):
283+
pytest.skip("This test is intended for PyTorch 1.11 and above")
279284
with pytest.raises(ValueError):
280285
HuggingFace(
281286
py_version="py38",
@@ -296,6 +301,8 @@ def test_unsupported_distribution(
296301
huggingface_training_compiler_version,
297302
huggingface_training_compiler_pytorch_version,
298303
):
304+
if Version(huggingface_training_compiler_pytorch_version) < Version("1.11"):
305+
pytest.skip("This test is intended for PyTorch 1.11 and above")
299306
with pytest.raises(ValueError):
300307
HuggingFace(
301308
py_version="py38",
@@ -383,6 +390,7 @@ def test_pytorchxla_distribution(
383390
f"pytorch{huggingface_training_compiler_pytorch_version}",
384391
instance_type,
385392
compiler_config,
393+
instance_count=2,
386394
)
387395
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
388396
expected_train_args["enable_sagemaker_metrics"] = False
@@ -642,7 +650,7 @@ def test_register_hf_pytorch_model_auto_infer_framework(
642650
sagemaker_session,
643651
huggingface_training_compiler_version,
644652
huggingface_training_compiler_pytorch_version,
645-
huggingface_training_compiler_py_version,
653+
huggingface_training_compiler_pytorch_py_version,
646654
):
647655

648656
model_package_group_name = "test-hf-tfs-register-model"
@@ -657,7 +665,7 @@ def test_register_hf_pytorch_model_auto_infer_framework(
657665
role=ROLE,
658666
transformers_version=huggingface_training_compiler_version,
659667
pytorch_version=huggingface_training_compiler_pytorch_version,
660-
py_version=huggingface_training_compiler_py_version,
668+
py_version=huggingface_training_compiler_pytorch_py_version,
661669
sagemaker_session=sagemaker_session,
662670
)
663671

tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py

Lines changed: 28 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -158,18 +158,18 @@ def _create_train_job(
158158
def test_unsupported_BYOC(
159159
huggingface_training_compiler_version,
160160
huggingface_training_compiler_tensorflow_version,
161-
huggingface_training_compiler_py_version,
161+
huggingface_training_compiler_tensorflow_py_version,
162162
):
163163
byoc = (
164164
f"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
165165
f"2.6.3-"
166166
f"transformers4.17.0-gpu-"
167-
f"{huggingface_training_compiler_py_version}-cu112-ubuntu20.04"
167+
f"{huggingface_training_compiler_tensorflow_py_version}-cu112-ubuntu20.04"
168168
)
169169
with pytest.raises(ValueError):
170170
HuggingFace(
171171
image_uri=byoc,
172-
py_version=huggingface_training_compiler_py_version,
172+
py_version=huggingface_training_compiler_tensorflow_py_version,
173173
entry_point=SCRIPT_PATH,
174174
role=ROLE,
175175
instance_count=INSTANCE_COUNT,
@@ -185,11 +185,11 @@ def test_unsupported_cpu_instance(
185185
cpu_instance_type,
186186
huggingface_training_compiler_version,
187187
huggingface_training_compiler_tensorflow_version,
188-
huggingface_training_compiler_py_version,
188+
huggingface_training_compiler_tensorflow_py_version,
189189
):
190190
with pytest.raises(ValueError):
191191
HuggingFace(
192-
py_version=huggingface_training_compiler_py_version,
192+
py_version=huggingface_training_compiler_tensorflow_py_version,
193193
entry_point=SCRIPT_PATH,
194194
role=ROLE,
195195
instance_count=INSTANCE_COUNT,
@@ -206,11 +206,11 @@ def test_unsupported_gpu_instance(
206206
unsupported_gpu_instance_class,
207207
huggingface_training_compiler_version,
208208
huggingface_training_compiler_tensorflow_version,
209-
huggingface_training_compiler_py_version,
209+
huggingface_training_compiler_tensorflow_py_version,
210210
):
211211
with pytest.raises(ValueError):
212212
HuggingFace(
213-
py_version=huggingface_training_compiler_py_version,
213+
py_version=huggingface_training_compiler_tensorflow_py_version,
214214
entry_point=SCRIPT_PATH,
215215
role=ROLE,
216216
instance_count=INSTANCE_COUNT,
@@ -224,11 +224,11 @@ def test_unsupported_gpu_instance(
224224

225225
def test_unsupported_framework_version(
226226
huggingface_training_compiler_version,
227-
huggingface_training_compiler_py_version,
227+
huggingface_training_compiler_tensorflow_py_version,
228228
):
229229
with pytest.raises(ValueError):
230230
HuggingFace(
231-
py_version=huggingface_training_compiler_py_version,
231+
py_version=huggingface_training_compiler_tensorflow_py_version,
232232
entry_point=SCRIPT_PATH,
233233
role=ROLE,
234234
instance_count=INSTANCE_COUNT,
@@ -244,11 +244,11 @@ def test_unsupported_framework_version(
244244

245245
def test_unsupported_framework_mxnet(
246246
huggingface_training_compiler_version,
247-
huggingface_training_compiler_py_version,
247+
huggingface_training_compiler_tensorflow_py_version,
248248
):
249249
with pytest.raises(ValueError):
250250
HuggingFace(
251-
py_version=huggingface_training_compiler_py_version,
251+
py_version=huggingface_training_compiler_tensorflow_py_version,
252252
entry_point=SCRIPT_PATH,
253253
role=ROLE,
254254
instance_count=INSTANCE_COUNT,
@@ -323,13 +323,13 @@ def test_default_compiler_config(
323323
huggingface_training_compiler_version,
324324
huggingface_training_compiler_tensorflow_version,
325325
instance_class,
326-
huggingface_training_compiler_py_version,
326+
huggingface_training_compiler_tensorflow_py_version,
327327
):
328328
compiler_config = TrainingCompilerConfig()
329329
instance_type = f"ml.{instance_class}.xlarge"
330330

331331
hf = HuggingFace(
332-
py_version=huggingface_training_compiler_py_version,
332+
py_version=huggingface_training_compiler_tensorflow_py_version,
333333
entry_point=SCRIPT_PATH,
334334
role=ROLE,
335335
sagemaker_session=sagemaker_session,
@@ -355,7 +355,7 @@ def test_default_compiler_config(
355355
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
356356
instance_type,
357357
compiler_config,
358-
huggingface_training_compiler_py_version,
358+
huggingface_training_compiler_tensorflow_py_version,
359359
)
360360
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
361361
expected_train_args["enable_sagemaker_metrics"] = False
@@ -382,12 +382,12 @@ def test_debug_compiler_config(
382382
sagemaker_session,
383383
huggingface_training_compiler_version,
384384
huggingface_training_compiler_tensorflow_version,
385-
huggingface_training_compiler_py_version,
385+
huggingface_training_compiler_tensorflow_py_version,
386386
):
387387
compiler_config = TrainingCompilerConfig(debug=True)
388388

389389
hf = HuggingFace(
390-
py_version=huggingface_training_compiler_py_version,
390+
py_version=huggingface_training_compiler_tensorflow_py_version,
391391
entry_point=SCRIPT_PATH,
392392
role=ROLE,
393393
sagemaker_session=sagemaker_session,
@@ -413,7 +413,7 @@ def test_debug_compiler_config(
413413
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
414414
INSTANCE_TYPE,
415415
compiler_config,
416-
huggingface_training_compiler_py_version,
416+
huggingface_training_compiler_tensorflow_py_version,
417417
)
418418
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
419419
expected_train_args["enable_sagemaker_metrics"] = False
@@ -440,12 +440,12 @@ def test_disable_compiler_config(
440440
sagemaker_session,
441441
huggingface_training_compiler_version,
442442
huggingface_training_compiler_tensorflow_version,
443-
huggingface_training_compiler_py_version,
443+
huggingface_training_compiler_tensorflow_py_version,
444444
):
445445
compiler_config = TrainingCompilerConfig(enabled=False)
446446

447447
hf = HuggingFace(
448-
py_version=huggingface_training_compiler_py_version,
448+
py_version=huggingface_training_compiler_tensorflow_py_version,
449449
entry_point=SCRIPT_PATH,
450450
role=ROLE,
451451
sagemaker_session=sagemaker_session,
@@ -471,7 +471,7 @@ def test_disable_compiler_config(
471471
f"tensorflow{huggingface_training_compiler_tensorflow_version}",
472472
INSTANCE_TYPE,
473473
compiler_config,
474-
huggingface_training_compiler_py_version,
474+
huggingface_training_compiler_tensorflow_py_version,
475475
)
476476
expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]["S3Uri"] = inputs
477477
expected_train_args["enable_sagemaker_metrics"] = False
@@ -492,13 +492,16 @@ def test_disable_compiler_config(
492492
["compiler_enabled", "debug_enabled"], [(True, False), (True, True), (False, False)]
493493
)
494494
def test_attach(
495-
sagemaker_session, compiler_enabled, debug_enabled, huggingface_training_compiler_py_version
495+
sagemaker_session,
496+
compiler_enabled,
497+
debug_enabled,
498+
huggingface_training_compiler_tensorflow_py_version,
496499
):
497500
training_image = (
498501
f"1.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-trcomp-training:"
499502
f"2.6.3-"
500503
f"transformers4.17.0-gpu-"
501-
f"{huggingface_training_compiler_py_version}-cu112-ubuntu20.04"
504+
f"{huggingface_training_compiler_tensorflow_py_version}-cu112-ubuntu20.04"
502505
)
503506
returned_job_description = {
504507
"AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
@@ -532,7 +535,7 @@ def test_attach(
532535

533536
estimator = HuggingFace.attach(training_job_name="trcomp", sagemaker_session=sagemaker_session)
534537
assert estimator.latest_training_job.job_name == "trcomp"
535-
assert estimator.py_version == huggingface_training_compiler_py_version
538+
assert estimator.py_version == huggingface_training_compiler_tensorflow_py_version
536539
assert estimator.framework_version == "4.17.0"
537540
assert estimator.tensorflow_version == "2.6.3"
538541
assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
@@ -557,7 +560,7 @@ def test_register_hf_tfs_model_auto_infer_framework(
557560
sagemaker_session,
558561
huggingface_training_compiler_version,
559562
huggingface_training_compiler_tensorflow_version,
560-
huggingface_training_compiler_py_version,
563+
huggingface_training_compiler_tensorflow_py_version,
561564
):
562565

563566
model_package_group_name = "test-hf-tfs-register-model"
@@ -572,7 +575,7 @@ def test_register_hf_tfs_model_auto_infer_framework(
572575
role=ROLE,
573576
transformers_version=huggingface_training_compiler_version,
574577
tensorflow_version=huggingface_training_compiler_tensorflow_version,
575-
py_version=huggingface_training_compiler_py_version,
578+
py_version=huggingface_training_compiler_tensorflow_py_version,
576579
sagemaker_session=sagemaker_session,
577580
)
578581

tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
4141
ROLE = "Dummy"
4242
REGION = "us-east-1"
4343
GPU = "ml.p3.2xlarge"
44-
SUPPORTED_GPU_INSTANCE_CLASSES = {"p3", "p3dn", "g4dn", "p4dn", "g5"}
44+
SUPPORTED_GPU_INSTANCE_CLASSES = {"p3", "p3dn", "g4dn", "p4d", "g5"}
4545
UNSUPPORTED_GPU_INSTANCE_CLASSES = EC2_GPU_INSTANCE_CLASSES - SUPPORTED_GPU_INSTANCE_CLASSES
4646

4747
LIST_TAGS_RESULT = {"Tags": [{"Key": "TagtestKey", "Value": "TagtestValue"}]}

0 commit comments

Comments
 (0)