Skip to content

Commit 44a7ba2

Browse files
committed
infra: use fixture for Python version in TF integ tests
1 parent c211417 commit 44a7ba2

File tree

5 files changed

+50
-68
lines changed

tests/conftest.py

+4-20
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ def pytest_addoption(parser):
5858
"--rl-ray-full-version", action="store", default=RLEstimator.RAY_LATEST_VERSION
5959
)
6060
parser.addoption("--sklearn-full-version", action="store", default="0.20.0")
61-
parser.addoption("--tf-full-version", action="store", default="2.2.0")
6261
parser.addoption("--ei-tf-full-version", action="store")
6362
parser.addoption("--xgboost-full-version", action="store", default="1.0-1")
6463

@@ -300,32 +299,17 @@ def sklearn_full_version(request):
300299

301300

302301
@pytest.fixture(scope="module")
303-
def tf_full_version(request):
304-
return request.config.getoption("--tf-full-version")
302+
def tf_full_version():
303+
return "2.2.0"
305304

306305

307306
@pytest.fixture(scope="module")
308-
def tf_full_py_version(tf_full_version):
309-
"""fixture to match tf_full_version
310-
311-
Fixture exists as such, since tf_full_version may be overridden --tf-full-version.
312-
Otherwise, this would simply be py37 to match the latest version support.
313-
314-
TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code.
315-
"""
316-
version = [int(val) for val in tf_full_version.split(".")]
317-
if version < [1, 11]:
318-
return "py2"
319-
if version < [2, 2]:
320-
return "py3"
307+
def tf_full_py_version():
321308
return "py37"
322309

323310

324311
@pytest.fixture(scope="module")
325-
def tf_serving_version(tf_full_version):
326-
full_version = [int(val) for val in tf_full_version.split(".")]
327-
if full_version < [2, 2]:
328-
return tf_full_version
312+
def tf_serving_version():
329313
return "2.1.0"
330314

331315

tests/integ/test_horovod.py

+17-16
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
import json
1616
import os
1717
import tarfile
18-
from six.moves.urllib.parse import urlparse
1918

2019
import boto3
2120
import pytest
21+
from six.moves.urllib.parse import urlparse
2222

2323
import sagemaker.utils
2424
import tests.integ as integ
@@ -28,27 +28,28 @@
2828
horovod_dir = os.path.join(os.path.dirname(__file__), "..", "data", "horovod")
2929

3030

31-
@pytest.fixture(scope="module")
32-
def gpu_instance_type(request):
33-
return "ml.p2.xlarge"
34-
35-
3631
@pytest.mark.canary_quick
37-
def test_hvd_cpu(sagemaker_session, cpu_instance_type, tmpdir):
38-
_create_and_fit_estimator(sagemaker_session, cpu_instance_type, tmpdir)
32+
def test_hvd_cpu(sagemaker_session, tf_full_version, tf_full_py_version, cpu_instance_type, tmpdir):
33+
_create_and_fit_estimator(
34+
sagemaker_session, tf_full_version, tf_full_py_version, cpu_instance_type, tmpdir
35+
)
3936

4037

4138
@pytest.mark.canary_quick
4239
@pytest.mark.skipif(
4340
integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
4441
)
45-
def test_hvd_gpu(sagemaker_session, gpu_instance_type, tmpdir):
46-
_create_and_fit_estimator(sagemaker_session, gpu_instance_type, tmpdir)
42+
def test_hvd_gpu(sagemaker_session, tf_full_version, tf_full_py_version, tmpdir):
43+
_create_and_fit_estimator(
44+
sagemaker_session, tf_full_version, tf_full_py_version, "ml.p2.xlarge", tmpdir
45+
)
4746

4847

4948
@pytest.mark.local_mode
5049
@pytest.mark.parametrize("instances, processes", [[1, 2], (2, 1), (2, 2)])
51-
def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdir):
50+
def test_horovod_local_mode(
51+
sagemaker_local_session, tf_full_version, tf_full_py_version, instances, processes, tmpdir
52+
):
5253
output_path = "file://%s" % tmpdir
5354
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
5455
estimator = TensorFlow(
@@ -57,9 +58,9 @@ def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdi
5758
train_instance_count=2,
5859
train_instance_type="local",
5960
sagemaker_session=sagemaker_local_session,
60-
py_version=integ.PYTHON_VERSION,
6161
output_path=output_path,
62-
framework_version="1.12",
62+
framework_version=tf_full_version,
63+
py_version=tf_full_py_version,
6364
distributions={"mpi": {"enabled": True, "processes_per_host": processes}},
6465
)
6566

@@ -96,16 +97,16 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
9697
tar_file.extractall(tmpdir)
9798

9899

99-
def _create_and_fit_estimator(sagemaker_session, instance_type, tmpdir):
100+
def _create_and_fit_estimator(sagemaker_session, tf_version, py_version, instance_type, tmpdir):
100101
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
101102
estimator = TensorFlow(
102103
entry_point=os.path.join(horovod_dir, "hvd_basic.py"),
103104
role="SageMakerRole",
104105
train_instance_count=2,
105106
train_instance_type=instance_type,
106107
sagemaker_session=sagemaker_session,
107-
py_version=integ.PYTHON_VERSION,
108-
framework_version="1.12",
108+
py_version=py_version,
109+
framework_version=tf_version,
109110
distributions={"mpi": {"enabled": True}},
110111
)
111112

tests/integ/test_tf.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from sagemaker.utils import unique_name_from_base, sagemaker_timestamp
2323

2424
import tests.integ
25-
from tests.integ import kms_utils, timeout, PYTHON_VERSION
25+
from tests.integ import kms_utils, timeout
2626
from tests.integ.retry import retries
2727
from tests.integ.s3_utils import assert_s3_files_exist
2828

@@ -82,7 +82,7 @@ def test_mnist_with_checkpoint_config(
8282
assert actual_training_checkpoint_config == expected_training_checkpoint_config
8383

8484

85-
def test_server_side_encryption(sagemaker_session, tf_serving_version):
85+
def test_server_side_encryption(sagemaker_session, tf_serving_version, tf_full_py_version):
8686
with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key):
8787
output_path = os.path.join(
8888
bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M")
@@ -96,7 +96,7 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):
9696
train_instance_type="ml.c5.xlarge",
9797
sagemaker_session=sagemaker_session,
9898
framework_version=tf_serving_version,
99-
py_version=PYTHON_VERSION,
99+
py_version=tf_full_py_version,
100100
code_location=output_path,
101101
output_path=output_path,
102102
model_dir="/opt/ml/model",
@@ -147,16 +147,15 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf
147147
)
148148

149149

150-
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
150+
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version, tf_full_py_version):
151151
estimator = TensorFlow(
152152
entry_point=SCRIPT,
153153
role=ROLE,
154154
train_instance_count=1,
155155
train_instance_type="ml.c5.4xlarge",
156-
py_version=PYTHON_VERSION,
157156
sagemaker_session=sagemaker_session,
158-
# testing py-sdk functionality, no need to run against all TF versions
159157
framework_version=tf_serving_version,
158+
py_version=tf_full_py_version,
160159
tags=TAGS,
161160
)
162161
inputs = estimator.sagemaker_session.upload_data(
@@ -188,15 +187,17 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
188187
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)
189188

190189

191-
def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version):
190+
def test_deploy_with_input_handlers(
191+
sagemaker_session, instance_type, tf_serving_version, tf_full_py_version
192+
):
192193
estimator = TensorFlow(
193194
entry_point="training.py",
194195
source_dir=TFS_RESOURCE_PATH,
195196
role=ROLE,
196197
train_instance_count=1,
197198
train_instance_type=instance_type,
198199
framework_version=tf_serving_version,
199-
py_version=PYTHON_VERSION,
200+
py_version=tf_full_py_version,
200201
sagemaker_session=sagemaker_session,
201202
tags=TAGS,
202203
)

tests/integ/test_transformer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def test_transform_mxnet_logs(
332332

333333

334334
def test_transform_tf_kms_network_isolation(
335-
sagemaker_session, cpu_instance_type, tmpdir, tf_serving_version
335+
sagemaker_session, cpu_instance_type, tmpdir, tf_serving_version, tf_full_py_version
336336
):
337337
data_path = os.path.join(DATA_DIR, "tensorflow_mnist")
338338

@@ -342,7 +342,7 @@ def test_transform_tf_kms_network_isolation(
342342
train_instance_count=1,
343343
train_instance_type=cpu_instance_type,
344344
framework_version=tf_serving_version,
345-
py_version=PYTHON_VERSION,
345+
py_version=tf_full_py_version,
346346
sagemaker_session=sagemaker_session,
347347
)
348348

tests/integ/test_tuner.py

+18-22
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,6 @@
5151
from tests.integ.record_set import prepare_record_set_from_local_files
5252
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
5353

54-
DATA_PATH = os.path.join(DATA_DIR, "iris", "data")
55-
5654

5755
@pytest.fixture(scope="module")
5856
def kmeans_train_set(sagemaker_session):
@@ -588,9 +586,7 @@ def test_tuning_mxnet(sagemaker_session, mxnet_full_version, cpu_instance_type):
588586

589587

590588
@pytest.mark.canary_quick
591-
def test_tuning_tf_script_mode(
592-
sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
593-
):
589+
def test_tuning_tf(sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version):
594590
resource_path = os.path.join(DATA_DIR, "tensorflow_mnist")
595591
script_path = os.path.join(resource_path, "mnist.py")
596592

@@ -622,7 +618,7 @@ def test_tuning_tf_script_mode(
622618
path=os.path.join(resource_path, "data"), key_prefix="scriptmode/mnist"
623619
)
624620

625-
tuning_job_name = unique_name_from_base("tune-tf-script-mode", max_length=32)
621+
tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
626622
tuner.fit(inputs, job_name=tuning_job_name)
627623

628624
print("Started hyperparameter tuning job with name: " + tuning_job_name)
@@ -631,13 +627,15 @@ def test_tuning_tf_script_mode(
631627
tuner.wait()
632628

633629

634-
@pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.")
635-
def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type):
630+
def test_tuning_tf_vpc_multi(
631+
sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
632+
):
636633
"""Test Tensorflow multi-instance using the same VpcConfig for training and inference"""
637634
instance_type = cpu_instance_type
638635
instance_count = 2
639636

640-
script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py")
637+
resource_path = os.path.join(DATA_DIR, "tensorflow_mnist")
638+
script_path = os.path.join(resource_path, "mnist.py")
641639

642640
ec2_client = sagemaker_session.boto_session.client("ec2")
643641
subnet_ids, security_group_id = vpc_test_utils.get_or_create_vpc_resources(ec2_client)
@@ -646,41 +644,39 @@ def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type):
646644
estimator = TensorFlow(
647645
entry_point=script_path,
648646
role="SageMakerRole",
649-
training_steps=1,
650-
evaluation_steps=1,
651-
hyperparameters={"input_tensor_name": "inputs"},
647+
framework_version=tf_full_version,
648+
py_version=tf_full_py_version,
652649
train_instance_count=instance_count,
653650
train_instance_type=instance_type,
654651
sagemaker_session=sagemaker_session,
655652
base_job_name="test-vpc-tf",
656653
subnets=subnet_ids,
657654
security_group_ids=[security_group_id],
658655
encrypt_inter_container_traffic=True,
659-
framework_version="1.11",
660-
py_version=PYTHON_VERSION,
661656
)
662657

663-
inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris")
664-
hyperparameter_ranges = {"learning_rate": ContinuousParameter(0.05, 0.2)}
665-
666-
objective_metric_name = "loss"
667-
metric_definitions = [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}]
658+
hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
659+
objective_metric_name = "accuracy"
660+
metric_definitions = [{"Name": objective_metric_name, "Regex": "accuracy = ([0-9\\.]+)"}]
668661

669662
tuner = HyperparameterTuner(
670663
estimator,
671664
objective_metric_name,
672665
hyperparameter_ranges,
673666
metric_definitions,
674-
objective_type="Minimize",
675667
max_jobs=2,
676668
max_parallel_jobs=2,
677669
)
678670

679-
tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
680671
with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
672+
inputs = estimator.sagemaker_session.upload_data(
673+
path=os.path.join(resource_path, "data"), key_prefix="scriptmode/mnist"
674+
)
675+
676+
tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
681677
tuner.fit(inputs, job_name=tuning_job_name)
682678

683-
print("Started hyperparameter tuning job with name:" + tuning_job_name)
679+
print("Started hyperparameter tuning job with name: " + tuning_job_name)
684680

685681
time.sleep(15)
686682
tuner.wait()

0 commit comments

Comments
 (0)