Skip to content

Commit bc1203d

Browse files
saimidu authored and ahsan-z-khan committed
feature: Add support for TF 2.6
1 parent ec128cd commit bc1203d

File tree

12 files changed, with 550 additions and 163 deletions.

src/sagemaker/fw_utils.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,20 @@
5959
"local_gpu",
6060
)
6161
SM_DATAPARALLEL_SUPPORTED_FRAMEWORK_VERSIONS = {
62-
"tensorflow": ["2.3", "2.3.1", "2.3.2", "2.4", "2.4.1", "2.4.3", "2.5", "2.5.0", "2.5.1"],
62+
"tensorflow": [
63+
"2.3",
64+
"2.3.1",
65+
"2.3.2",
66+
"2.4",
67+
"2.4.1",
68+
"2.4.3",
69+
"2.5",
70+
"2.5.0",
71+
"2.5.1",
72+
"2.6",
73+
"2.6.0",
74+
"2.6.2",
75+
],
6376
"pytorch": ["1.6", "1.6.0", "1.7", "1.7.1", "1.8", "1.8.0", "1.8.1", "1.9", "1.9.0", "1.9.1"],
6477
}
6578
SMDISTRIBUTED_SUPPORTED_STRATEGIES = ["dataparallel", "modelparallel"]

src/sagemaker/image_uri_config/tensorflow.json

Lines changed: 67 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,8 @@
278278
"2.2": "2.2.2",
279279
"2.3": "2.3.2",
280280
"2.4": "2.4.3",
281-
"2.5": "2.5.1"
281+
"2.5": "2.5.1",
282+
"2.6": "2.6.0"
282283
},
283284
"versions": {
284285
"1.10.0": {
@@ -1312,6 +1313,36 @@
13121313
"us-west-2": "763104351884"
13131314
},
13141315
"repository": "tensorflow-inference"
1316+
},
1317+
"2.6.0": {
1318+
"registries": {
1319+
"af-south-1": "626614931356",
1320+
"ap-east-1": "871362719292",
1321+
"ap-northeast-1": "763104351884",
1322+
"ap-northeast-2": "763104351884",
1323+
"ap-northeast-3": "364406365360",
1324+
"ap-south-1": "763104351884",
1325+
"ap-southeast-1": "763104351884",
1326+
"ap-southeast-2": "763104351884",
1327+
"ca-central-1": "763104351884",
1328+
"cn-north-1": "727897471807",
1329+
"cn-northwest-1": "727897471807",
1330+
"eu-central-1": "763104351884",
1331+
"eu-north-1": "763104351884",
1332+
"eu-south-1": "692866216735",
1333+
"eu-west-1": "763104351884",
1334+
"eu-west-2": "763104351884",
1335+
"eu-west-3": "763104351884",
1336+
"me-south-1": "217643126080",
1337+
"sa-east-1": "763104351884",
1338+
"us-east-1": "763104351884",
1339+
"us-east-2": "763104351884",
1340+
"us-gov-west-1": "442386744353",
1341+
"us-iso-east-1": "886529160074",
1342+
"us-west-1": "763104351884",
1343+
"us-west-2": "763104351884"
1344+
},
1345+
"repository": "tensorflow-inference"
13151346
}
13161347
}
13171348
},
@@ -1338,7 +1369,8 @@
13381369
"2.2": "2.2.2",
13391370
"2.3": "2.3.2",
13401371
"2.4": "2.4.3",
1341-
"2.5": "2.5.1"
1372+
"2.5": "2.5.1",
1373+
"2.6": "2.6.0"
13421374
},
13431375
"versions": {
13441376
"1.10.0": {
@@ -2531,6 +2563,39 @@
25312563
"us-west-2": "763104351884"
25322564
},
25332565
"repository": "tensorflow-training"
2566+
},
2567+
"2.6.0": {
2568+
"py_versions": [
2569+
"py38"
2570+
],
2571+
"registries": {
2572+
"af-south-1": "626614931356",
2573+
"ap-east-1": "871362719292",
2574+
"ap-northeast-1": "763104351884",
2575+
"ap-northeast-2": "763104351884",
2576+
"ap-northeast-3": "364406365360",
2577+
"ap-south-1": "763104351884",
2578+
"ap-southeast-1": "763104351884",
2579+
"ap-southeast-2": "763104351884",
2580+
"ca-central-1": "763104351884",
2581+
"cn-north-1": "727897471807",
2582+
"cn-northwest-1": "727897471807",
2583+
"eu-central-1": "763104351884",
2584+
"eu-north-1": "763104351884",
2585+
"eu-south-1": "692866216735",
2586+
"eu-west-1": "763104351884",
2587+
"eu-west-2": "763104351884",
2588+
"eu-west-3": "763104351884",
2589+
"me-south-1": "217643126080",
2590+
"sa-east-1": "763104351884",
2591+
"us-east-1": "763104351884",
2592+
"us-east-2": "763104351884",
2593+
"us-gov-west-1": "442386744353",
2594+
"us-iso-east-1": "886529160074",
2595+
"us-west-1": "763104351884",
2596+
"us-west-2": "763104351884"
2597+
},
2598+
"repository": "tensorflow-training"
25342599
}
25352600
}
25362601
}

tests/conftest.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -315,7 +315,9 @@ def _tf_py_version(tf_version, request):
315315
return "py3"
316316
if version < Version("2.2"):
317317
return request.param
318-
return "py37"
318+
if Version("2.2") <= version < Version("2.6"):
319+
return "py37"
320+
return "py38"
319321

320322

321323
@pytest.fixture(scope="module")
@@ -345,7 +347,9 @@ def tf_full_py_version(tf_full_version):
345347
return "py2"
346348
if version < Version("2.2"):
347349
return "py3"
348-
return "py37"
350+
if version < Version("2.6"):
351+
return "py37"
352+
return "py38"
349353

350354

351355
@pytest.fixture(scope="session")

tests/data/tensorflow_mnist/mnist.py

Lines changed: 26 additions & 137 deletions
Original file line number | Diff line number | Diff line change
@@ -10,114 +10,14 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13-
from __future__ import absolute_import, division, print_function
14-
1513
import argparse
1614
import json
17-
import numpy as np
1815
import os
19-
import tensorflow as tf
20-
21-
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
22-
23-
24-
def cnn_model_fn(features, labels, mode):
25-
"""Model function for CNN."""
26-
# Input Layer
27-
# Reshape X to 4-D tensor: [batch_size, width, height, channels]
28-
# MNIST images are 28x28 pixels, and have one color channel
29-
input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])
30-
31-
# Convolutional Layer #1
32-
# Computes 32 features using a 5x5 filter with ReLU activation.
33-
# Padding is added to preserve width and height.
34-
# Input Tensor Shape: [batch_size, 28, 28, 1]
35-
# Output Tensor Shape: [batch_size, 28, 28, 32]
36-
conv1 = tf.compat.v1.layers.conv2d(
37-
inputs=input_layer, filters=32, kernel_size=[5, 5], padding="same", activation=tf.nn.relu
38-
)
39-
40-
# Pooling Layer #1
41-
# First max pooling layer with a 2x2 filter and stride of 2
42-
# Input Tensor Shape: [batch_size, 28, 28, 32]
43-
# Output Tensor Shape: [batch_size, 14, 14, 32]
44-
pool1 = tf.compat.v1.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
45-
46-
# Convolutional Layer #2
47-
# Computes 64 features using a 5x5 filter.
48-
# Padding is added to preserve width and height.
49-
# Input Tensor Shape: [batch_size, 14, 14, 32]
50-
# Output Tensor Shape: [batch_size, 14, 14, 64]
51-
conv2 = tf.compat.v1.layers.conv2d(
52-
inputs=pool1, filters=64, kernel_size=[5, 5], padding="same", activation=tf.nn.relu
53-
)
54-
55-
# Pooling Layer #2
56-
# Second max pooling layer with a 2x2 filter and stride of 2
57-
# Input Tensor Shape: [batch_size, 14, 14, 64]
58-
# Output Tensor Shape: [batch_size, 7, 7, 64]
59-
pool2 = tf.compat.v1.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
60-
61-
# Flatten tensor into a batch of vectors
62-
# Input Tensor Shape: [batch_size, 7, 7, 64]
63-
# Output Tensor Shape: [batch_size, 7 * 7 * 64]
64-
pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
65-
66-
# Dense Layer
67-
# Densely connected layer with 1024 neurons
68-
# Input Tensor Shape: [batch_size, 7 * 7 * 64]
69-
# Output Tensor Shape: [batch_size, 1024]
70-
dense = tf.compat.v1.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
71-
72-
# Add dropout operation; 0.6 probability that element will be kept
73-
dropout = tf.compat.v1.layers.dropout(
74-
inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN
75-
)
76-
77-
# Logits layer
78-
# Input Tensor Shape: [batch_size, 1024]
79-
# Output Tensor Shape: [batch_size, 10]
80-
logits = tf.compat.v1.layers.dense(inputs=dropout, units=10)
81-
82-
predictions = {
83-
# Generate predictions (for PREDICT and EVAL mode)
84-
"classes": tf.argmax(input=logits, axis=1),
85-
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
86-
# `logging_hook`.
87-
"probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
88-
}
89-
if mode == tf.estimator.ModeKeys.PREDICT:
90-
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
9116

92-
# Calculate Loss (for both TRAIN and EVAL modes)
93-
loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
17+
from packaging.version import Version
9418

95-
# Configure the Training Op (for TRAIN mode)
96-
if mode == tf.estimator.ModeKeys.TRAIN:
97-
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.001)
98-
train_op = optimizer.minimize(loss=loss, global_step=tf.compat.v1.train.get_global_step())
99-
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
10019

101-
# Add evaluation metrics (for EVAL mode)
102-
eval_metric_ops = {
103-
"accuracy": tf.compat.v1.metrics.accuracy(labels=labels, predictions=predictions["classes"])
104-
}
105-
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
106-
107-
108-
def _load_training_data(base_dir):
109-
x_train = np.load(os.path.join(base_dir, "train_data.npy"))
110-
y_train = np.load(os.path.join(base_dir, "train_labels.npy"))
111-
return x_train, y_train
112-
113-
114-
def _load_testing_data(base_dir):
115-
x_test = np.load(os.path.join(base_dir, "eval_data.npy"))
116-
y_test = np.load(os.path.join(base_dir, "eval_labels.npy"))
117-
return x_test, y_test
118-
119-
120-
def _parse_args():
20+
def _parse_args_v1():
12121

12222
parser = argparse.ArgumentParser()
12323

@@ -130,46 +30,35 @@ def _parse_args():
13030
parser.add_argument("--hosts", type=list, default=json.loads(os.environ.get("SM_HOSTS")))
13131
parser.add_argument("--current-host", type=str, default=os.environ.get("SM_CURRENT_HOST"))
13232

133-
return parser.parse_known_args()
33+
known, unknown = parser.parse_known_args()
34+
return known
13435

13536

136-
def serving_input_fn():
137-
inputs = {"x": tf.compat.v1.placeholder(tf.float32, [None, 784])}
138-
return tf.estimator.export.ServingInputReceiver(inputs, inputs)
37+
def _parse_args_v2():
    """Parse the command-line arguments for the ``mnist_v2`` training entry point.

    The defaults for ``--train``, ``--hosts`` and ``--current-host`` are read from the
    ``SM_CHANNEL_TRAINING``, ``SM_HOSTS`` and ``SM_CURRENT_HOST`` environment variables.

    Returns:
        argparse.Namespace: The parsed arguments.

    Raises:
        KeyError: If one of the environment variables above is not set.
    """

    def _str_to_bool(value):
        # ``type=bool`` would map every non-empty string, including "False", to True,
        # so the flag could never be switched off from the command line.
        lowered = value.strip().lower()
        if lowered in ("true", "1", "yes"):
            return True
        if lowered in ("false", "0", "no", ""):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)

    parser = argparse.ArgumentParser()
    parser.add_argument("--train", type=str, default=os.environ["SM_CHANNEL_TRAINING"])
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--model_dir", type=str)
    parser.add_argument("--max-steps", type=int, default=200)
    parser.add_argument("--save-checkpoint-steps", type=int, default=200)
    parser.add_argument("--throttle-secs", type=int, default=60)
    # ``type=list`` would split the argument into single characters; decode it as JSON,
    # the same encoding used for the SM_HOSTS default.
    parser.add_argument("--hosts", type=json.loads, default=json.loads(os.environ["SM_HOSTS"]))
    parser.add_argument("--current-host", type=str, default=os.environ["SM_CURRENT_HOST"])
    parser.add_argument("--batch-size", type=int, default=100)
    parser.add_argument("--export-model-during-training", type=_str_to_bool, default=False)
    return parser.parse_args()
13950

14051

14152
if __name__ == "__main__":
    # TensorFlow is imported only when this file is executed as a script.
    import tensorflow as tf

    # Compare against the first release that takes the v2 path. The previous
    # ``<= Version("2.5")`` check sent 2.5.x patch releases (e.g. 2.5.1) to ``mnist_v2``.
    if Version(tf.__version__) < Version("2.6"):
        from mnist_v1 import main

        args = _parse_args_v1()
    else:
        from mnist_v2 import main

        args = _parse_args_v2()

    main(args)

0 commit comments

Comments
 (0)