Commit 9d4a528

Reducing the size of the training loop to fit on a p3.2xlarge
1 parent 00f83a5 commit 9d4a528

1 file changed (+2 -2 lines)

test/integration/sagemaker/test_multi_worker_mirrored.py

Lines changed: 2 additions & 2 deletions
@@ -51,15 +51,15 @@ def test_tf_model_garden(
 ):
     epochs = 1
     global_batch_size = 64
-    train_steps = int(10**6 * epochs / global_batch_size)
+    train_steps = int(10**5 * epochs / global_batch_size)
     steps_per_loop = train_steps // 100
     overrides = (
         f"runtime.enable_xla=False,"
         f"runtime.num_gpus=1,"
         f"runtime.distribution_strategy=multi_worker_mirrored,"
         f"runtime.mixed_precision_dtype=float16,"
         f"task.train_data.global_batch_size={global_batch_size},"
-        f"task.train_data.input_path=/opt/ml/input/data/training/train*,"
+        f"task.train_data.input_path=/opt/ml/input/data/training/train-000*,"
         f"task.train_data.cache=True,"
         f"trainer.train_steps={train_steps},"
         f"trainer.steps_per_loop={steps_per_loop},"
