feature: pytorch 1.3.1 eia support #1328
@@ -53,7 +53,14 @@
 )

 VALID_PY_VERSIONS = ["py2", "py3"]
-VALID_EIA_FRAMEWORKS = ["tensorflow", "tensorflow-serving", "mxnet", "mxnet-serving"]
+VALID_EIA_FRAMEWORKS = [
+    "tensorflow",
+    "tensorflow-serving",
+    "mxnet",
+    "mxnet-serving",
+    "pytorch",
Review comments on the added "pytorch" entry:
Reviewer: I don't recall exactly where this logic is used, but doesn't "pytorch" technically still not support EI, since none of the "pytorch" images that also had the serving stack supported EI? It's just "pytorch-serving" with the EI support, right?
Author: That makes sense. The only version of PyTorch that supports EIA is 1.3.1, and that's after the split. So only …
+    "pytorch-serving",
+]
 VALID_ACCOUNTS_BY_REGION = {"us-gov-west-1": "246785580436", "us-iso-east-1": "744548109606"}
 ASIMOV_VALID_ACCOUNTS_BY_REGION = {"us-iso-east-1": "886529160074"}
 OPT_IN_ACCOUNTS_BY_REGION = {"ap-east-1": "057415533634", "me-south-1": "724002660598"}
@@ -71,6 +78,7 @@
     "mxnet-serving-eia": "mxnet-inference-eia",
     "pytorch": "pytorch-training",
     "pytorch-serving": "pytorch-inference",
+    "pytorch-serving-eia": "pytorch-inference-eia",
 }

 MERGED_FRAMEWORKS_LOWEST_VERSIONS = {
@@ -82,6 +90,7 @@
     "mxnet-serving-eia": [1, 4, 1],
     "pytorch": [1, 2, 0],
     "pytorch-serving": [1, 2, 0],
+    "pytorch-serving-eia": {"py3": [1, 3, 1]},
 }

 DEBUGGER_UNSUPPORTED_REGIONS = ["us-gov-west-1", "us-iso-east-1"]
@@ -117,6 +126,8 @@ def _is_dlc_version(framework, framework_version, py_version):
     """
     lowest_version_list = MERGED_FRAMEWORKS_LOWEST_VERSIONS.get(framework)
     if isinstance(lowest_version_list, dict):
+        if py_version not in lowest_version_list:
+            raise ValueError("{} is not supported in {}.".format(framework, py_version))
Review comments on the added py_version check:
Reviewer: How are we handling other cases where EI wouldn't work, e.g. PyTorch 1.2? I think it'd be better to keep this method strictly about determining whether the version is one where a DLC image is used, and to handle other logic (e.g. "is this framework + Python version combination valid?") in its own location.
Author: The version safeguard is here: … I've moved the framework_version + py_version check to the framework model class and moved the unit test accordingly.
Reviewer: Sorry for not being clear - I meant that it didn't really fit in that particular function, but I think it's okay to leave in.
         lowest_version_list = lowest_version_list[py_version]

     if lowest_version_list:
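To make the dict-vs-list handling above easier to follow, here is a self-contained sketch of the lookup. Only the lines shown in the hunk are taken from this PR; the version comparison at the end is an assumption about the surrounding function body.

```python
# Standalone sketch of the lookup pattern used above; not the SDK's exact code.
MERGED_FRAMEWORKS_LOWEST_VERSIONS = {
    "pytorch-serving": [1, 2, 0],
    # For EIA the lowest supported version also depends on the Python version.
    "pytorch-serving-eia": {"py3": [1, 3, 1]},
}


def _is_dlc_version(framework, framework_version, py_version):
    """Return True if this version is served from the newer DLC registries."""
    lowest_version_list = MERGED_FRAMEWORKS_LOWEST_VERSIONS.get(framework)
    if isinstance(lowest_version_list, dict):
        # EIA entries are keyed by Python version (only py3 for PyTorch EIA).
        if py_version not in lowest_version_list:
            raise ValueError("{} is not supported in {}.".format(framework, py_version))
        lowest_version_list = lowest_version_list[py_version]

    if lowest_version_list:
        version = [int(s) for s in framework_version.split(".")]
        return version >= lowest_version_list
    return False


print(_is_dlc_version("pytorch-serving-eia", "1.3.1", "py3"))  # True
print(_is_dlc_version("pytorch-serving", "1.1.0", "py3"))      # False
```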
@@ -6,6 +6,8 @@ With PyTorch Estimators and Models, you can train and host PyTorch models on Ama

 Supported versions of PyTorch: ``0.4.0``, ``1.0.0``, ``1.1.0``, ``1.2.0``, ``1.3.1``, ``1.4.0``.

+Supported versions of TensorFlow for Elastic Inference: ``1.3.1``.
Review comments on the added line:
Reviewer: Typo: "TensorFlow" => "PyTorch".
Reviewer: PyTorch, not TensorFlow.
+
 We recommend that you use the latest supported version, because that's where we focus most of our development efforts.

 You can visit the PyTorch repository at https://github.com/pytorch/pytorch.
@@ -0,0 +1,13 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Review comments on the new test data script:
Reviewer: Would it make more sense to name this file something like "empty_inference_script"? IMO "mnist" implies a script that trains a model on MNIST, because that's how we use it everywhere else 😂
Author: Updated script name.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+# This file is intentionally left blank to invoke default model_fn and predict_fn
@@ -27,6 +27,10 @@
 MNIST_DIR = os.path.join(DATA_DIR, "pytorch_mnist")
 MNIST_SCRIPT = os.path.join(MNIST_DIR, "mnist.py")

+EIA_DIR = os.path.join(DATA_DIR, "pytorch_eia")
+EIA_MODEL = os.path.join(EIA_DIR, "model_mnist.tar.gz")
+EIA_SCRIPT = os.path.join(EIA_DIR, "mnist.py")
+

 @pytest.fixture(scope="module", name="pytorch_training_job")
 def fixture_training_job(sagemaker_session, pytorch_full_version, cpu_instance_type):
@@ -115,6 +119,32 @@ def test_deploy_model(pytorch_training_job, sagemaker_session, cpu_instance_type
     assert output.shape == (batch_size, 10)


+@pytest.mark.skipif(PYTHON_VERSION == "py2", reason="PyTorch EIA does not support Python 2.")
+def test_deploy_model_with_accelerator(sagemaker_session, cpu_instance_type):
+    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
+    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
+    pytorch = PyTorchModel(
+        model_data,
+        "SageMakerRole",
+        framework_version="1.3.1",
+        entry_point=EIA_SCRIPT,
+        sagemaker_session=sagemaker_session,
+    )
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        predictor = pytorch.deploy(
+            initial_instance_count=1,
+            instance_type=cpu_instance_type,
+            accelerator_type="ml.eia1.large",
Review comment on the accelerator_type argument:
Reviewer: Could use …
+            endpoint_name=endpoint_name,
+        )
+
+        batch_size = 100
+        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
+        output = predictor.predict(data)
+
+        assert output.shape == (batch_size, 10)
+
+
 def _upload_training_data(pytorch):
     return pytorch.sagemaker_session.upload_data(
         path=os.path.join(MNIST_DIR, "training"),
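The new test depends on a pre-built `model_mnist.tar.gz` artifact in the `pytorch_eia` test data directory. PyTorch models served with Elastic Inference are generally expected to be TorchScript models, so an artifact like this could plausibly be produced along the following lines; the toy network and file names are assumptions, not the code used to build the checked-in archive.

```python
# Hypothetical sketch of building a TorchScript model archive for the EIA test.
import tarfile

import torch
from torch import nn


class SmallMnistNet(nn.Module):
    """Toy stand-in for the MNIST model; not the network used in this PR."""

    def __init__(self):
        super(SmallMnistNet, self).__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))

    def forward(self, x):
        return self.net(x)


model = SmallMnistNet().eval()
example = torch.rand(1, 1, 28, 28)
traced = torch.jit.trace(model, example)  # trace to TorchScript for EI serving
traced.save("model.pth")

with tarfile.open("model_mnist.tar.gz", "w:gz") as tar:
    tar.add("model.pth")
```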