From f0d4399fb38f0fdedbc1df607a26e4998981a445 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 17 Jul 2019 09:55:57 -0700 Subject: [PATCH 01/19] fix: preserve script path when S3 source_dir is provided --- src/sagemaker/fw_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 8deb803fa3..4bfb084eaa 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -261,7 +261,7 @@ def tar_and_upload_dir( script name. """ if directory and directory.lower().startswith("s3://"): - return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script)) + return UploadedCode(s3_prefix=directory, script_name=script) script_name = script if directory else os.path.basename(script) dependencies = dependencies or [] From c861ce736a55574a48ad7769f7bae0ff89369fc3 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 17 Jul 2019 12:31:47 -0700 Subject: [PATCH 02/19] add unit test --- tests/unit/test_fw_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/unit/test_fw_utils.py b/tests/unit/test_fw_utils.py index 5d1a64d231..81a129ba28 100644 --- a/tests/unit/test_fw_utils.py +++ b/tests/unit/test_fw_utils.py @@ -323,6 +323,18 @@ def test_tar_and_upload_dir_s3(sagemaker_session): assert result == fw_utils.UploadedCode("s3://m", "mnist.py") +def test_tar_and_upload_dir_s3_with_script_dir(sagemaker_session): + bucket = "mybucket" + s3_key_prefix = "something/source" + script = "some/dir/mnist.py" + directory = "s3://m" + result = fw_utils.tar_and_upload_dir( + sagemaker_session, bucket, s3_key_prefix, script, directory + ) + + assert result == fw_utils.UploadedCode("s3://m", "some/dir/mnist.py") + + @patch("sagemaker.utils") def test_tar_and_upload_dir_s3_with_kms(utils, sagemaker_session): bucket = "mybucket" From 2b3c1499e0b43ff6df67210d3962113a627b3959 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 23 Jul 2019 12:42:44 -0700 Subject: [PATCH 03/19] fix estimator --> model entry point logic --- src/sagemaker/chainer/estimator.py | 2 +- src/sagemaker/mxnet/estimator.py | 2 +- src/sagemaker/pytorch/estimator.py | 2 +- src/sagemaker/rl/estimator.py | 2 +- src/sagemaker/sklearn/estimator.py | 2 +- src/sagemaker/tensorflow/estimator.py | 2 +- tests/unit/test_estimator.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index ab70de1985..dba0671e11 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -194,7 +194,7 @@ def create_model( return ChainerModel( self.model_data, role or self.role, - entry_point or self.entry_point, + entry_point or self.uploaded_code.script_name, source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 3a4ebe4bca..754cfb65bd 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -172,7 +172,7 @@ def create_model( return MXNetModel( self.model_data, role or self.role, - entry_point or self.entry_point, + entry_point or self.uploaded_code.script_name, source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index ee84c7002a..2b0420f7d6 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -147,7 +147,7 @@ def create_model( return PyTorchModel( self.model_data, role or self.role, - entry_point or self.entry_point, + entry_point or self.uploaded_code.script_name, source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index dcbeb84fb5..5622c0cbc3 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -216,7 +216,7 @@ def create_model( if not entry_point and (source_dir or dependencies): raise AttributeError("Please provide an `entry_point`.") - entry_point = entry_point or self.entry_point + entry_point = entry_point or self.uploaded_code.script_name source_dir = source_dir or self._model_source_dir() dependencies = dependencies or self.dependencies diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index 45c5ad665f..ae0d211eed 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -154,7 +154,7 @@ def create_model( return SKLearnModel( self.model_data, role, - self.entry_point, + self.uploaded_code.script_name, source_dir=self._model_source_dir(), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index 5a3406a0bd..cd6b100431 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -579,7 +579,7 @@ def _create_default_model( return TensorFlowModel( self.model_data, role, - entry_point or self.entry_point, + entry_point or self.uploaded_code.script_name, source_dir=source_dir or self._model_source_dir(), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, env={"SAGEMAKER_REQUIREMENTS": self.requirements_file}, diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index 766729c341..36087e0912 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -1196,7 +1196,7 @@ def test_git_support_codecommit_with_ssh_no_passphrase_needed(git_clone_repo, sa @patch("time.strftime", return_value=TIMESTAMP) def test_init_with_source_dir_s3(strftime, sagemaker_session): fw = DummyFramework( - entry_point=SCRIPT_PATH, + entry_point=SCRIPT_NAME, source_dir="s3://location", role=ROLE, sagemaker_session=sagemaker_session, From 95a30f39c74f8c5398a3a7278346269d0f881018 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 24 Jul 2019 17:44:53 -0700 Subject: [PATCH 04/19] fix MXNet logic --- src/sagemaker/mxnet/estimator.py | 11 +++++++++-- src/sagemaker/mxnet/model.py | 11 +++++------ src/sagemaker/utils.py | 5 +++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 754cfb65bd..a2f42059a7 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -169,10 +169,10 @@ def create_model( sagemaker.mxnet.model.MXNetModel: A SageMaker ``MXNetModel`` object. See :func:`~sagemaker.mxnet.model.MXNetModel` for full details. """ - return MXNetModel( + model = MXNetModel( self.model_data, role or self.role, - entry_point or self.uploaded_code.script_name, + entry_point, source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name, @@ -187,6 +187,13 @@ def create_model( dependencies=(dependencies or self.dependencies), ) + if entry_point is None: + model.entry_point = ( + self.entry_point if model._is_mms_version() else self.uploaded_code.script_name + ) + + return model + @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): """Convert the job description to init params that can be handled by the diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index 26b6a6ffa7..21e2a08e90 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -123,16 +123,12 @@ def prepare_container_def(self, instance_type, accelerator_type=None): dict[str, str]: A container definition object usable with the CreateModel API. """ - is_mms_version = parse_version(self.framework_version) >= parse_version( - self._LOWEST_MMS_VERSION - ) - deploy_image = self.image if not deploy_image: region_name = self.sagemaker_session.boto_session.region_name framework_name = self.__framework_name__ - if is_mms_version: + if self._is_mms_version(): framework_name += "-serving" deploy_image = create_image_uri( @@ -145,7 +141,7 @@ def prepare_container_def(self, instance_type, accelerator_type=None): ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) - self._upload_code(deploy_key_prefix, is_mms_version) + self._upload_code(deploy_key_prefix, self._is_mms_version()) deploy_env = dict(self.env) deploy_env.update(self._framework_env_vars()) @@ -154,3 +150,6 @@ def prepare_container_def(self, instance_type, accelerator_type=None): return sagemaker.container_def( deploy_image, self.repacked_model_data or self.model_data, deploy_env ) + + def _is_mms_version(self): + return parse_version(self.framework_version) >= parse_version(self._LOWEST_MMS_VERSION) diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 8e44955d1a..f526f8de9a 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -432,9 +432,10 @@ def _save_model(repacked_model_uri, tmp_model_path, sagemaker_session): bucket, key = url.netloc, url.path.lstrip("/") new_key = key.replace(os.path.basename(key), os.path.basename(repacked_model_uri)) - sagemaker_session.boto_session.resource("s3").Object(bucket, new_key).upload_file( - tmp_model_path + s3 = sagemaker_session.boto_session.resource( + "s3", region_name=sagemaker_session.boto_region_name ) + s3.Object(bucket, new_key).upload_file(tmp_model_path) else: shutil.move(tmp_model_path, repacked_model_uri.replace("file://", "")) From 3b49b6d36318de59c4fbc7a59718106759a1142b Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 24 Jul 2019 17:53:39 -0700 Subject: [PATCH 05/19] add docstring and unit test --- src/sagemaker/mxnet/model.py | 2 ++ tests/unit/test_mxnet.py | 38 ++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index 21e2a08e90..d53fc99c32 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -152,4 +152,6 @@ def prepare_container_def(self, instance_type, accelerator_type=None): ) def _is_mms_version(self): + """Return if the MXNet version uses MMS. + """ return parse_version(self.framework_version) >= parse_version(self._LOWEST_MMS_VERSION) diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index bfbb4387d7..8653019857 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -27,7 +27,8 @@ from sagemaker.mxnet import MXNetPredictor, MXNetModel DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data") -SCRIPT_PATH = os.path.join(DATA_DIR, "dummy_script.py") +SCRIPT_NAME = "dummy_script.py" +SCRIPT_PATH = os.path.join(DATA_DIR, SCRIPT_NAME) SERVING_SCRIPT_FILE = "another_dummy_script.py" MODEL_DATA = "s3://mybucket/model" TIMESTAMP = "2017-11-06-14:14:15.672" @@ -183,7 +184,6 @@ def test_create_model(sagemaker_session, mxnet_version): assert model.sagemaker_session == sagemaker_session assert model.framework_version == mxnet_version assert model.py_version == mx.py_version - assert model.entry_point == SCRIPT_PATH assert model.role == ROLE assert model.name == job_name assert model.container_log_level == container_log_level @@ -192,6 +192,40 @@ def test_create_model(sagemaker_session, mxnet_version): assert model.vpc_config is None +@patch("sagemaker.utils.create_tar_file", MagicMock()) +def test_create_model_default_entry_with_mms(sagemaker_session, mxnet_version, skip_if_not_mms_version): + mx = MXNet( + entry_point=SCRIPT_PATH, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + framework_version=mxnet_version, + ) + + mx.fit() + model = mx.create_model() + + assert model.entry_point == SCRIPT_PATH + + +@patch("sagemaker.utils.create_tar_file", MagicMock()) +def test_create_model_default_entry_not_mms(sagemaker_session, mxnet_version, skip_if_mms_version): + mx = MXNet( + entry_point=SCRIPT_PATH, + role=ROLE, + sagemaker_session=sagemaker_session, + train_instance_count=INSTANCE_COUNT, + train_instance_type=INSTANCE_TYPE, + framework_version=mxnet_version, + ) + + mx.fit() + model = mx.create_model() + + assert model.entry_point == SCRIPT_NAME + + def test_create_model_with_optional_params(sagemaker_session): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" From 6e16ab9a5ae5a30b545ee366cff08dc66012433d Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Thu, 25 Jul 2019 14:34:35 -0700 Subject: [PATCH 06/19] add integ test --- tests/data/mxnet_mnist/failure_script.py | 17 ----------------- tests/data/mxnet_mnist/sourcedir.tar.gz | Bin 0 -> 2206 bytes tests/integ/test_mxnet_train.py | 13 +++++++++---- 3 files changed, 9 insertions(+), 21 deletions(-) delete mode 100644 tests/data/mxnet_mnist/failure_script.py create mode 100644 tests/data/mxnet_mnist/sourcedir.tar.gz diff --git a/tests/data/mxnet_mnist/failure_script.py b/tests/data/mxnet_mnist/failure_script.py deleted file mode 100644 index ddc2a061c8..0000000000 --- a/tests/data/mxnet_mnist/failure_script.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from __future__ import absolute_import - - -# For use with integration tests expecting failures. -raise Exception("This failure is expected.") diff --git a/tests/data/mxnet_mnist/sourcedir.tar.gz b/tests/data/mxnet_mnist/sourcedir.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f2feba95380b1146259f6e0fa633f481186ac9d GIT binary patch literal 2206 zcmV;P2x0dhiwFS689H461MOJ-ZrjKaH_-p|N1tKA-~h6fX_B%eCjbj5vF|L@kps)V zL*N?3np{ad^Q+vYZ6!G1Ua9ZVhw9AkQl$K$FIIL@%&Cp$yAnqNmRZ{QNQ*LMf|?*7o}6UTFU?r?|Pzpx!ou8Ip< zf+-2DN|r6F=lDjG^M4%XDUCC7!CA>4?yIAa)$jkvau3Knj(qpv^}y-(hR#999g+$t zUT^63_xrDh9nT@5h*Rdf{lUI-a4;P9kVe;N1FzFNASdIuqqBF%;~$Uguw=lIXAil& z9+?8i$wxb4x4t`oOz=FXMUJ!F-ye9d_XlpT1LkRRmc-eO-|Kv9hy9<}Sc@yI8)A4)Dide}6;& zcY6=|zvm3SZU6sote5KlNc#<)eekHWyW7tGEmph!U$*L-RoDNoJ5Tk0*LA(Y!}+i8 z^md5z(uO>>{^0fhik!qD%Q%Zjl|kz#V!_C$pdtL|oi6zi&I_X)@oa~f$YAI#!?MZ8 zyz0CnDP58*7o_40AYx8tal#0@4_P4y_$l0lxH@8?@?(pShyz{^Afd6TN7=oGKq$03eA>r zHg7Do$@$9Yn1g%-rBKG#73zkQ(~KCSDH%@<@^&;GPrKx&@%i!T#X0$Dban>0F+Q4- z(=+n!baFU8AD>R(?R$V3O+J#pk0*y+!r;{4UUpxUjB{xpdW1zbnKHI^>{+hDzzY_} zvp58tgl6-K&Ka5Kx2(+2_JWmZ%rPz;%!^18r?HSDH*bZq_Hk!c<|zq+StTIPg8;SX zr6Ba0=LwuLQ16{iO)uvKEjg>-=bz%De*T%~S^b>k^Ep^kzvjGt0{2y6gD3k8p&TA+ zQdXr9bRcC#r_+hp48Uj0r2Jx}bVHFIZ;I;w%ac z2I-e+S9dk6j(+30GS5Yz+=uT$MpLG<0c!)d;ZSkO)jr2k&$)d^lbc4?YMdO;8XV-6 zLd|x|FhD_B`{z8)Or@u>K-+m;#Yq&*OIj>Utpsxxyp-PCe3{y}v@{JQ8P*fKvxEu( zjz_AG|0t$e=r%mxS4pybmuDFZp#e3lT`Zbl5l0creAm+%NJG(*9l@}6EBkJtQNqm~gn1^|y)aS1yr{SiALK_(X|h6Vyabi7VaArsy%_VSgl^0l zkSi$z3C$i!gK!AKwI$LDQ!-ss5iSBAe_~xFUgY6|tLKDK+2ex}?mk7Tm8mz0gACz< zg8vdtWn4TWE^;okR9KZT;)noAijw9LO9H5*Zb!SqRF0|(RK2M(wIFm&hkj^+rNzO9 zMjBt96kfL!+=Ife$RSn=+&eT$Sh6}L*(K0v%1B;lvY2>I7+m-Aa9T9alBJU1^1>#( zy5?}@9UbZ<`*rSq&+etlF9ei z9KPOeaOf1+>l-uvLgFP@c;@qAbj>?iQ$3Qbz zV(o)oO87UR>Z*>lirv~qD^I7+!bqeTl!iJJP zfkzOwY>46syJd-4Zyp}Kz4$>3+}v`Jml#9?N5#ZI|2Lt-D7dmTiGd?sz?WHQDA6=2 z0$BjcXn3^W=TQas544!;Jn`2WJzkBndH5UJiM0-F&tmb&@!*l}ipJ1!vVYYcZ1(ig zeEr*O8!#icP*sp16$0>Y zat%X?k8Mpq+tY%vCAU+W?sJ3?XAW?3^w9y*w=q)pw z9@aR%?zfK^;X9F)Yh!|!^5HTLwNB|@`b-~7E6sNK+;V-nLhEZ}{{j$Q6O!Y3ZD!U~~ z@^Z;~)y%VUl>L%Fq@?x-Q11*i-!}2VEjxlS-&~y$<|zygL_Cl~6?}<6XpJDiFY6#M zl(78tz-Cf^hS;NW4h=UGALNdyU8neL8btxs45qQWE7Ncn(-7unIfTI63lk%o(JB!> zX67bwm7&jwZj`L6YgbmA29RTL1L4^2X6u!A!P7MWjP#8-1cha{ahYeA#`Jyg{`BzZ zBsd(O8CM%Elg;OA0N)*tCKG@>KO2oFUjl_*{+lq5rsoK`2~$`7E>`( zgC9OzOjk~54TLqi{cti|ZL~Y*iulB=*tX*Vx77EL#_Dl!MQRO=E864fakbSNBUM-b g*8i)uUE8%?+qGTWwO!k_UC*um0BB>m9RMf*0MzqZqyPW_ literal 0 HcmV?d00001 diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index a4965b33e8..57334f9486 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -30,11 +30,16 @@ @pytest.fixture(scope="module") def mxnet_training_job(sagemaker_session, mxnet_full_version): with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): - script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist.py") + s3_prefix = "integ-test-data/mxnet_mnist" data_path = os.path.join(DATA_DIR, "mxnet_mnist") + s3_source = sagemaker_session.upload_data( + path=os.path.join(data_path, "sourcedir.tar.gz"), key_prefix="{}/src".format(s3_prefix) + ) + mx = MXNet( - entry_point=script_path, + entry_point=os.path.join("mxnet_mnist", "mnist.py"), + source_dir=s3_source, role="SageMakerRole", framework_version=mxnet_full_version, py_version=PYTHON_VERSION, @@ -44,10 +49,10 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version): ) train_input = mx.sagemaker_session.upload_data( - path=os.path.join(data_path, "train"), key_prefix="integ-test-data/mxnet_mnist/train" + path=os.path.join(data_path, "train"), key_prefix="{}/train".format(s3_prefix) ) test_input = mx.sagemaker_session.upload_data( - path=os.path.join(data_path, "test"), key_prefix="integ-test-data/mxnet_mnist/test" + path=os.path.join(data_path, "test"), key_prefix="{}/test".format(s3_prefix) ) mx.fit({"train": train_input, "test": test_input}) From b0d1b11fbca91310ca80934f41a4f803c134553f Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Thu, 5 Sep 2019 16:02:41 -0700 Subject: [PATCH 07/19] testing --- tests/integ/test_mxnet_train.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index 93e5b576ca..7bb110a41f 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -44,8 +44,9 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version): framework_version=mxnet_full_version, py_version=PYTHON_VERSION, train_instance_count=1, - train_instance_type="ml.c4.xlarge", - sagemaker_session=sagemaker_session, + train_instance_type="local", # "ml.c4.xlarge", + # sagemaker_session=sagemaker_session, + image_name="583851319346.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.4.1-cpu-py3", ) train_input = mx.sagemaker_session.upload_data( @@ -59,6 +60,10 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version): return mx.latest_training_job.name +def test_foo(mxnet_training_job): + mxnet_training_job + + @pytest.mark.canary_quick @pytest.mark.regional_testing def test_attach_deploy(mxnet_training_job, sagemaker_session): From d6cf3b5add293cfaf7014b57b380266adbf2d8ea Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 10 Sep 2019 10:01:01 -0700 Subject: [PATCH 08/19] testing --- tests/integ/test_mxnet_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index 7bb110a41f..92242447b6 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -46,7 +46,7 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version): train_instance_count=1, train_instance_type="local", # "ml.c4.xlarge", # sagemaker_session=sagemaker_session, - image_name="583851319346.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.4.1-cpu-py3", + image_name="sagemaker-mxnet:1.4.1-cpu-py3", ) train_input = mx.sagemaker_session.upload_data( From 564c8b14239769673d975f02a5dd8ba2c7c4d8bb Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 10 Sep 2019 14:40:14 -0700 Subject: [PATCH 09/19] try chainer --- tests/integ/test_chainer_train.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py index 5b036cd432..6f7fa4e291 100644 --- a/tests/integ/test_chainer_train.py +++ b/tests/integ/test_chainer_train.py @@ -35,12 +35,17 @@ def test_distributed_cpu_training(sagemaker_local_session, chainer_full_version) @pytest.mark.local_mode -def test_training_with_additional_hyperparameters(sagemaker_local_session, chainer_full_version): - script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") +def test_training_with_additional_hyperparameters(sagemaker_session, sagemaker_local_session, chainer_full_version): + script_path = os.path.join("chainer_mnist", "mnist.py") data_path = os.path.join(DATA_DIR, "chainer_mnist") + s3_source = sagemaker_session.upload_data( + path=os.path.join(data_path, "sourcedir.tar.gz"), key_prefix="integ-test-data/chainer/src" + ) + chainer = Chainer( entry_point=script_path, + source_dir=s3_source, role="SageMakerRole", train_instance_count=1, train_instance_type="local", @@ -48,7 +53,7 @@ def test_training_with_additional_hyperparameters(sagemaker_local_session, chain py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session, hyperparameters={"epochs": 1}, - use_mpi=True, + use_mpi=False, num_processes=2, process_slots_per_host=2, additional_mpi_options="-x NCCL_DEBUG=INFO", From 921181e1fcc25a3dd26eca2d411e21382c8dc80c Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 10 Dec 2019 14:11:58 -0800 Subject: [PATCH 10/19] black format --- tests/integ/test_chainer_train.py | 4 +++- tests/unit/test_mxnet.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py index 9b2f4296be..eec85c9528 100644 --- a/tests/integ/test_chainer_train.py +++ b/tests/integ/test_chainer_train.py @@ -35,7 +35,9 @@ def test_distributed_cpu_training(sagemaker_local_session, chainer_full_version) @pytest.mark.local_mode -def test_training_with_additional_hyperparameters(sagemaker_session, sagemaker_local_session, chainer_full_version): +def test_training_with_additional_hyperparameters( + sagemaker_session, sagemaker_local_session, chainer_full_version +): script_path = os.path.join("chainer_mnist", "mnist.py") data_path = os.path.join(DATA_DIR, "chainer_mnist") diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index a239c6ce74..dd90d41e99 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -204,7 +204,9 @@ def test_create_model(sagemaker_session, mxnet_version): @patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_create_model_default_entry_with_mms(sagemaker_session, mxnet_version, skip_if_not_mms_version): +def test_create_model_default_entry_with_mms( + sagemaker_session, mxnet_version, skip_if_not_mms_version +): mx = MXNet( entry_point=SCRIPT_PATH, role=ROLE, From a7d7004dfdfcc01c39af56fec936e97abbc0d805 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 17 Dec 2019 07:43:00 -0800 Subject: [PATCH 11/19] fix merge --- src/sagemaker/xgboost/estimator.py | 2 +- tests/data/mxnet_mnist/failure_script.py | 17 +++++++++++++++++ tests/integ/test_chainer_train.py | 13 +++---------- tests/integ/test_mxnet_train.py | 4 ---- 4 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 tests/data/mxnet_mnist/failure_script.py diff --git a/src/sagemaker/xgboost/estimator.py b/src/sagemaker/xgboost/estimator.py index bf81b7c6c7..142b2522e3 100644 --- a/src/sagemaker/xgboost/estimator.py +++ b/src/sagemaker/xgboost/estimator.py @@ -154,7 +154,7 @@ def create_model( return XGBoostModel( self.model_data, role, - self.entry_point, + self.uploaded_code.script_name, framework_version=self.framework_version, source_dir=self._model_source_dir(), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, diff --git a/tests/data/mxnet_mnist/failure_script.py b/tests/data/mxnet_mnist/failure_script.py new file mode 100644 index 0000000000..ddc2a061c8 --- /dev/null +++ b/tests/data/mxnet_mnist/failure_script.py @@ -0,0 +1,17 @@ +# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + + +# For use with integration tests expecting failures. +raise Exception("This failure is expected.") diff --git a/tests/integ/test_chainer_train.py b/tests/integ/test_chainer_train.py index eec85c9528..4701472e76 100644 --- a/tests/integ/test_chainer_train.py +++ b/tests/integ/test_chainer_train.py @@ -35,19 +35,12 @@ def test_distributed_cpu_training(sagemaker_local_session, chainer_full_version) @pytest.mark.local_mode -def test_training_with_additional_hyperparameters( - sagemaker_session, sagemaker_local_session, chainer_full_version -): - script_path = os.path.join("chainer_mnist", "mnist.py") +def test_training_with_additional_hyperparameters(sagemaker_local_session, chainer_full_version): + script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py") data_path = os.path.join(DATA_DIR, "chainer_mnist") - s3_source = sagemaker_session.upload_data( - path=os.path.join(data_path, "sourcedir.tar.gz"), key_prefix="integ-test-data/chainer/src" - ) - chainer = Chainer( entry_point=script_path, - source_dir=s3_source, role="SageMakerRole", train_instance_count=1, train_instance_type="local", @@ -55,7 +48,7 @@ def test_training_with_additional_hyperparameters( py_version=PYTHON_VERSION, sagemaker_session=sagemaker_local_session, hyperparameters={"epochs": 1}, - use_mpi=False, + use_mpi=True, num_processes=2, process_slots_per_host=2, additional_mpi_options="-x NCCL_DEBUG=INFO", diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index 8956724096..6214778111 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -60,10 +60,6 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version, cpu_instance_type) return mx.latest_training_job.name -def test_foo(mxnet_training_job): - mxnet_training_job - - @pytest.mark.canary_quick @pytest.mark.regional_testing def test_attach_deploy(mxnet_training_job, sagemaker_session, cpu_instance_type): From a9489e21f24a77754e5f022ae6f0933fa75c91c2 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 28 Jan 2020 14:41:44 -0800 Subject: [PATCH 12/19] address pylint --- src/sagemaker/mxnet/model.py | 45 +++++++++++++++--------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index 19ac45203b..f6c36e73e7 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -17,8 +17,6 @@ import packaging.version -from sagemaker import fw_utils - import sagemaker from sagemaker.fw_utils import ( create_image_uri, @@ -143,29 +141,13 @@ def prepare_container_def(self, instance_type, accelerator_type=None): dict[str, str]: A container definition object usable with the CreateModel API. """ - is_mms_version = packaging.version.Version( - self.framework_version - ) >= packaging.version.Version(self._LOWEST_MMS_VERSION) - deploy_image = self.image if not deploy_image: region_name = self.sagemaker_session.boto_session.region_name - - framework_name = self.__framework_name__ - if is_mms_version: - framework_name += "-serving" - - deploy_image = create_image_uri( - region_name, - framework_name, - instance_type, - self.framework_version, - self.py_version, - accelerator_type=accelerator_type, - ) + deploy_image = self.serving_image_uri(region_name, instance_type) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) - self._upload_code(deploy_key_prefix, is_mms_version) + self._upload_code(deploy_key_prefix, self._is_mms_version) deploy_env = dict(self.env) deploy_env.update(self._framework_env_vars()) @@ -187,10 +169,21 @@ def serving_image_uri(self, region_name, instance_type): str: The appropriate image URI based on the given parameters. """ - return fw_utils.create_image_uri( - region_name, - "-".join([self.__framework_name__, "serving"]), - instance_type, - self.framework_version, - self.py_version, + framework_name = self.__framework_name__ + if self._is_mms_version(): + framework_name += "-serving" + + return create_image_uri( + region_name, framework_name, instance_type, self.framework_version, self.py_version + ) + + def _is_mms_version(self): + """Whether the framework version corresponds to an inference image using + the Multi-Model Server (https://github.com/awslabs/multi-model-server). + + Returns: + bool: If the framework version corresponds to an image using MMS. + """ + return packaging.version.Version(self.framework_version) >= packaging.version.Version( + self._LOWEST_MMS_VERSION ) From 4d949e1bac97bf9614ae5e0155e59910070d05ce Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 29 Jan 2020 15:58:30 -0800 Subject: [PATCH 13/19] fix _is_mms_version() call --- src/sagemaker/mxnet/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index f6c36e73e7..de008ace0a 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -147,7 +147,7 @@ def prepare_container_def(self, instance_type, accelerator_type=None): deploy_image = self.serving_image_uri(region_name, instance_type) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) - self._upload_code(deploy_key_prefix, self._is_mms_version) + self._upload_code(deploy_key_prefix, self._is_mms_version()) deploy_env = dict(self.env) deploy_env.update(self._framework_env_vars()) From 4074e752b58383afc73a95837f1d47bb9abdda33 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Thu, 4 Jun 2020 10:33:48 -0700 Subject: [PATCH 14/19] don't allow for absolute path entry_point with S3 source_dir --- src/sagemaker/chainer/estimator.py | 2 +- src/sagemaker/estimator.py | 16 ++++++--- src/sagemaker/mxnet/estimator.py | 11 ++---- src/sagemaker/pytorch/estimator.py | 2 +- src/sagemaker/rl/estimator.py | 2 +- src/sagemaker/sklearn/estimator.py | 2 +- src/sagemaker/tensorflow/estimator.py | 2 +- src/sagemaker/xgboost/estimator.py | 2 +- tests/integ/test_mxnet_train.py | 2 +- tests/unit/test_mxnet.py | 52 +++++---------------------- 10 files changed, 29 insertions(+), 64 deletions(-) diff --git a/src/sagemaker/chainer/estimator.py b/src/sagemaker/chainer/estimator.py index 7a2f9f8442..80f8902a80 100644 --- a/src/sagemaker/chainer/estimator.py +++ b/src/sagemaker/chainer/estimator.py @@ -216,7 +216,7 @@ def create_model( return ChainerModel( self.model_data, role or self.role, - entry_point or self.uploaded_code.script_name, + entry_point or self._model_entry_point(), source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, container_log_level=self.container_log_level, diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index db3f47662f..f3608f42e0 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -1755,17 +1755,25 @@ def _stage_user_code_in_s3(self): ) def _model_source_dir(self): - """Get the appropriate value to pass as source_dir to model constructor - on deploying + """Get the appropriate value to pass as ``source_dir`` to a model constructor. Returns: - str: Either a local or an S3 path pointing to the source_dir to be - used for code by the model to be deployed + str: Either a local or an S3 path pointing to the ``source_dir`` to be + used for code by the model to be deployed """ return ( self.source_dir if self.sagemaker_session.local_mode else self.uploaded_code.s3_prefix ) + def _model_entry_point(self): + """Get the appropriate value to pass as ``entry_point`` to a model constructor. + + Returns: + str: The path to the entry point script. This can be either an absolute path or + a path relative to ``self._model_source_dir()``. + """ + return self.uploaded_code.script_name if self._model_source_dir() else self.entry_point + def hyperparameters(self): """Return the hyperparameters as a dictionary to use for training. diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 7be2241187..0dc20fc647 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -217,10 +217,10 @@ def create_model( if "name" not in kwargs: kwargs["name"] = self._current_job_name - model = MXNetModel( + return MXNetModel( self.model_data, role or self.role, - entry_point, + entry_point or self._model_entry_point(), source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, container_log_level=self.container_log_level, @@ -234,13 +234,6 @@ def create_model( **kwargs ) - if entry_point is None: - model.entry_point = ( - self.entry_point if model._is_mms_version() else self.uploaded_code.script_name - ) - - return model - @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): """Convert the job description to init params that can be handled by the diff --git a/src/sagemaker/pytorch/estimator.py b/src/sagemaker/pytorch/estimator.py index a4ca086a4f..7ce004be0e 100644 --- a/src/sagemaker/pytorch/estimator.py +++ b/src/sagemaker/pytorch/estimator.py @@ -176,7 +176,7 @@ def create_model( return PyTorchModel( self.model_data, role or self.role, - entry_point or self.uploaded_code.script_name, + entry_point or self._model_entry_point(), source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, container_log_level=self.container_log_level, diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index 42a0181d1f..49b7429641 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -229,7 +229,7 @@ def create_model( if not entry_point and (source_dir or dependencies): raise AttributeError("Please provide an `entry_point`.") - entry_point = entry_point or self.uploaded_code.script_name + entry_point = entry_point or self._model_entry_point() source_dir = source_dir or self._model_source_dir() dependencies = dependencies or self.dependencies diff --git a/src/sagemaker/sklearn/estimator.py b/src/sagemaker/sklearn/estimator.py index 8e4f1a200f..8a020d93c3 100644 --- a/src/sagemaker/sklearn/estimator.py +++ b/src/sagemaker/sklearn/estimator.py @@ -194,7 +194,7 @@ def create_model( return SKLearnModel( self.model_data, role, - entry_point or self.uploaded_code.script_name, + entry_point or self._model_entry_point(), source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, container_log_level=self.container_log_level, diff --git a/src/sagemaker/tensorflow/estimator.py b/src/sagemaker/tensorflow/estimator.py index a6733e2025..78939b967e 100644 --- a/src/sagemaker/tensorflow/estimator.py +++ b/src/sagemaker/tensorflow/estimator.py @@ -660,7 +660,7 @@ def _create_default_model( return TensorFlowModel( self.model_data, role, - entry_point or self.uploaded_code.script_name, + entry_point or self._model_entry_point(), source_dir=source_dir or self._model_source_dir(), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, env={"SAGEMAKER_REQUIREMENTS": self.requirements_file}, diff --git a/src/sagemaker/xgboost/estimator.py b/src/sagemaker/xgboost/estimator.py index 21a80bf7a2..3757a2eb9e 100644 --- a/src/sagemaker/xgboost/estimator.py +++ b/src/sagemaker/xgboost/estimator.py @@ -174,7 +174,7 @@ def create_model( return XGBoostModel( self.model_data, role, - entry_point or self.uploaded_code.script_name, + entry_point or self._model_entry_point(), framework_version=self.framework_version, source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index cd0ebd8767..2e15e205e0 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -39,7 +39,7 @@ def mxnet_training_job(sagemaker_session, mxnet_full_version, cpu_instance_type) ) mx = MXNet( - entry_point=os.path.join("mxnet_mnist", "mnist.py"), + entry_point="mxnet_mnist/mnist.py", source_dir=s3_source, role="SageMakerRole", framework_version=mxnet_full_version, diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 20be63d0ea..6374d40d8d 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -180,7 +180,8 @@ def test_create_model(sagemaker_session, mxnet_version): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" mx = MXNet( - entry_point=SCRIPT_PATH, + entry_point=SCRIPT_NAME, + source_dir=source_dir, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -188,7 +189,6 @@ def test_create_model(sagemaker_session, mxnet_version): framework_version=mxnet_version, container_log_level=container_log_level, base_job_name="job", - source_dir=source_dir, ) job_name = "new_name" @@ -198,6 +198,7 @@ def test_create_model(sagemaker_session, mxnet_version): assert model.sagemaker_session == sagemaker_session assert model.framework_version == mxnet_version assert model.py_version == mx.py_version + assert model.entry_point == SCRIPT_NAME assert model.role == ROLE assert model.name == job_name assert model.container_log_level == container_log_level @@ -206,55 +207,19 @@ def test_create_model(sagemaker_session, mxnet_version): assert model.vpc_config is None -@patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_create_model_default_entry_with_mms( - sagemaker_session, mxnet_version, skip_if_not_mms_version -): - mx = MXNet( - entry_point=SCRIPT_PATH, - role=ROLE, - sagemaker_session=sagemaker_session, - train_instance_count=INSTANCE_COUNT, - train_instance_type=INSTANCE_TYPE, - framework_version=mxnet_version, - ) - - mx.fit() - model = mx.create_model() - - assert model.entry_point == SCRIPT_PATH - - -@patch("sagemaker.utils.create_tar_file", MagicMock()) -def test_create_model_default_entry_not_mms(sagemaker_session, mxnet_version, skip_if_mms_version): - mx = MXNet( - entry_point=SCRIPT_PATH, - role=ROLE, - sagemaker_session=sagemaker_session, - train_instance_count=INSTANCE_COUNT, - train_instance_type=INSTANCE_TYPE, - framework_version=mxnet_version, - ) - - mx.fit() - model = mx.create_model() - - assert model.entry_point == SCRIPT_NAME - - def test_create_model_with_optional_params(sagemaker_session): container_log_level = '"logging.INFO"' source_dir = "s3://mybucket/source" enable_cloudwatch_metrics = "true" mx = MXNet( - entry_point=SCRIPT_PATH, + entry_point=SCRIPT_NAME, + source_dir=source_dir, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, container_log_level=container_log_level, base_job_name="job", - source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics, ) @@ -286,7 +251,8 @@ def test_create_model_with_custom_image(sagemaker_session): source_dir = "s3://mybucket/source" custom_image = "mxnet:2.0" mx = MXNet( - entry_point=SCRIPT_PATH, + entry_point=SCRIPT_NAME, + source_dir=source_dir, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, @@ -294,7 +260,6 @@ def test_create_model_with_custom_image(sagemaker_session): image_name=custom_image, container_log_level=container_log_level, base_job_name="job", - source_dir=source_dir, ) job_name = "new_name" @@ -303,7 +268,7 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.sagemaker_session == sagemaker_session assert model.image == custom_image - assert model.entry_point == SCRIPT_PATH + assert model.entry_point == SCRIPT_NAME assert model.role == ROLE assert model.name == job_name assert model.container_log_level == container_log_level @@ -823,7 +788,6 @@ def test_create_model_with_custom_hosting_image(sagemaker_session): image_name=custom_image, container_log_level=container_log_level, base_job_name="job", - source_dir=source_dir, ) mx.fit(inputs="s3://mybucket/train", job_name="new_name") From 36ec1a7b00c0fa39fe3f3d5999d1abf8fe39141c Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Thu, 4 Jun 2020 17:11:51 -0700 Subject: [PATCH 15/19] address flake8 --- tests/unit/test_mxnet.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index 6374d40d8d..6b58a3e54e 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -776,7 +776,6 @@ def test_model_empty_framework_version(warning, sagemaker_session): def test_create_model_with_custom_hosting_image(sagemaker_session): container_log_level = '"logging.INFO"' - source_dir = "s3://mybucket/source" custom_image = "mxnet:2.0" custom_hosting_image = "mxnet_hosting:2.0" mx = MXNet( From 3933900117369feb239b9be16faad63b441fdea9 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Fri, 5 Jun 2020 10:19:55 -0700 Subject: [PATCH 16/19] address local mode and MXNet + MMS --- src/sagemaker/estimator.py | 5 ++++- src/sagemaker/mxnet/estimator.py | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index f3608f42e0..05ddf73025 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -1772,7 +1772,10 @@ def _model_entry_point(self): str: The path to the entry point script. This can be either an absolute path or a path relative to ``self._model_source_dir()``. """ - return self.uploaded_code.script_name if self._model_source_dir() else self.entry_point + if self.sagemaker_session.local_mode or (self._model_source_dir() is None): + return self.entry_point + + return self.uploaded_code.script_name def hyperparameters(self): """Return the hyperparameters as a dictionary to use for training. diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index 0dc20fc647..c0d9c24774 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -217,10 +217,10 @@ def create_model( if "name" not in kwargs: kwargs["name"] = self._current_job_name - return MXNetModel( + model = MXNetModel( self.model_data, role or self.role, - entry_point or self._model_entry_point(), + entry_point, source_dir=(source_dir or self._model_source_dir()), enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, container_log_level=self.container_log_level, @@ -234,6 +234,11 @@ def create_model( **kwargs ) + if entry_point is None: + model.entry_point = ( + self.entry_point if model._is_mms_version() else self._model_entry_point() + ) + @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): """Convert the job description to init params that can be handled by the From a63563130b63fc373a2600294764fa20ce0f000f Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 9 Jun 2020 08:44:00 -0700 Subject: [PATCH 17/19] add forgotten return statement --- src/sagemaker/mxnet/estimator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sagemaker/mxnet/estimator.py b/src/sagemaker/mxnet/estimator.py index c0d9c24774..c411015878 100644 --- a/src/sagemaker/mxnet/estimator.py +++ b/src/sagemaker/mxnet/estimator.py @@ -239,6 +239,8 @@ def create_model( self.entry_point if model._is_mms_version() else self._model_entry_point() ) + return model + @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): """Convert the job description to init params that can be handled by the From 98f4178f30dd0fe7f096c8154b4a8ed60a9c7a30 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Tue, 9 Jun 2020 14:55:24 -0700 Subject: [PATCH 18/19] update test for new MMS usage (aka not supported) --- tests/integ/test_mxnet_train.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index 2e15e205e0..ee7d31d96a 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -67,7 +67,13 @@ def test_attach_deploy(mxnet_training_job, sagemaker_session, cpu_instance_type) with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): estimator = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session) - predictor = estimator.deploy(1, cpu_instance_type, endpoint_name=endpoint_name) + predictor = estimator.deploy( + 1, + cpu_instance_type, + entry_point="mnist.py", + source_dir=os.path.join(DATA_DIR, "mxnet_mnist", "mnist.py"), + endpoint_name=endpoint_name, + ) data = numpy.zeros(shape=(1, 1, 28, 28)) result = predictor.predict(data) assert result is not None From 2b58b04025ff2efc822a114b76e72217ac1b3c97 Mon Sep 17 00:00:00 2001 From: Lauren Yu <6631887+laurenyu@users.noreply.github.com> Date: Wed, 10 Jun 2020 10:36:07 -0700 Subject: [PATCH 19/19] fix integ test --- tests/integ/test_mxnet_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/test_mxnet_train.py b/tests/integ/test_mxnet_train.py index ee7d31d96a..d73b93880c 100644 --- a/tests/integ/test_mxnet_train.py +++ b/tests/integ/test_mxnet_train.py @@ -71,7 +71,7 @@ def test_attach_deploy(mxnet_training_job, sagemaker_session, cpu_instance_type) 1, cpu_instance_type, entry_point="mnist.py", - source_dir=os.path.join(DATA_DIR, "mxnet_mnist", "mnist.py"), + source_dir=os.path.join(DATA_DIR, "mxnet_mnist"), endpoint_name=endpoint_name, ) data = numpy.zeros(shape=(1, 1, 28, 28))