From 1bf8c66e02974c0bd74d7f57e51913da88d54a9f Mon Sep 17 00:00:00 2001 From: Yijie Zhuang Date: Tue, 23 Jun 2020 14:12:11 -0700 Subject: [PATCH 1/3] modify rl ray images mapping for newer versions --- src/sagemaker/fw_utils.py | 2 +- src/sagemaker/rl/estimator.py | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 1c3e9fcb41..3c73028fed 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -69,7 +69,7 @@ "{} framework does not support version {}. Please use one of the following: {}." ) -VALID_PY_VERSIONS = ["py2", "py3", "py37"] +VALID_PY_VERSIONS = ["py2", "py3", "py37", "py36"] VALID_EIA_FRAMEWORKS = [ "tensorflow", "tensorflow-serving", diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index 2c945d14a0..0ecf6d866f 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -25,6 +25,7 @@ logger = logging.getLogger("sagemaker") +DEFAULT_RL_ACCOUNT = "462105765813" SAGEMAKER_ESTIMATOR = "sagemaker_estimator" SAGEMAKER_ESTIMATOR_VALUE = "RLEstimator" PYTHON_VERSION = "py3" @@ -41,6 +42,8 @@ "0.5": {"tensorflow": "1.11"}, "0.6.5": {"tensorflow": "1.12"}, "0.6": {"tensorflow": "1.12"}, + "0.8.2":{"tensorflow": "2.1"}, + "0.8.5":{"tensorflow": "2.1", "pytorch": "1.5"} }, } @@ -57,6 +60,7 @@ class RLFramework(enum.Enum): TENSORFLOW = "tensorflow" MXNET = "mxnet" + PYTORCH = "pytorch" class RLEstimator(Framework): @@ -64,7 +68,7 @@ class RLEstimator(Framework): COACH_LATEST_VERSION_TF = "0.11.1" COACH_LATEST_VERSION_MXNET = "0.11.0" - RAY_LATEST_VERSION = "0.6.5" + RAY_LATEST_VERSION = "0.8.5" def __init__( self, @@ -277,6 +281,18 @@ def train_image(self): """ if self.image_name: return self.image_name + + # use different account for rl images if ray version is 0.8.2 or later + if self.toolkit == RLToolkit.RAY.value and self.toolkit_version >= "0.8.2": + return fw_utils.create_image_uri(
self.sagemaker_session.boto_region_name, + "rl-ray-container", + self.train_instance_type, + self._image_version(), + py_version="py36", + account=DEFAULT_RL_ACCOUNT + ) + return fw_utils.create_image_uri( self.sagemaker_session.boto_region_name, self._image_framework(), @@ -454,6 +470,13 @@ def _validate_toolkit_support(cls, toolkit, toolkit_version, framework): def _image_version(self): """Placeholder docstring""" + if self.toolkit == RLToolkit.RAY.value and self.toolkit_version >= "0.8.2": + frameworkd_tag = None + if self.framework == RLFramework.TENSORFLOW.value: + frameworkd_tag = "tf" + elif self.framework == RLFramework.PYTORCH.value: + frameworkd_tag = "torch" + return "{}-{}-{}".format(self.toolkit, self.toolkit_version, frameworkd_tag) return "{}{}".format(self.toolkit, self.toolkit_version) def _image_framework(self): From af57234b63664f38bc1a5f75c7d29ce8a12834fe Mon Sep 17 00:00:00 2001 From: Yijie Zhuang Date: Fri, 26 Jun 2020 15:49:58 -0700 Subject: [PATCH 2/3] fix format errors --- src/sagemaker/rl/estimator.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index 0ecf6d866f..3248040db9 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -42,8 +42,8 @@ "0.5": {"tensorflow": "1.11"}, "0.6.5": {"tensorflow": "1.12"}, "0.6": {"tensorflow": "1.12"}, - "0.8.2":{"tensorflow": "2.1"}, - "0.8.5":{"tensorflow": "2.1", "pytorch": "1.5"} + "0.8.2": {"tensorflow": "2.1"}, + "0.8.5": {"tensorflow": "2.1", "pytorch": "1.5"}, }, } @@ -290,8 +290,8 @@ def train_image(self): self.train_instance_type, self._image_version(), py_version="py36", - account=DEFAULT_RL_ACCOUNT - ) + account=DEFAULT_RL_ACCOUNT, + ) return fw_utils.create_image_uri( self.sagemaker_session.boto_region_name, @@ -506,7 +506,10 @@ def default_metric_definitions(cls, toolkit): float_regex = "[-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?" 
# noqa: W605, E501 return [ - {"Name": "episode_reward_mean", "Regex": "episode_reward_mean: (%s)" % float_regex}, - {"Name": "episode_reward_max", "Regex": "episode_reward_max: (%s)" % float_regex}, + { + "Name": "episode_reward_mean", + "Regex": "episode_reward_mean: (%s)" % float_regex, + }, + {"Name": "episode_reward_max", "Regex": "episode_reward_max: (%s)" % float_regex, }, ] raise ValueError("An unknown RLToolkit enum was passed in. toolkit: {}".format(toolkit)) From ebe8eb3705d7bbbd3cc2f7946deb38484f3b55ac Mon Sep 17 00:00:00 2001 From: Yijie Zhuang Date: Fri, 3 Jul 2020 13:22:48 -0700 Subject: [PATCH 3/3] modify train_ray.py for compatibility --- src/sagemaker/rl/estimator.py | 7 +++++-- tests/data/ray_cartpole/train_ray.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sagemaker/rl/estimator.py b/src/sagemaker/rl/estimator.py index 3248040db9..e3d6a99ad6 100644 --- a/src/sagemaker/rl/estimator.py +++ b/src/sagemaker/rl/estimator.py @@ -508,8 +508,11 @@ def default_metric_definitions(cls, toolkit): return [ { "Name": "episode_reward_mean", - "Regex": "episode_reward_mean: (%s)" % float_regex, + "Regex": "episode_reward_mean: {}".format(float_regex), + }, + { + "Name": "episode_reward_max", + "Regex": "episode_reward_max: {}".format(float_regex), }, - {"Name": "episode_reward_max", "Regex": "episode_reward_max: (%s)" % float_regex, }, ] raise ValueError("An unknown RLToolkit enum was passed in. 
toolkit: {}".format(toolkit)) diff --git a/tests/data/ray_cartpole/train_ray.py b/tests/data/ray_cartpole/train_ray.py index aea02f621c..d19d625b65 100644 --- a/tests/data/ray_cartpole/train_ray.py +++ b/tests/data/ray_cartpole/train_ray.py @@ -5,12 +5,12 @@ from ray.tune.logger import pretty_print # Based on https://github.com/ray-project/ray/blob/master/doc/source/rllib-training.rst#python-api -ray.init(log_to_driver=False) +ray.init(log_to_driver=False, webui_host="127.0.0.1") config = ppo.DEFAULT_CONFIG.copy() config["num_gpus"] = int(os.environ.get("SM_NUM_GPUS", 0)) checkpoint_dir = os.environ.get("SM_MODEL_DIR", "/Users/nadzeya/gym") config["num_workers"] = 1 -agent = ppo.PPOAgent(config=config, env="CartPole-v0") +agent = ppo.PPOTrainer(config=config, env="CartPole-v0") # Can optionally call agent.restore(path) to load a checkpoint.