From 90918024b1abb0eeae03d468526d53053bf24f2b Mon Sep 17 00:00:00 2001 From: manvento Date: Mon, 18 Nov 2019 15:45:12 +0100 Subject: [PATCH 1/2] Fix aws#847 Windows paths start with a drive letter and a colon. Moreover, when composing the path inside the Docker container it is better not to use os.path.join, because the container runs a Unix OS, while os.path.join uses the host OS path separator. --- src/sagemaker/local/image.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index 5a42a10f4c..c7cd51fa69 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -248,7 +248,11 @@ def retrieve_artifacts(self, compose_data, output_data_config, job_name): for host in self.hosts: volumes = compose_data["services"][str(host)]["volumes"] for volume in volumes: - host_dir, container_dir = volume.split(":") + if re.search(r"[A-Za-z]:", volume): + unit, host_dir, container_dir = volume.split(":") + host_dir = unit + ":" + host_dir + else: + host_dir, container_dir = volume.split(":") if container_dir == "/opt/ml/model": sagemaker.local.utils.recursive_copy(host_dir, model_artifacts) elif container_dir == "/opt/ml/output": @@ -625,7 +629,8 @@ def __init__(self, host_dir, container_dir=None, channel=None): raise ValueError("container_dir and channel cannot be declared together.") self.container_dir = ( - container_dir if container_dir else os.path.join("/opt/ml/input/data", channel) + # path separator should be always in unix format, because docker vm is running unix + container_dir if container_dir else "/opt/ml/input/data/" + channel ) self.host_dir = host_dir if platform.system() == "Darwin" and host_dir.startswith("/var"): From d097e52193c533b19690f845744e9b516dfc39a1 Mon Sep 17 00:00:00 2001 From: manvento Date: Tue, 19 Nov 2019 10:15:44 +0100 Subject: [PATCH 2/2] Fix the regular expression The drive-letter pattern is now anchored to the start of the volume string, so it matches only classic Windows paths.
--- src/sagemaker/local/image.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py index c7cd51fa69..1480a54ab5 100644 --- a/src/sagemaker/local/image.py +++ b/src/sagemaker/local/image.py @@ -44,7 +44,6 @@ DOCKER_COMPOSE_HTTP_TIMEOUT_ENV = "COMPOSE_HTTP_TIMEOUT" DOCKER_COMPOSE_HTTP_TIMEOUT = "120" - # Environment variables to be set during training REGION_ENV_NAME = "AWS_REGION" TRAINING_JOB_NAME_ENV_NAME = "TRAINING_JOB_NAME" @@ -248,7 +247,7 @@ def retrieve_artifacts(self, compose_data, output_data_config, job_name): for host in self.hosts: volumes = compose_data["services"][str(host)]["volumes"] for volume in volumes: - if re.search(r"[A-Za-z]:", volume): + if re.search(r"^[A-Za-z]:", volume): unit, host_dir, container_dir = volume.split(":") host_dir = unit + ":" + host_dir else: @@ -628,10 +627,7 @@ def __init__(self, host_dir, container_dir=None, channel=None): if container_dir and channel: raise ValueError("container_dir and channel cannot be declared together.") - self.container_dir = ( - # path separator should be always in unix format, because docker vm is running unix - container_dir if container_dir else "/opt/ml/input/data/" + channel - ) + self.container_dir = container_dir if container_dir else "/opt/ml/input/data/" + channel self.host_dir = host_dir if platform.system() == "Darwin" and host_dir.startswith("/var"): self.host_dir = os.path.join("/private", host_dir)