You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Describe the bug
When creating a pipeline with a TrainingStep with a TensorFlow Estimator where output_path=Join, the pipeline definition cannot be created if model_dir is not passed as a parameter. On running pipeline.definition() it throws
TypeError: expected str, bytes or os.PathLike object, not Join
To reproduce
Create a Tensorflow Estimator where the output_path given is a Join object. Do not pass any value for the model_dir parameter.
Expected behavior
The SDK generates the model_dir as required.
Screenshots or logs
from sagemaker.tensorflow import TensorFlow
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.functions import Join
import time
output_loc = Join(on='/', values=["s3:/", bucket, prefix, s3_output_loc])
# Where to store the trained model
model_path = f"s3://{bucket}/{prefix}/model/"
hyperparameters = {"epochs": training_epochs}
tensorflow_version = "2.4.1"
python_version = "py37"
tf2_estimator = TensorFlow(
source_dir="code",
entry_point="train.py",
instance_type="ml.m5.xlarge",
instance_count=1,
framework_version=tensorflow_version,
#model_dir=False,
role=role,
base_job_name="tf2-california-housing-train",
output_path=output_loc,
hyperparameters=hyperparameters,
py_version=python_version,
)
# Use the tf2_estimator in a Sagemaker pipelines ProcessingStep.
# NOTE how the input to the training job directly references the output of the previous step.
step_train_model = TrainingStep(
name="Train-California-Housing-Model",
estimator=tf2_estimator,
inputs={
"train": TrainingInput(
s3_data=step_preprocess_data.properties.ProcessingOutputConfig.Outputs[
"train"
].S3Output.S3Uri,
content_type="text/csv",
),
"test": TrainingInput(
s3_data=step_preprocess_data.properties.ProcessingOutputConfig.Outputs[
"test"
].S3Output.S3Uri,
content_type="text/csv",
),
},
)
from sagemaker.workflow.pipeline import Pipeline
# Create a Sagemaker Pipeline.
# Each parameter for the pipeline must be set as a parameter explicitly when the pipeline is created.
# Also pass in each of the steps created above.
# Note that the order of execution is determined from each step's dependencies on other steps,
# not on the order they are passed in below.
pipeline = Pipeline(
name=pipeline_name,
parameters=[
processing_instance_type,
training_instance_type,
input_data,
training_epochs,
accuracy_mse_threshold,
endpoint_instance_type,
s3_output_loc,
],
steps=[step_train_model],
)
import json
pipeline.definition()
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/pipeline.py in definition(self)
290 def definition(self) -> str:
291 """Converts a request structure to string representation for workflow service calls."""
--> 292 request_dict = self.to_request()
293 self._interpolate_step_collection_name_in_depends_on(request_dict["Steps"])
294 request_dict["PipelineExperimentConfig"] = interpolate(
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/pipeline.py in to_request(self)
89 if self.pipeline_experiment_config is not None
90 else None,
---> 91 "Steps": list_to_request(self.steps),
92 }
93
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/utilities.py in list_to_request(entities)
40 for entity in entities:
41 if isinstance(entity, Entity):
---> 42 request_dicts.append(entity.to_request())
43 elif isinstance(entity, StepCollection):
44 request_dicts.extend(entity.request_dicts())
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/steps.py in to_request(self)
359 def to_request(self) -> RequestType:
360 """Updates the request dictionary with cache configuration."""
--> 361 request_dict = super().to_request()
362 if self.cache_config:
363 request_dict.update(self.cache_config.config)
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/steps.py in to_request(self)
221 def to_request(self) -> RequestType:
222 """Gets the request structure for `ConfigurableRetryStep`."""
--> 223 step_dict = super().to_request()
224 if self.retry_policies:
225 step_dict["RetryPolicies"] = self._resolve_retry_policy(self.retry_policies)
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/steps.py in to_request(self)
104 "Name": self.name,
105 "Type": self.step_type.value,
--> 106 "Arguments": self.arguments,
107 }
108 if self.depends_on:
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/steps.py in arguments(self)
340 self.estimator._prepare_for_training(self.job_name)
341 train_args = _TrainingJob._get_train_args(
--> 342 self.estimator, self.inputs, experiment_config=dict()
343 )
344 request_dict = self.estimator.sagemaker_session._get_train_request(**train_args)
/opt/conda/lib/python3.7/site-packages/sagemaker/estimator.py in _get_train_args(cls, estimator, inputs, experiment_config)
1841 config = _Job._load_config(inputs, estimator)
1842
-> 1843 current_hyperparameters = estimator.hyperparameters()
1844 if current_hyperparameters is not None:
1845 hyperparameters = {
/opt/conda/lib/python3.7/site-packages/sagemaker/tensorflow/estimator.py in hyperparameters(self)
362 if self.model_dir is not False:
363 self.model_dir = self.model_dir or self._default_s3_path(
--> 364 "model", mpi=additional_hyperparameters.get(self.LAUNCH_MPI_ENV_NAME, False)
365 )
366 additional_hyperparameters["model_dir"] = self.model_dir
/opt/conda/lib/python3.7/site-packages/sagemaker/tensorflow/estimator.py in _default_s3_path(self, directory, mpi)
379 return "/opt/ml/model"
380 if self._current_job_name:
--> 381 return s3.s3_path_join(self.output_path, self._current_job_name, directory)
382 return None
383
/opt/conda/lib/python3.7/site-packages/sagemaker/s3.py in s3_path_join(*args)
56 return str(pathlib.PurePosixPath(args[0], path)).replace("s3:/", "s3://")
57
---> 58 return str(pathlib.PurePosixPath(*args)).lstrip("/")
59
60
/opt/conda/lib/python3.7/pathlib.py in __new__(cls, *args)
640 if cls is PurePath:
641 cls = PureWindowsPath if os.name == 'nt' else PurePosixPath
--> 642 return cls._from_parts(args)
643
644 def __reduce__(self):
/opt/conda/lib/python3.7/pathlib.py in _from_parts(cls, args, init)
672 # right flavour.
673 self = object.__new__(cls)
--> 674 drv, root, parts = self._parse_args(args)
675 self._drv = drv
676 self._root = root
/opt/conda/lib/python3.7/pathlib.py in _parse_args(cls, args)
656 parts += a._parts
657 else:
--> 658 a = os.fspath(a)
659 if isinstance(a, str):
660 # Force-cast str subclasses to str (issue #21127)
TypeError: expected str, bytes or os.PathLike object, not Join
If applicable, add screenshots or logs to help explain your problem.
System information
A description of your system. Please provide:
SageMaker Python SDK version: 2.92.1
Framework name (eg. PyTorch) or algorithm (eg. KMeans): TensorFlow
Framework version: 2.4.1
Python version: 3.7.10 (Studio Data Science image)
CPU or GPU: CPU
Custom Docker image (Y/N): N
Additional context
If model_dir is passed either a False or Join from my testing it will not throw the error.
The text was updated successfully, but these errors were encountered:
@andjsmi The fix has been released recently in v2.93.0. Could you check if it works for you?
Closing this issue and feel free to reopen if you have any questions.
Describe the bug
When creating a pipeline with a TrainingStep with a TensorFlow Estimator where output_path=Join, the pipeline definition cannot be created if model_dir is not passed as a parameter. On running
pipeline.definition()
it throwsTypeError: expected str, bytes or os.PathLike object, not Join
To reproduce
Create a Tensorflow Estimator where the output_path given is a Join object. Do not pass any value for the model_dir parameter.
Expected behavior
The SDK generates the model_dir as required.
Screenshots or logs
If applicable, add screenshots or logs to help explain your problem.
System information
A description of your system. Please provide:
Additional context
If model_dir is passed either a False or Join from my testing it will not throw the error.
The text was updated successfully, but these errors were encountered: