
Commit 85af929

Merge branch 'master' into feature/new_fg_utils

2 parents b31a0fe + 58fe72a, commit 85af929
25 files changed: +554 -368 lines

CHANGELOG.md (+17)

@@ -1,5 +1,22 @@
 # Changelog

+## v2.131.0 (2023-01-31)
+
+### Features
+
+ * Display file diff on black-check
+ * Support for environment variables in the HPO
+ * Support role as PipelineParameter in Processor class
+ * Add TrainingImageConfig support for SageMaker training jobs
+
+### Bug Fixes and Other Changes
+
+ * Use FeatureGroup's Session in non-concurrent ingestion
+ * Update feature_group.py ingest() description
+ * Do not use the print function; use a logger instead
+ * Add batch_get_record and search API for FeatureStore
+ * Fix hashing problem for framework processors with identical source dirs
+
 ## v2.130.0 (2023-01-26)

 ### Features

VERSION (+1 -1)

@@ -1 +1 @@
-2.130.1.dev0
+2.131.1.dev0

doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst (+41 -7)

@@ -6,9 +6,47 @@ New features, bug fixes, and improvements are regularly made to the SageMaker
 distributed model parallel library.

-SageMaker Distributed Model Parallel 1.13.0 Release Notes
+SageMaker Distributed Model Parallel 1.14.0 Release Notes
 =========================================================

+*Date: Jan. 30. 2023*
+
+**Currency Updates**
+
+* Added support for PyTorch v1.13.1
+
+**Improvements**
+
+* Upgraded the flash-attention (https://github.com/HazyResearch/flash-attention) library to v0.2.6.post1
+
+**Migration to AWS Deep Learning Containers**
+
+This version passed benchmark testing and is migrated to the following AWS Deep Learning Containers (DLC):
+
+- SageMaker training container for PyTorch v1.13.1
+
+  .. code::
+
+    763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.13.1-gpu-py39-cu117-ubuntu20.04-sagemaker
+
+Binary file of this version of the library for `custom container
+<https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-sm-sdk.html#model-parallel-bring-your-own-container>`_ users:
+
+- For PyTorch 1.13.1
+
+  .. code::
+
+    https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.13.1/build-artifacts/2023-01-19-18-35/smdistributed_modelparallel-1.14.0-cp39-cp39-linux_x86_64.whl
+
+----
+
+Release History
+===============
+
+SageMaker Distributed Model Parallel 1.13.0 Release Notes
+---------------------------------------------------------
+
 *Date: Dec. 15. 2022*

 **New Features**

@@ -46,16 +84,12 @@ This version passed benchmark testing and is migrated to the following AWS Deep
 Binary file of this version of the library for `custom container
 <https://docs.aws.amazon.com/sagemaker/latest/dg/model-parallel-sm-sdk.html#model-parallel-bring-your-own-container>`_ users:

-- For PyTorch 1.12.0
+- For PyTorch 1.12.1

 .. code::

   https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.12.1/build-artifacts/2022-12-08-21-34/smdistributed_modelparallel-1.13.0-cp38-cp38-linux_x86_64.whl

-----
-
-Release History
-===============

 SageMaker Distributed Model Parallel 1.11.0 Release Notes
 ---------------------------------------------------------

@@ -92,7 +126,7 @@ Binary file of this version of the library for `custom container

 .. code::

-  https://sagemaker-distribu
+  https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.12.0/build-artifacts/2022-08-12-16-58/smdistributed_modelparallel-1.11.0-cp38-cp38-linux_x86_64.whl

 SageMaker Distributed Model Parallel 1.10.1 Release Notes
 ---------------------------------------------------------

doc/api/training/smp_versions/latest.rst (+2 -2)

@@ -10,8 +10,8 @@ depending on which version of the library you need to use.
 To use the library, reference the
 **Common API** documentation alongside the framework specific API documentation.

-Version 1.11.0, 1.13.0 (Latest)
-===============================
+Version 1.11.0, 1.13.0, 1.14.0 (Latest)
+=======================================

 To use the library, reference the Common API documentation alongside the framework specific API documentation.

src/sagemaker/debugger/framework_profile.py (+6)

@@ -143,6 +143,12 @@ def __init__(
             profiling. Configure it using the
             :class:`~sagemaker.debugger.metrics_config.DetailedProfilingConfig` class.
             Pass ``DetailedProfilingConfig()`` to use the default configuration.
+
+            .. warning::
+                The detailed framework profiling feature does not support TensorFlow v2.11
+                and later. To use detailed profiling, use a TensorFlow version between
+                v2.3.1 and v2.10.0.
+
         dataloader_profiling_config (DataloaderProfilingConfig): The configuration for
             dataloader metrics profiling. Configure it using the
             :class:`~sagemaker.debugger.metrics_config.DataloaderProfilingConfig` class.
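
The snippet below is a minimal sketch (not part of this commit) of how the
``detailed_profiling_config`` parameter documented above is typically wired into a
training job. The step values are illustrative, and it assumes a TensorFlow version
in the supported range between v2.3.1 and v2.10.0:

    # Sketch: enable detailed framework profiling for steps 5-6.
    from sagemaker.debugger import FrameworkProfile, ProfilerConfig
    from sagemaker.debugger.metrics_config import DetailedProfilingConfig

    profiler_config = ProfilerConfig(
        framework_profile_params=FrameworkProfile(
            detailed_profiling_config=DetailedProfilingConfig(start_step=5, num_steps=2)
        )
    )
    # profiler_config can then be passed to an estimator,
    # e.g. TensorFlow(..., profiler_config=profiler_config).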

src/sagemaker/debugger/metrics_config.py (+6 -2)

@@ -203,8 +203,7 @@ def __init__(
         ):
             """Specify target steps or a target duration to profile.

-            By default, it profiles step 5
-            of training.
+            By default, it profiles step 5 of the training job.

             If **profile_default_steps** is set to `True` and none of the other
             range parameters is specified,

@@ -224,6 +223,11 @@ def __init__(
             if one of the two pairs is used. If both pairs are specified, a
             conflict error occurs.

+            .. warning::
+                The detailed framework profiling feature does not support TensorFlow v2.11
+                and later. To use detailed profiling, use a TensorFlow version between
+                v2.3.1 and v2.10.0.
+
             """
             assert isinstance(
                 profile_default_steps, bool
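
For context, a small sketch (not part of this commit) of the two mutually exclusive
range pairs the docstring describes; the values are illustrative:

    # Sketch: DetailedProfilingConfig accepts a step range or a time range, not both.
    import time

    from sagemaker.debugger.metrics_config import DetailedProfilingConfig

    step_based = DetailedProfilingConfig(start_step=5, num_steps=3)  # steps 5-7
    time_based = DetailedProfilingConfig(
        start_unix_time=int(time.time()), duration=600  # ~10 minutes from now
    )
    # Specifying both pairs at once raises a conflict error, per the docstring above.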

src/sagemaker/experiments/_environment.py (+12 -20)

@@ -18,12 +18,13 @@
 import logging
 import os

+from sagemaker import Session
 from sagemaker.experiments import trial_component
 from sagemaker.utils import retry_with_backoff

 TRAINING_JOB_ARN_ENV = "TRAINING_JOB_ARN"
 PROCESSING_JOB_CONFIG_PATH = "/opt/ml/config/processingjobconfig.json"
-TRANSFORM_JOB_ENV_BATCH_VAR = "SAGEMAKER_BATCH"
+TRANSFORM_JOB_ARN_ENV = "TRANSFORM_JOB_ARN"
 MAX_RETRY_ATTEMPTS = 7

 logger = logging.getLogger(__name__)

@@ -40,7 +41,7 @@ class _EnvironmentType(enum.Enum):
 class _RunEnvironment(object):
     """Retrieves job specific data from the environment."""

-    def __init__(self, environment_type, source_arn):
+    def __init__(self, environment_type: _EnvironmentType, source_arn: str):
         """Init for _RunEnvironment.

         Args:

@@ -53,9 +54,9 @@ def __init__(self, environment_type, source_arn):
     @classmethod
     def load(
         cls,
-        training_job_arn_env=TRAINING_JOB_ARN_ENV,
-        processing_job_config_path=PROCESSING_JOB_CONFIG_PATH,
-        transform_job_batch_var=TRANSFORM_JOB_ENV_BATCH_VAR,
+        training_job_arn_env: str = TRAINING_JOB_ARN_ENV,
+        processing_job_config_path: str = PROCESSING_JOB_CONFIG_PATH,
+        transform_job_arn_env: str = TRANSFORM_JOB_ARN_ENV,
     ):
         """Loads source arn of current job from environment.

@@ -64,8 +65,8 @@ def load(
             (default: `TRAINING_JOB_ARN`).
             processing_job_config_path (str): The processing job config path
             (default: `/opt/ml/config/processingjobconfig.json`).
-            transform_job_batch_var (str): The environment variable indicating if
-            it is a transform job (default: `SAGEMAKER_BATCH`).
+            transform_job_arn_env (str): The environment key for the transform job ARN
+            (default: `TRANSFORM_JOB_ARN`).

         Returns:
             _RunEnvironment: Job data loaded from the environment. None if config does not exist.

@@ -78,16 +79,15 @@ def load(
             environment_type = _EnvironmentType.SageMakerProcessingJob
             source_arn = json.loads(open(processing_job_config_path).read())["ProcessingJobArn"]
             return _RunEnvironment(environment_type, source_arn)
-        if transform_job_batch_var in os.environ and os.environ[transform_job_batch_var] == "true":
+        if transform_job_arn_env in os.environ:
             environment_type = _EnvironmentType.SageMakerTransformJob
-            # TODO: need to figure out how to get source_arn from job env
-            # with Transform team's help.
-            source_arn = ""
+            # TODO: update to get source_arn from a config file once the Transform side is ready
+            source_arn = os.environ.get(transform_job_arn_env)
             return _RunEnvironment(environment_type, source_arn)

         return None

-    def get_trial_component(self, sagemaker_session):
+    def get_trial_component(self, sagemaker_session: Session):
         """Retrieves the trial component from the job in the environment.

         Args:

@@ -99,14 +99,6 @@ def get_trial_component(self, sagemaker_session):
         Returns:
             _TrialComponent: The trial component created from the job. None if not found.
         """
-        # TODO: Remove this condition check once we have a way to retrieve source ARN
-        # from transform job env
-        if self.environment_type == _EnvironmentType.SageMakerTransformJob:
-            logger.error(
-                "Currently getting the job trial component from the transform job environment "
-                "is not supported. Returning None."
-            )
-            return None

         def _get_trial_component():
             summaries = list(
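
A hedged sketch (not part of this commit) of the new detection behavior; the ARN
value is illustrative:

    # Sketch: a transform job is now recognized by the presence of the
    # TRANSFORM_JOB_ARN environment variable rather than SAGEMAKER_BATCH == "true".
    import os

    from sagemaker.experiments._environment import _EnvironmentType, _RunEnvironment

    os.environ["TRANSFORM_JOB_ARN"] = (
        "arn:aws:sagemaker:us-west-2:111122223333:transform-job/my-job"  # illustrative
    )
    env = _RunEnvironment.load()
    assert env.environment_type == _EnvironmentType.SageMakerTransformJob
    print(env.source_arn)  # prints the ARN set above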

src/sagemaker/experiments/_metrics.py (-80)

@@ -14,7 +14,6 @@
 from __future__ import absolute_import

 import datetime
-import json
 import logging
 import os
 import time

@@ -35,85 +34,6 @@
 logger = logging.getLogger(__name__)


-# TODO: remove this _SageMakerFileMetricsWriter class
-# when _MetricsManager is fully ready
-class _SageMakerFileMetricsWriter(object):
-    """Write metric data to file."""
-
-    def __init__(self, metrics_file_path=None):
-        """Construct a `_SageMakerFileMetricsWriter` object"""
-        self._metrics_file_path = metrics_file_path
-        self._file = None
-        self._closed = False
-
-    def log_metric(self, metric_name, value, timestamp=None, step=None):
-        """Write a metric to file.
-
-        Args:
-            metric_name (str): The name of the metric.
-            value (float): The value of the metric.
-            timestamp (datetime.datetime): Timestamp of the metric.
-                If not specified, the current UTC time will be used.
-            step (int): Iteration number of the metric (default: None).
-
-        Raises:
-            SageMakerMetricsWriterException: If the metrics file is closed.
-            AttributeError: If file has been initialized and the writer hasn't been closed.
-        """
-        raw_metric_data = _RawMetricData(
-            metric_name=metric_name, value=value, timestamp=timestamp, step=step
-        )
-        try:
-            logger.debug("Writing metric: %s", raw_metric_data)
-            self._file.write(json.dumps(raw_metric_data.to_record()))
-            self._file.write("\n")
-        except AttributeError as attr_err:
-            if self._closed:
-                raise SageMakerMetricsWriterException("log_metric called on a closed writer")
-            if not self._file:
-                self._file = open(self._get_metrics_file_path(), "a", buffering=1)
-                self._file.write(json.dumps(raw_metric_data.to_record()))
-                self._file.write("\n")
-            else:
-                raise attr_err
-
-    def close(self):
-        """Closes the metric file."""
-        if not self._closed and self._file:
-            self._file.close()
-            self._file = None  # invalidate reference, causing subsequent log_metric to fail.
-            self._closed = True
-
-    def __enter__(self):
-        """Return self"""
-        return self
-
-    def __exit__(self, exc_type, exc_value, exc_traceback):
-        """Execute self.close()"""
-        self.close()
-
-    def __del__(self):
-        """Execute self.close()"""
-        self.close()
-
-    def _get_metrics_file_path(self):
-        """Get file path to store metrics"""
-        pid_filename = "{}.json".format(str(os.getpid()))
-        metrics_file_path = self._metrics_file_path or os.path.join(METRICS_DIR, pid_filename)
-        logger.debug("metrics_file_path = %s", metrics_file_path)
-        return metrics_file_path
-
-
-class SageMakerMetricsWriterException(Exception):
-    """SageMakerMetricsWriterException"""
-
-    def __init__(self, message, errors=None):
-        """Construct a `SageMakerMetricsWriterException` instance"""
-        super().__init__(message)
-        if errors:
-            self.errors = errors
-
-
 class _RawMetricData(object):
     """A Raw Metric Data Object"""

src/sagemaker/experiments/_utils.py (+3 -5)

@@ -127,11 +127,9 @@ def get_tc_and_exp_config_from_job_env(
             num_attempts=4,
         )
     else:  # environment.environment_type == _EnvironmentType.SageMakerTransformJob
-        raise RuntimeError(
-            "Failed to load the Run as loading experiment config "
-            "from transform job environment is not currently supported. "
-            "As a workaround, please explicitly pass in "
-            "the experiment_name and run_name in load_run."
+        job_response = retry_with_backoff(
+            callable_func=lambda: sagemaker_session.describe_transform_job(job_name),
+            num_attempts=4,
         )

     job_exp_config = job_response.get("ExperimentConfig", dict())
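
For reference, a small sketch (not part of this commit) of the retry pattern used
above; ``describe_flaky`` is a hypothetical stand-in for
``sagemaker_session.describe_transform_job(job_name)``:

    # Sketch: retry_with_backoff retries a callable with exponential backoff.
    from sagemaker.utils import retry_with_backoff

    def describe_flaky():
        # Stand-in for a service call that may fail transiently.
        return {"ExperimentConfig": {"RunName": "my-run"}}  # illustrative response

    job_response = retry_with_backoff(callable_func=describe_flaky, num_attempts=4)
    job_exp_config = job_response.get("ExperimentConfig", dict())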

src/sagemaker/experiments/run.py (+7 -7)

@@ -120,19 +120,18 @@ def __init__(
             estimator.fit(job_name="my-job")  # Create a training job

         In order to reuse an existing run to log extra data, ``load_run`` is recommended.
+        For example, instead of the ``Run`` constructor, ``load_run`` is recommended
+        in a job script to load the existing run created before the job launch.
+        Otherwise, a new run may be created each time you launch a job.
+
         The code snippet below displays how to load the run initialized above
         in a custom training job script, where no ``run_name`` or ``experiment_name``
         is presented as they are automatically retrieved from the experiment config
         in the job environment.

-        Note:
-            Instead of the ``Run`` constructor, the ``load_run`` is recommended to use
-            in a job script to load the existing run created before the job launch.
-            Otherwise, a new run may be created each time you launch a job.
-
         .. code:: python

-            with load_run() as run:
+            with load_run(sagemaker_session=sagemaker_session) as run:
                 run.log_metric(...)
                 ...

@@ -648,7 +647,8 @@ def _append_run_tc_label_to_tags(tags: Optional[List[Dict[str, str]]] = None) ->
         """
         if not tags:
             tags = []
-        tags.append(RUN_TC_TAG)
+        if RUN_TC_TAG not in tags:
+            tags.append(RUN_TC_TAG)
         return tags

     def __enter__(self):
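
To round out the docstring change above, a hedged sketch (not part of this commit)
of the in-job pattern it recommends; the metric and parameter names are illustrative:

    # Sketch: inside a job script, load_run() picks up run_name and
    # experiment_name from the job's experiment config automatically.
    from sagemaker.experiments import load_run
    from sagemaker.session import Session

    sagemaker_session = Session()
    with load_run(sagemaker_session=sagemaker_session) as run:
        run.log_parameter("batch_size", 64)
        run.log_metric(name="train:loss", value=0.25, step=1)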
