aws · tejaschumbalkar · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022 · Dec 2, 2022
diff --git a/.gitignore b/.gitignore
@@ -30,5 +30,6 @@ env/
 .vscode/
 **/tmp
 .python-version
-**/_repack_model.py
-**/_repack_script_launcher.sh
+**/_repack_script_launcher.sh
+tests/data/**/_repack_model.py
+tests/data/experiment/sagemaker-dev-1.0.tar.gz
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,85 @@
 # Changelog
 
+## v2.122.0 (2022-12-14)
+
+### Features
+
+ * Feature Store dataset builder, delete_record, get_record, list_feature_group
+ * Add OSU region to frameworks for DLC
+
+### Bug Fixes and Other Changes
+
+ * the Hyperband support fix for the HPO
+ * unpin packaging version
+ * Remove content type image/jpg from analysis configuration schema
+
+## v2.121.2 (2022-12-12)
+
+### Bug Fixes and Other Changes
+
+ * Update for Tensorflow Serving 2.11 inference DLCs
+ * Revert "fix: type hint of PySparkProcessor __init__"
+ * Skip Bad Transform Test
+
+## v2.121.1 (2022-12-09)
+
+### Bug Fixes and Other Changes
+
+ * Pop out ModelPackageName from pipeline definition
+ * Fix failing jumpstart cache unit tests
+
+## v2.121.0 (2022-12-08)
+
+### Features
+
+ * Algorithms Region Expansion OSU/DXB
+
+### Bug Fixes and Other Changes
+
+ * FrameworkProcessor S3 uploads
+ * Add constraints file for apache-airflow
+
+## v2.120.0 (2022-12-07)
+
+### Features
+
+ * Add Neo image uri config for Pytorch 1.12
+ * Adding support for SageMaker Training Compiler in PyTorch estimator starting 1.12
+ * Update registries with new region account number mappings.
+ * Add DXB region to frameworks by DLC
+
+### Bug Fixes and Other Changes
+
+ * support idempotency for framework and spark processors
+
+## v2.119.0 (2022-12-03)
+
+### Features
+
+ * Add Code Owners file
+ * Added transform with monitoring pipeline step in transformer
+ * Update TF 2.9 and TF 2.10 inference DLCs
+ * make estimator accept json file as modelparallel config
+ * SageMaker Training Compiler does not support p4de instances
+ * Add support for SparkML v3.3
+
+### Bug Fixes and Other Changes
+
+ * Fix bug forcing uploaded tar to be named sourcedir
+ * Update local_requirements.txt PyYAML version
+ * refactoring : using with statement
+ * Allow Py 3.7 for MMS Test Docker env
+ * fix PySparkProcessor __init__ params type
+ * type hint of PySparkProcessor __init__
+ * Return ARM XGB/SKLearn tags if `image_scope` is `inference_graviton`
+ * Update scipy to 1.7.3 to support M1 development envs
+ * Fixing type hints for Spark processor that has instance type/count params in reverse order
+ * Add DeepAR ap-northeast-3 repository.
+ * Fix AsyncInferenceConfig documentation typo
+ * fix ml_inf to ml_inf1 in Neo multi-version support
+ * Fix type annotations
+ * add neo mvp region accounts
+
 ## v2.118.0 (2022-12-01)
 
 ### Features

diff --git a/CODEOWNERS b/CODEOWNERS
@@ -0,0 +1 @@
+* @aws/sagemaker-ml-frameworks
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.118.1.dev0
+2.122.1.dev0
diff --git a/doc/experiments/index.rst b/doc/experiments/index.rst
@@ -0,0 +1,10 @@
+############################
+Amazon SageMaker Experiments
+############################
+
+The SageMaker Python SDK supports tracking and organizing your machine learning workflow across SageMaker with jobs, such as Processing, Training and Transform, or locally.
+
+.. toctree::
+    :maxdepth: 2
+
+    sagemaker.experiments
diff --git a/doc/experiments/sagemaker.experiments.rst b/doc/experiments/sagemaker.experiments.rst
@@ -0,0 +1,20 @@
+Experiments
+============
+
+Run
+-------------
+
+.. autoclass:: sagemaker.experiments.Run
+    :members:
+
+.. automethod:: sagemaker.experiments.load_run
+
+.. automethod:: sagemaker.experiments.list_runs
+
+.. autoclass:: sagemaker.experiments.SortByType
+    :members:
+    :undoc-members:
+
+.. autoclass:: sagemaker.experiments.SortOrderType
+    :members:
+    :undoc-members:
diff --git a/doc/index.rst b/doc/index.rst
@@ -60,6 +60,16 @@ Orchestrate your SageMaker training and inference workflows with Airflow and Kub
     workflows/index
 
 
+****************************
+Amazon SageMaker Experiments
+****************************
+You can use Amazon SageMaker Experiments to track machine learning experiments.
+
+.. toctree::
+    :maxdepth: 2
+
+    experiments/index
+
 *************************
 Amazon SageMaker Debugger
 *************************

diff --git a/requirements/extras/test_requirements.txt b/requirements/extras/test_requirements.txt
@@ -11,6 +11,7 @@ contextlib2==21.6.0
 awslogs==0.14.0
 black==22.3.0
 stopit==1.1.2
+# Update tox.ini to have correct version of airflow constraints file
 apache-airflow==2.4.1
 apache-airflow-providers-amazon==4.0.0
 attrs==22.1.0
@@ -19,3 +20,4 @@ requests==2.27.1
 sagemaker-experiments==0.1.35
 Jinja2==3.0.3
 pandas>=1.3.5,<1.5
+scikit-learn==1.0.2
diff --git a/setup.py b/setup.py
@@ -48,7 +48,7 @@ def read_requirements(filename):
 # Declare minimal set for installation
 required_packages = [
     "attrs>=20.3.0,<23",
-    "boto3>=1.26.20,<2.0",
+    "boto3>=1.26.28,<2.0",
     "google-pasta",
     "numpy>=1.9.0,<2.0",
     "protobuf>=3.1,<4.0",

diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py
@@ -27,7 +27,7 @@
 from sagemaker.deprecations import renamed_warning
 from sagemaker.estimator import EstimatorBase, _TrainingJob
 from sagemaker.inputs import FileSystemInput, TrainingInput
-from sagemaker.utils import sagemaker_timestamp
+from sagemaker.utils import sagemaker_timestamp, check_and_get_run_experiment_config
 from sagemaker.workflow.entities import PipelineVariable
 from sagemaker.workflow.pipeline_context import runnable_by_pipeline
 from sagemaker.workflow import is_pipeline_variable
@@ -242,8 +242,8 @@ def fit(
                 generates a default job name, based on the training image name
                 and current timestamp.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -255,6 +255,7 @@ def fit(
         """
         self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)
 
+        experiment_config = check_and_get_run_experiment_config(experiment_config)
         self.latest_training_job = _TrainingJob.start_new(
             self, records, experiment_config=experiment_config
         )

diff --git a/src/sagemaker/apiutils/_base_types.py b/src/sagemaker/apiutils/_base_types.py
@@ -173,8 +173,10 @@ def _search(
                 search_items = search_method_response.get("Results", [])
                 next_token = search_method_response.get(boto_next_token_name)
                 for item in search_items:
-                    if cls.__name__ in item:
-                        yield search_item_factory(item[cls.__name__])
+                    # _TrialComponent class in experiments module is not public currently
+                    class_name = cls.__name__.lstrip("_")
+                    if class_name in item:
+                        yield search_item_factory(item[class_name])
                 if not next_token:
                     break
         except StopIteration:

diff --git a/src/sagemaker/apiutils/_boto_functions.py b/src/sagemaker/apiutils/_boto_functions.py
@@ -68,7 +68,9 @@ def from_boto(boto_dict, boto_name_to_member_name, member_name_to_type):
             api_type, is_collection = member_name_to_type[member_name]
             if is_collection:
                 if isinstance(boto_value, dict):
-                    member_value = api_type.from_boto(boto_value)
+                    member_value = {
+                        key: api_type.from_boto(value) for key, value in boto_value.items()
+                    }
                 else:
                     member_value = [api_type.from_boto(item) for item in boto_value]
             else:

diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py
@@ -282,7 +282,6 @@
                     "text/csv",
                     "application/jsonlines",
                     "image/jpeg",
-                    "image/jpg",
                     "image/png",
                     "application/x-npy",
                 ),

diff --git a/src/sagemaker/dataset_definition/inputs.py b/src/sagemaker/dataset_definition/inputs.py
@@ -124,8 +124,10 @@ class DatasetDefinition(ApiObject):
     """DatasetDefinition input."""
 
     _custom_boto_types = {
-        "redshift_dataset_definition": (RedshiftDatasetDefinition, True),
-        "athena_dataset_definition": (AthenaDatasetDefinition, True),
+        # RedshiftDatasetDefinition and AthenaDatasetDefinition are not collections.
+        # Instead, they are singleton objects, so the is_collection flag is set to False.
+        "redshift_dataset_definition": (RedshiftDatasetDefinition, False),
+        "athena_dataset_definition": (AthenaDatasetDefinition, False),
     }
 
     def __init__(

diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -79,6 +79,7 @@
     get_config_value,
     name_from_base,
     to_string,
+    check_and_get_run_experiment_config,
 )
 from sagemaker.workflow import is_pipeline_variable
 from sagemaker.workflow.entities import PipelineVariable
@@ -1103,8 +1104,8 @@ def fit(
             job_name (str): Training job name. If not specified, the estimator generates
                 a default job name based on the training image name and current timestamp.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -1122,6 +1123,7 @@ def fit(
         """
         self._prepare_for_training(job_name=job_name)
 
+        experiment_config = check_and_get_run_experiment_config(experiment_config)
         self.latest_training_job = _TrainingJob.start_new(self, inputs, experiment_config)
         self.jobs.append(self.latest_training_job)
         if wait:
@@ -2023,8 +2025,8 @@ def start_new(cls, estimator, inputs, experiment_config):
             inputs (str): Parameters used when called
                 :meth:`~sagemaker.estimator.EstimatorBase.fit`.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -2033,6 +2035,7 @@ def start_new(cls, estimator, inputs, experiment_config):
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * `RunName` is used to record an experiment run.
         Returns:
             sagemaker.estimator._TrainingJob: Constructed object that captures
             all information about the started training job.
@@ -2053,8 +2056,8 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
             inputs (str): Parameters used when called
                 :meth:`~sagemaker.estimator.EstimatorBase.fit`.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -2063,6 +2066,7 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * `RunName` is used to record an experiment run.
 
         Returns:
             Dict: dict for `sagemaker.session.Session.train` method

diff --git a/src/sagemaker/experiments/__init__.py b/src/sagemaker/experiments/__init__.py
@@ -0,0 +1,20 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""SageMaker Experiments module."""
+from __future__ import absolute_import
+
+from sagemaker.experiments.run import Run  # noqa: F401
+from sagemaker.experiments.run import load_run  # noqa: F401
+from sagemaker.experiments.run import list_runs  # noqa: F401
+from sagemaker.experiments.run import SortOrderType  # noqa: F401
+from sagemaker.experiments.run import SortByType  # noqa: F401