aws
diff --git a/‎.flake8
Lines changed: 1 addition & 0 deletions b/‎.flake8
Lines changed: 1 addition & 0 deletions
diff --git a/‎CHANGELOG.md
Lines changed: 17 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 17 additions & 0 deletions
diff --git a/‎README.rst
Lines changed: 2 additions & 0 deletions b/‎README.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎VERSION
Lines changed: 1 addition & 1 deletion b/‎VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements/extras/test_requirements.txt
Lines changed: 1 addition & 0 deletions b/‎requirements/extras/test_requirements.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎setup.py
Lines changed: 2 additions & 0 deletions b/‎setup.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/sagemaker/config/config_schema.py
Lines changed: 101 additions & 1 deletion b/‎src/sagemaker/config/config_schema.py
Lines changed: 101 additions & 1 deletion
diff --git a/‎src/sagemaker/experiments/run.py
Lines changed: 30 additions & 20 deletions b/‎src/sagemaker/experiments/run.py
Lines changed: 30 additions & 20 deletions
diff --git a/‎src/sagemaker/fw_utils.py
Lines changed: 4 additions & 1 deletion b/‎src/sagemaker/fw_utils.py
Lines changed: 4 additions & 1 deletion
@@ -3,3 +3,4 @@ application_import_names = sagemaker, tests
 import-order-style = google
 per-file-ignores =
     tests/unit/test_tuner.py: F405
+    src/sagemaker/config/config_schema.py: E501
@@ -1,5 +1,22 @@
 # Changelog
 
+## v2.148.0 (2023-04-20)
+
+### Features
+
+ * [huggingface] Add `torch.distributed` support for Trainium and `torchrun`
+ * Add PyTorch 2.0 to SDK
+
+### Bug Fixes and Other Changes
+
+ * updating batch transform job in monitoring schedule
+
+## v2.147.0 (2023-04-18)
+
+### Features
+
+ * support different types of deletion mode
+
 ## v2.146.1 (2023-04-17)
 
 ### Bug Fixes and Other Changes
 
@@ -133,6 +133,8 @@ To run the integration tests, the following prerequisites must be met
 1. AWS account credentials are available in the environment for the boto3 client to use.
 2. The AWS account has an IAM role named :code:`SageMakerRole`.
    It should have the AmazonSageMakerFullAccess policy attached as well as a policy with `the necessary permissions to use Elastic Inference <https://docs.aws.amazon.com/sagemaker/latest/dg/ei-setup.html>`__.
+3. To run the remote_function tests, a dummy ECR repository must be created. Create it by running:
+    :code:`aws ecr create-repository --repository-name remote-function-dummy-container`
 
 We recommend selectively running just those integration tests you'd like to run. You can filter by individual test function names with:
 
 
@@ -1 +1 @@
-2.146.2.dev0
+2.148.1.dev0
@@ -21,3 +21,4 @@ sagemaker-experiments==0.1.35
 Jinja2==3.0.3
 pandas>=1.3.5,<1.5
 scikit-learn==1.0.2
+cloudpickle==2.2.1
@@ -49,6 +49,7 @@ def read_requirements(filename):
 required_packages = [
     "attrs>=20.3.0,<23",
     "boto3>=1.26.28,<2.0",
+    "cloudpickle==2.2.1",
     "google-pasta",
     "numpy>=1.9.0,<2.0",
     "protobuf>=3.1,<4.0",
@@ -62,6 +63,7 @@ def read_requirements(filename):
     "PyYAML==5.4.1",
     "jsonschema",
     "platformdirs",
+    "tblib==1.7.0",
 ]
 
 # Specific use case dependencies
 
@@ -44,6 +44,17 @@
 SAGEMAKER = "SageMaker"
 PYTHON_SDK = "PythonSDK"
 MODULES = "Modules"
+REMOTE_FUNCTION = "RemoteFunction"
+DEPENDENCIES = "Dependencies"
+PRE_EXECUTION_SCRIPT = "PreExecutionScript"
+PRE_EXECUTION_COMMANDS = "PreExecutionCommands"
+ENVIRONMENT_VARIABLES = "EnvironmentVariables"
+IMAGE_URI = "ImageUri"
+INCLUDE_LOCAL_WORKDIR = "IncludeLocalWorkDir"
+INSTANCE_TYPE = "InstanceType"
+S3_KMS_KEY_ID = "S3KmsKeyId"
+S3_ROOT_URI = "S3RootUri"
+JOB_CONDA_ENV = "JobCondaEnvironment"
 OFFLINE_STORE_CONFIG = "OfflineStoreConfig"
 ONLINE_STORE_CONFIG = "OnlineStoreConfig"
 S3_STORAGE_CONFIG = "S3StorageConfig"
@@ -221,6 +232,49 @@ def _simple_path(*args: str):
     SAGEMAKER, MODEL_PACKAGE, VALIDATION_SPECIFICATION, VALIDATION_PROFILES
 )
 
+REMOTE_FUNCTION_DEPENDENCIES = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, DEPENDENCIES
+)
+REMOTE_FUNCTION_PRE_EXECUTION_COMMANDS = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, PRE_EXECUTION_COMMANDS
+)
+REMOTE_FUNCTION_PRE_EXECUTION_SCRIPT = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, PRE_EXECUTION_SCRIPT
+)
+REMOTE_FUNCTION_ENVIRONMENT_VARIABLES = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, ENVIRONMENT_VARIABLES
+)
+REMOTE_FUNCTION_IMAGE_URI = _simple_path(SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, IMAGE_URI)
+REMOTE_FUNCTION_INCLUDE_LOCAL_WORKDIR = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, INCLUDE_LOCAL_WORKDIR
+)
+REMOTE_FUNCTION_INSTANCE_TYPE = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, INSTANCE_TYPE
+)
+REMOTE_FUNCTION_JOB_CONDA_ENV = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, JOB_CONDA_ENV
+)
+REMOTE_FUNCTION_ROLE_ARN = _simple_path(SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, ROLE_ARN)
+REMOTE_FUNCTION_S3_KMS_KEY_ID = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, S3_KMS_KEY_ID
+)
+REMOTE_FUNCTION_S3_ROOT_URI = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, S3_ROOT_URI
+)
+REMOTE_FUNCTION_TAGS = _simple_path(SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, TAGS)
+REMOTE_FUNCTION_VOLUME_KMS_KEY_ID = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, VOLUME_KMS_KEY_ID
+)
+REMOTE_FUNCTION_VPC_CONFIG_SUBNETS = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, VPC_CONFIG, SUBNETS
+)
+REMOTE_FUNCTION_VPC_CONFIG_SECURITY_GROUP_IDS = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, VPC_CONFIG, SECURITY_GROUP_IDS
+)
+REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION = _simple_path(
+    SAGEMAKER, PYTHON_SDK, MODULES, REMOTE_FUNCTION, ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION
+)
+
 # Paths for reference elsewhere in the SDK.
 # Names include the schema version since the paths could change with other schema versions
 MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH = _simple_path(
@@ -245,7 +299,6 @@ def _simple_path(*args: str):
     SAGEMAKER, TRAINING_JOB, ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION
 )
 
-
 SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA = {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
     TYPE: OBJECT,
@@ -377,6 +430,23 @@ def _simple_path(*args: str):
             "minItems": 0,
             "maxItems": 50,
         },
+        # Regex is taken from https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateTrainingJob.html#sagemaker-CreateTrainingJob-request-Environment
+        "environmentVariables": {
+            TYPE: OBJECT,
+            ADDITIONAL_PROPERTIES: False,
+            PATTERN_PROPERTIES: {
+                r"([a-zA-Z_][a-zA-Z0-9_]*){1,512}": {
+                    TYPE: "string",
+                    "pattern": r"[\S\s]*",
+                    "maxLength": 512,
+                }
+            },
+            "maxProperties": 48,
+        },
+        # Regex is taken from https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_S3DataSource.html#sagemaker-Type-S3DataSource-S3Uri
+        "s3Uri": {TYPE: "string", "pattern": "^(https|s3)://([^/]+)/?(.*)$", "maxLength": 1024},
+        # Regex is taken from https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_AlgorithmSpecification.html#sagemaker-Type-AlgorithmSpecification-ContainerEntrypoint
+        "preExecutionCommand": {TYPE: "string", "pattern": r".*"},
     },
     PROPERTIES: {
         SCHEMA_VERSION: {
@@ -406,6 +476,36 @@ def _simple_path(*args: str):
                             # Any SageMaker Python SDK specific configuration will be added here.
                             TYPE: OBJECT,
                             ADDITIONAL_PROPERTIES: False,
+                            PROPERTIES: {
+                                REMOTE_FUNCTION: {
+                                    TYPE: OBJECT,
+                                    ADDITIONAL_PROPERTIES: False,
+                                    PROPERTIES: {
+                                        DEPENDENCIES: {TYPE: "string"},
+                                        PRE_EXECUTION_COMMANDS: {
+                                            TYPE: "array",
+                                            "items": {"$ref": "#/definitions/preExecutionCommand"},
+                                        },
+                                        PRE_EXECUTION_SCRIPT: {TYPE: "string"},
+                                        ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION: {
+                                            TYPE: "boolean"
+                                        },
+                                        ENVIRONMENT_VARIABLES: {
+                                            "$ref": "#/definitions/environmentVariables"
+                                        },
+                                        IMAGE_URI: {TYPE: "string"},
+                                        INCLUDE_LOCAL_WORKDIR: {TYPE: "boolean"},
+                                        INSTANCE_TYPE: {TYPE: "string"},
+                                        JOB_CONDA_ENV: {TYPE: "string"},
+                                        ROLE_ARN: {"$ref": "#/definitions/roleArn"},
+                                        S3_KMS_KEY_ID: {"$ref": "#/definitions/kmsKeyId"},
+                                        S3_ROOT_URI: {"$ref": "#/definitions/s3Uri"},
+                                        TAGS: {"$ref": "#/definitions/tags"},
+                                        VOLUME_KMS_KEY_ID: {"$ref": "#/definitions/kmsKeyId"},
+                                        VPC_CONFIG: {"$ref": "#/definitions/vpcConfig"},
+                                    },
+                                }
+                            },
                         }
                     },
                 },
 
@@ -715,6 +715,14 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
 
         self.close()
 
+    def __getstate__(self):
+        """Prevent instances of Run from being pickled.
+
+        Raises:
+            NotImplementedError: Always, whenever pickling of this instance is attempted.
+        """
+        raise NotImplementedError("Instance of Run type is not allowed to be pickled.")
+
 
 def load_run(
     run_name: Optional[str] = None,
@@ -787,36 +795,38 @@ def load_run(
     Returns:
         Run: The loaded Run object.
     """
-    sagemaker_session = sagemaker_session or _utils.default_session()
     environment = _RunEnvironment.load()
 
     verify_load_input_names(run_name=run_name, experiment_name=experiment_name)
 
-    if run_name or environment:
-        if run_name:
-            logger.warning(
-                "run_name is explicitly supplied in load_run, "
-                "which will be prioritized to load the Run object. "
-                "In other words, the run name in the experiment config, fetched from the "
-                "job environment or the current run context, will be ignored."
-            )
-        else:
-            exp_config = get_tc_and_exp_config_from_job_env(
-                environment=environment, sagemaker_session=sagemaker_session
-            )
-            run_name = Run._extract_run_name_from_tc_name(
-                trial_component_name=exp_config[RUN_NAME],
-                experiment_name=exp_config[EXPERIMENT_NAME],
-            )
-            experiment_name = exp_config[EXPERIMENT_NAME]
-
+    if run_name:
+        logger.warning(
+            "run_name is explicitly supplied in load_run, "
+            "which will be prioritized to load the Run object. "
+            "In other words, the run name in the experiment config, fetched from the "
+            "job environment or the current run context, will be ignored."
+        )
         run_instance = Run(
             experiment_name=experiment_name,
             run_name=run_name,
-            sagemaker_session=sagemaker_session,
+            sagemaker_session=sagemaker_session or _utils.default_session(),
         )
     elif _RunContext.get_current_run():
         run_instance = _RunContext.get_current_run()
+    elif environment:
+        exp_config = get_tc_and_exp_config_from_job_env(
+            environment=environment, sagemaker_session=sagemaker_session or _utils.default_session()
+        )
+        run_name = Run._extract_run_name_from_tc_name(
+            trial_component_name=exp_config[RUN_NAME],
+            experiment_name=exp_config[EXPERIMENT_NAME],
+        )
+        experiment_name = exp_config[EXPERIMENT_NAME]
+        run_instance = Run(
+            experiment_name=experiment_name,
+            run_name=run_name,
+            sagemaker_session=sagemaker_session or _utils.default_session(),
+        )
     else:
         raise RuntimeError(
             "Failed to load a Run object. "
 
@@ -135,6 +135,7 @@
         "1.12.0",
         "1.12.1",
         "1.13.1",
+        "2.0.0",
     ],
 }
 
@@ -148,10 +149,11 @@
     "1.12.0",
     "1.12.1",
     "1.13.1",
+    "2.0.0",
 ]
 
 
-TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1"]
+TORCH_DISTRIBUTED_GPU_SUPPORTED_FRAMEWORK_VERSIONS = ["1.13.1", "2.0.0"]
 
 TRAINIUM_SUPPORTED_DISTRIBUTION_STRATEGIES = ["torch_distributed"]
 TRAINIUM_SUPPORTED_TORCH_DISTRIBUTED_FRAMEWORK_VERSIONS = [
@@ -161,6 +163,7 @@
     "1.12.0",
     "1.12.1",
     "1.13.1",
+    "2.0.0",
 ]
 
 SMDISTRIBUTED_SUPPORTED_STRATEGIES = ["dataparallel", "modelparallel"]