Merge remote-tracking branch 'origin/new_regions' into new_regions

secastillo · secastillo · commit db8cdd08a946 · 2023-03-21T11:04:22.000-04:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,25 @@
 # Changelog
 
+## v2.140.0 (2023-03-17)
+
+### Features
+
+ * SDK changes for SageMaker Training Compiler (TRCOMP) support
+
+### Bug Fixes and Other Changes
+
+ * [Feature - Hugging Face] Update Transformers 4.26 - PyTorch 1.13.1 image URI
+
+## v2.139.0 (2023-03-15)
+
+### Features
+
+ * Add XGBoost framework 1.7-1 version
+
+### Bug Fixes and Other Changes
+
+ * Fix image_uris.retrieve() function to raise ValueError when framework is not allowed for an instance_type
+
 ## v2.138.0 (2023-03-13)
 
 ### Features
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.138.1.dev0
+2.140.1.dev0
diff --git a/src/sagemaker/huggingface/training_compiler/config.py b/src/sagemaker/huggingface/training_compiler/config.py
@@ -102,6 +102,17 @@ def validate(cls, estimator):
 
         super(TrainingCompilerConfig, cls).validate(estimator)
 
+        if estimator.pytorch_version:
+            if (Version(estimator.pytorch_version) in SpecifierSet("< 1.9")) or (
+                Version(estimator.pytorch_version) in SpecifierSet("> 1.11")
+            ):
+                error_helper_string = (
+                    "SageMaker Training Compiler is only supported "
+                    "with HuggingFace PyTorch 1.9-1.11. "
+                    "Received pytorch_version={} which is unsupported."
+                )
+                raise ValueError(error_helper_string.format(estimator.pytorch_version))
+
         if estimator.image_uri:
             error_helper_string = (
                 "Overriding the image URI is currently not supported "
diff --git a/src/sagemaker/image_uri_config/huggingface.json b/src/sagemaker/image_uri_config/huggingface.json
@@ -8,7 +8,8 @@
             "4.10": "4.10.2",
             "4.11": "4.11.0",
             "4.12": "4.12.3",
-            "4.17": "4.17.0"
+            "4.17": "4.17.0",
+            "4.26": "4.26.0"
         },
         "versions": {
             "4.4.2": {
@@ -772,6 +773,47 @@
                     "repository": "huggingface-tensorflow-training",
                     "container_version": {"gpu": "cu112-ubuntu20.04"}
                 }
+            },
+            "4.26.0": {
+                "version_aliases": {
+                    "pytorch1.13": "pytorch1.13.1"
+                },
+                "pytorch1.13.1": {
+                    "py_versions": ["py39"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ap-southeast-3": "907027046896",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "me-central-1": "914824155844",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-east-1": "446045086412",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-isob-east-1": "094389454867",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-training",
+                    "container_version": {"gpu": "cu117-ubuntu20.04"}
+                }
             }
         }
     },
@@ -783,7 +825,8 @@
             "4.10": "4.10.2",
             "4.11": "4.11.0",
             "4.12": "4.12.3",
-            "4.17": "4.17.0"
+            "4.17": "4.17.0",
+            "4.26": "4.26.0"
         },
         "versions": {
             "4.6.1": {
@@ -1337,6 +1380,51 @@
                     "repository": "huggingface-tensorflow-inference",
                     "container_version": {"gpu": "cu112-ubuntu20.04", "cpu": "ubuntu20.04" }
                 }
+            },
+            "4.26.0": {
+                "version_aliases": {
+                    "pytorch1.13": "pytorch1.13.1"
+                },
+                "pytorch1.13.1": {
+                    "py_versions": ["py39"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-south-2": "772153158452",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ap-southeast-3": "907027046896",
+                        "ap-southeast-4": "457447274322",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-central-2": "380420809688",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-south-2": "503227376785",
+                        "me-south-1": "217643126080",
+                        "me-central-1": "914824155844",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-east-1": "446045086412",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-isob-east-1": "094389454867",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-inference",
+                    "container_version": {"gpu": "cu117-ubuntu20.04", "cpu": "ubuntu20.04" }
+                }
             }
         }
     }
diff --git a/src/sagemaker/tensorflow/training_compiler/config.py b/src/sagemaker/tensorflow/training_compiler/config.py
@@ -26,6 +26,7 @@ class TrainingCompilerConfig(BaseConfig):
 
     SUPPORTED_INSTANCE_CLASS_PREFIXES = ["p3", "p3dn", "g4dn", "p4d", "g5"]
     MIN_SUPPORTED_VERSION = "2.9"
+    MAX_SUPPORTED_VERSION = "2.11"
 
     def __init__(self, enabled=True, debug=False):
         """This class initializes a ``TrainingCompilerConfig`` instance.
@@ -91,15 +92,17 @@ def validate(cls, estimator):
         super(TrainingCompilerConfig, cls).validate(estimator)
 
         if estimator.framework_version:
-            if Version(estimator.framework_version) in SpecifierSet(
-                f"< {cls.MIN_SUPPORTED_VERSION}"
+            if Version(estimator.framework_version) not in SpecifierSet(
+                f">= {cls.MIN_SUPPORTED_VERSION}", f"<= {cls.MAX_SUPPORTED_VERSION}"
             ):
                 error_helper_string = (
                     "SageMaker Training Compiler only supports TensorFlow version "
-                    ">= {} but received {}"
+                    "between {} to {} but received {}"
                 )
                 error_helper_string = error_helper_string.format(
-                    cls.MIN_SUPPORTED_VERSION, estimator.framework_version
+                    cls.MIN_SUPPORTED_VERSION,
+                    cls.MAX_SUPPORTED_VERSION,
+                    estimator.framework_version,
                 )
                 raise ValueError(error_helper_string)
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -273,7 +273,12 @@ def huggingface_pytorch_training_version(huggingface_training_version):
 
 @pytest.fixture(scope="module")
 def huggingface_pytorch_training_py_version(huggingface_pytorch_training_version):
-    return "py38" if Version(huggingface_pytorch_training_version) >= Version("1.9") else "py36"
+    if Version(huggingface_pytorch_training_version) >= Version("1.13"):
+        return "py39"
+    elif Version(huggingface_pytorch_training_version) >= Version("1.9"):
+        return "py38"
+    else:
+        return "py36"
 
 
 @pytest.fixture(scope="module")
@@ -328,9 +333,12 @@ def huggingface_training_compiler_pytorch_py_version(
 def huggingface_pytorch_latest_training_py_version(
     huggingface_training_pytorch_latest_version,
 ):
-    return (
-        "py38" if Version(huggingface_training_pytorch_latest_version) >= Version("1.9") else "py36"
-    )
+    if Version(huggingface_training_pytorch_latest_version) >= Version("1.13"):
+        return "py39"
+    elif Version(huggingface_training_pytorch_latest_version) >= Version("1.9"):
+        return "py38"
+    else:
+        return "py36"
 
 
 @pytest.fixture(scope="module")
@@ -347,11 +355,12 @@ def pytorch_training_compiler_py_version(
 def huggingface_pytorch_latest_inference_py_version(
     huggingface_inference_pytorch_latest_version,
 ):
-    return (
-        "py38"
-        if Version(huggingface_inference_pytorch_latest_version) >= Version("1.9")
-        else "py36"
-    )
+    if Version(huggingface_inference_pytorch_latest_version) >= Version("1.13"):
+        return "py39"
+    elif Version(huggingface_inference_pytorch_latest_version) >= Version("1.9"):
+        return "py38"
+    else:
+        return "py36"
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py
@@ -218,7 +218,7 @@ def test_unsupported_gpu_instance(
         ).fit()
 
 
-def test_unsupported_framework_version(
+def test_unsupported_framework_version_min(
     huggingface_training_compiler_version,
 ):
     with pytest.raises(ValueError):
@@ -229,9 +229,24 @@ def test_unsupported_framework_version(
             instance_count=INSTANCE_COUNT,
             instance_type=INSTANCE_TYPE,
             transformers_version=huggingface_training_compiler_version,
-            pytorch_version=".".join(
-                ["99"] * len(huggingface_training_compiler_version.split("."))
-            ),
+            pytorch_version="1.8",
+            enable_sagemaker_metrics=False,
+            compiler_config=TrainingCompilerConfig(),
+        ).fit()
+
+
+def test_unsupported_framework_version_max(
+    huggingface_training_compiler_version,
+):
+    with pytest.raises(ValueError):
+        HuggingFace(
+            py_version="py38",
+            entry_point=SCRIPT_PATH,
+            role=ROLE,
+            instance_count=INSTANCE_COUNT,
+            instance_type=INSTANCE_TYPE,
+            transformers_version=huggingface_training_compiler_version,
+            pytorch_version="1.12",
             enable_sagemaker_metrics=False,
             compiler_config=TrainingCompilerConfig(),
         ).fit()
diff --git a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py
@@ -189,7 +189,7 @@ def test_gpu_instance(
                 compiler_config=TrainingCompilerConfig(),
             ).fit()
 
-    def test_framework_version(self, tensorflow_training_py_version):
+    def test_framework_version_min(self, tensorflow_training_py_version):
         with pytest.raises(ValueError):
             TensorFlow(
                 py_version=tensorflow_training_py_version,
@@ -202,6 +202,19 @@ def test_framework_version(self, tensorflow_training_py_version):
                 compiler_config=TrainingCompilerConfig(),
             ).fit()
 
+    def test_framework_version_max(self, tensorflow_training_py_version):
+        with pytest.raises(ValueError):
+            TensorFlow(
+                py_version=tensorflow_training_py_version,
+                entry_point=SCRIPT_PATH,
+                role=ROLE,
+                instance_count=INSTANCE_COUNT,
+                instance_type=INSTANCE_TYPE,
+                framework_version="2.12",
+                enable_sagemaker_metrics=False,
+                compiler_config=TrainingCompilerConfig(),
+            ).fit()
+
     def test_mwms(self, tensorflow_training_version, tensorflow_training_py_version):
         with pytest.raises(ValueError):
             TensorFlow(