Merge remote-tracking branch 'upstream/master' into feat/fw-processor-normargs

athewsey · athewsey · commit 02e0db13f870 · 2021-09-24T09:45:00.000+08:00
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -11,13 +11,15 @@ _Put an `x` in the boxes that apply. You can also fill these out after creating
 #### General
 
 - [ ] I have read the [CONTRIBUTING](https://github.com/aws/sagemaker-python-sdk/blob/master/CONTRIBUTING.md) doc
+- [ ] I certify that the changes I am introducing will be backward compatible, and I have discussed concerns about this, if any, with the Python SDK team
 - [ ] I used the commit message format described in [CONTRIBUTING](https://github.com/aws/sagemaker-python-sdk/blob/master/CONTRIBUTING.md#committing-your-change)
 - [ ] I have passed the region in to all S3 and STS clients that I've initialized as part of this change.
 - [ ] I have updated any necessary documentation, including [READMEs](https://github.com/aws/sagemaker-python-sdk/blob/master/README.rst) and [API docs](https://github.com/aws/sagemaker-python-sdk/tree/master/doc) (if appropriate)
 
 #### Tests
 
 - [ ] I have added tests that prove my fix is effective or that my feature works (if appropriate)
+- [ ] I have added unit and/or integration tests as appropriate to ensure backward compatibility of the changes
 - [ ] I have checked that my tests are not configured for a specific region or account (if appropriate)
 - [ ] I have used [`unique_name_from_base`](https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/utils.py#L77) to create resource names in integ tests (if appropriate)
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v2.59.3.post0 (2021-09-22)
+
+### Documentation Changes
+
+ * Info about offline s3 bucket key when creating feature group
+
 ## v2.59.3 (2021-09-20)
 
 ## v2.59.2 (2021-09-15)
diff --git a/src/sagemaker/feature_store/feature_group.py b/src/sagemaker/feature_store/feature_group.py
@@ -457,6 +457,13 @@ def create(
             online_store_kms_key_id (str): KMS key id for online store.
             enable_online_store (bool): whether to enable online store or not.
             offline_store_kms_key_id (str): KMS key id for offline store.
+                If a KMS encryption key is not specified, SageMaker encrypts all data at
+                rest using the default AWS KMS key. By defining your bucket-level key for
+                SSE, you can reduce the cost of AWS KMS requests.
+                For more information, see
+                `Bucket Key
+                <https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-key.html>`_
+                in the Amazon S3 User Guide.
             disable_glue_table_creation (bool): whether to turn off Glue table creation no not.
             data_catalog_config (DataCatalogConfig): configuration for Metadata store.
             description (str): description of the FeatureGroup.
diff --git a/src/sagemaker/image_uri_config/huggingface.json b/src/sagemaker/image_uri_config/huggingface.json
@@ -147,6 +147,7 @@
                 "version_aliases": {
                     "pytorch1.6": "pytorch1.6.0",
                     "pytorch1.7": "pytorch1.7.1",
+                    "pytorch1.8": "pytorch1.8.1",
                     "tensorflow2.4": "tensorflow2.4.1"
                 },
                 "pytorch1.6.0": {
@@ -178,7 +179,8 @@
                         "us-west-1": "763104351884",
                         "us-west-2": "763104351884"
                     },
-                    "repository": "huggingface-pytorch-training"
+                    "repository": "huggingface-pytorch-training",
+                    "container_version": {"gpu":"cu110-ubuntu18.04"}
                 },
                 "pytorch1.7.1": {
                     "py_versions": ["py36"],
@@ -209,7 +211,40 @@
                         "us-west-1": "763104351884",
                         "us-west-2": "763104351884"
                     },
-                    "repository": "huggingface-pytorch-training"
+                    "repository": "huggingface-pytorch-training",
+                    "container_version": {"gpu":"cu110-ubuntu18.04"}
+                },
+                "pytorch1.8.1": {
+                    "py_versions": ["py36"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-training",
+                    "container_version": {"gpu":"cu111-ubuntu18.04"}
                 },
                 "tensorflow2.4.1": {
                     "py_versions": ["py37"],
@@ -240,7 +275,8 @@
                         "us-west-1": "763104351884",
                         "us-west-2": "763104351884"
                     },
-                    "repository": "huggingface-tensorflow-training"
+                    "repository": "huggingface-tensorflow-training",
+                    "container_version": {"gpu":"cu110-ubuntu18.04"}
                 }
             }
         }
@@ -286,7 +322,40 @@
                         "us-west-1": "763104351884",
                         "us-west-2": "763104351884"
                     },
-                    "repository": "huggingface-pytorch-inference"
+                    "repository": "huggingface-pytorch-inference",
+                    "container_version": {"gpu":"cu110-ubuntu18.04", "cpu":"ubuntu18.04" }
+                },
+                "pytorch1.8.1": {
+                    "py_versions": ["py36"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-inference",
+                    "container_version": {"gpu":"cu111-ubuntu18.04", "cpu":"ubuntu18.04" }
                 },
                 "tensorflow2.4.1": {
                     "py_versions": ["py37"],
@@ -317,7 +386,8 @@
                         "us-west-1": "763104351884",
                         "us-west-2": "763104351884"
                     },
-                    "repository": "huggingface-tensorflow-inference"
+                    "repository": "huggingface-tensorflow-inference",
+                    "container_version": {"gpu":"cu110-ubuntu18.04", "cpu":"ubuntu18.04" }
                 }
             }
         }
diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py
@@ -41,6 +41,9 @@ def retrieve(
 ):
     """Retrieves the ECR URI for the Docker image matching the given arguments.
 
+    Ideally this function should not be called directly; rather, it should be called from the
+    fit() function inside the framework estimator.
+
     Args:
         framework (str): The name of the framework or algorithm.
         region (str): The AWS region.
@@ -56,7 +59,11 @@ def retrieve(
         image_scope (str): The image type, i.e. what it is used for.
             Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
             ``image_scope`` is ignored.
-        container_version (str): the version of docker image
+        container_version (str): the version of docker image.
+            Ideally the value of this parameter should be created inside the framework.
+            For custom use, see the list of supported container versions:
+            https://github.com/aws/deep-learning-containers/blob/master/available_images.md
+            (default: None).
         distribution (dict): A dictionary with information on how to run distributed training
             (default: None).
 
@@ -66,10 +73,12 @@ def retrieve(
     Raises:
         ValueError: If the combination of arguments specified is not supported.
     """
+
     config = _config_for_framework_and_scope(framework, image_scope, accelerator_type)
     original_version = version
     version = _validate_version_and_set_if_needed(version, config, framework)
     version_config = config["versions"][_version_for_config(version, config)]
+
     if framework == HUGGING_FACE_FRAMEWORK:
         if version_config.get("version_aliases"):
             full_base_framework_version = version_config["version_aliases"].get(
@@ -81,7 +90,6 @@ def retrieve(
 
     py_version = _validate_py_version_and_set_if_needed(py_version, version_config, framework)
     version_config = version_config.get(py_version) or version_config
-
     registry = _registry_from_region(region, version_config["registries"])
     hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]
 
@@ -91,11 +99,16 @@ def retrieve(
         instance_type, config.get("processors") or version_config.get("processors")
     )
 
+    # if container version is available in .json file, utilize that
+    if version_config.get("container_version"):
+        container_version = version_config["container_version"][processor]
+
     if framework == HUGGING_FACE_FRAMEWORK:
         pt_or_tf_version = (
             re.compile("^(pytorch|tensorflow)(.*)$").match(base_framework_version).group(2)
         )
         tag_prefix = f"{pt_or_tf_version}-transformers{original_version}"
+
     else:
         tag_prefix = version_config.get("tag_prefix", version)
 
@@ -105,6 +118,7 @@ def retrieve(
         py_version,
         container_version,
     )
+
     if _should_auto_select_container_version(instance_type, distribution):
         container_versions = {
             "tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
@@ -120,7 +134,9 @@ def retrieve(
             "pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
             "pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
         }
+
         key = "-".join([framework, tag])
+
         if key in container_versions:
             tag = "-".join([tag, container_versions[key]])
 
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -400,7 +400,7 @@ def _huggingface_base_fm_version(huggingface_version, base_fw, fixture_prefix):
             if len(original_version.split(".")) == 2:
                 base_fw_version = ".".join(base_fw_version.split(".")[:-1])
             versions.append(base_fw_version)
-    return versions
+    return sorted(versions, reverse=True)
 
 
 def _generate_huggingface_base_fw_latest_versions(