Merge remote-tracking branch 'upstream/master' into fix/fw-processor-normargs

athewsey · athewsey · commit f6c331201ad8 · 2021-11-24T16:52:57.000+08:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v2.69.0 (2021-11-12)
+
+### Features
+
+ * Hugging Face Transformers 4.12 for Pt1.9/TF2.5
+
 ## v2.68.0 (2021-11-02)
 
 ### Features
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.68.1.dev0
+2.69.1.dev0
diff --git a/src/sagemaker/image_uri_config/huggingface.json b/src/sagemaker/image_uri_config/huggingface.json
@@ -6,7 +6,8 @@
             "4.5": "4.5.0",
             "4.6": "4.6.1",
             "4.10": "4.10.2",
-            "4.11": "4.11.0"
+            "4.11": "4.11.0",
+            "4.12": "4.12.3"
         },
         "versions": {
             "4.4.2": {
@@ -416,8 +417,7 @@
                     "repository": "huggingface-tensorflow-training",
                     "container_version": {"gpu": "cu112-ubuntu18.04"}
                 }
-            }
-            ,
+            },
             "4.11.0": {
                 "version_aliases": {
                     "pytorch1.9": "pytorch1.9.0",
@@ -487,6 +487,76 @@
                     "repository": "huggingface-tensorflow-training",
                     "container_version": {"gpu": "cu112-ubuntu18.04"}
                 }
+            },
+            "4.12.3": {
+                "version_aliases": {
+                    "pytorch1.9": "pytorch1.9.1",
+                    "tensorflow2.5": "tensorflow2.5.1"
+                },
+                "pytorch1.9.1": {
+                    "py_versions": ["py38"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-training",
+                    "container_version": {"gpu": "cu111-ubuntu20.04"}
+                },
+                "tensorflow2.5.1": {
+                    "py_versions": ["py37"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-tensorflow-training",
+                    "container_version": {"gpu": "cu112-ubuntu18.04"}
+                }
             }
         }
     },
@@ -496,7 +566,8 @@
         "version_aliases": {
             "4.6": "4.6.1",
             "4.10": "4.10.2",
-            "4.11": "4.11.0"
+            "4.11": "4.11.0",
+            "4.12": "4.12.3"
         },
         "versions": {
             "4.6.1": {
@@ -806,6 +877,76 @@
                     "repository": "huggingface-tensorflow-inference",
                     "container_version": {"gpu": "cu112-ubuntu18.04", "cpu": "ubuntu18.04" }
                 }
+            },
+            "4.12.3": {
+                "version_aliases": {
+                    "pytorch1.9": "pytorch1.9.1",
+                    "tensorflow2.5": "tensorflow2.5.1"
+                },
+                "pytorch1.9.1": {
+                    "py_versions": ["py38"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-inference",
+                    "container_version": {"gpu": "cu111-ubuntu20.04", "cpu": "ubuntu20.04" }
+                },
+                "tensorflow2.5.1": {
+                    "py_versions": ["py37"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-northeast-3": "364406365360",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-tensorflow-inference",
+                    "container_version": {"gpu": "cu112-ubuntu18.04", "cpu": "ubuntu18.04" }
+                }
             }
         }
     }
diff --git a/tests/data/huggingface/run_tf.py b/tests/data/huggingface/run_tf.py
@@ -4,11 +4,23 @@
 import time
 
 import tensorflow as tf
+import transformers
 from datasets import load_dataset
-
 from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
 
 
+def _get_dataset_features(dataset, tokenizer, columns=()):
+    """Collect the model input features of ``dataset`` as a dict keyed by column.
+
+    Args:
+        dataset: Object indexable by column name (``dataset[name]``).
+        tokenizer: Tokenizer whose ``model_max_length`` gives the padded width
+            used on the older-version code path.
+        columns: Names of the columns to extract. Defaults to no columns.
+
+    Returns:
+        dict: Maps each requested column name to its feature values.
+    """
+    import re
+
+    # Compare the release numerically. A plain string comparison is
+    # lexicographic, so e.g. "4.9.0" > "4.12.0" would be True and older
+    # releases would wrongly take the new-version branch.
+    release = tuple(int(n) for n in re.findall(r"\d+", transformers.__version__)[:3])
+
+    if release > (4, 12, 0):
+        # Newer releases: the column values are used exactly as returned.
+        features = {x: dataset[x] for x in columns}
+    else:
+        # Older releases: densify each column, padding with 0 up to the
+        # tokenizer's maximum length (presumably the columns are ragged
+        # tensors here -- confirm against the datasets version in use).
+        features = {
+            x: dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
+            for x in columns
+        }
+
+    return features
+
+
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser()
@@ -57,10 +69,10 @@
     )
     train_dataset.set_format(type="tensorflow", columns=["input_ids", "attention_mask", "label"])
 
-    train_features = {
-        x: train_dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
-        for x in ["input_ids", "attention_mask"]
-    }
+    train_features = _get_dataset_features(
+        train_dataset, tokenizer, columns=["input_ids", "attention_mask"]
+    )
+
     tf_train_dataset = tf.data.Dataset.from_tensor_slices(
         (train_features, train_dataset["label"])
     ).batch(args.per_device_train_batch_size)
@@ -71,10 +83,10 @@
     )
     test_dataset.set_format(type="tensorflow", columns=["input_ids", "attention_mask", "label"])
 
-    test_features = {
-        x: test_dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
-        for x in ["input_ids", "attention_mask"]
-    }
+    test_features = _get_dataset_features(
+        test_dataset, tokenizer, columns=["input_ids", "attention_mask"]
+    )
+
     tf_test_dataset = tf.data.Dataset.from_tensor_slices(
         (test_features, test_dataset["label"])
     ).batch(args.per_device_eval_batch_size)
diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py
@@ -158,7 +158,7 @@ def test_huggingface_inference(
     huggingface_pytorch_latest_inference_py_version,
 ):
     env = {
-        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
+        "HF_MODEL_ID": "philschmid/tiny-distilbert-classification",
         "HF_TASK": "text-classification",
     }
     endpoint_name = unique_name_from_base("test-hf-inference")

Original file line number	Diff line number	Diff line change
`@@ -158,7 +158,7 @@ def test_huggingface_inference(`
`158`	`158`	`huggingface_pytorch_latest_inference_py_version,`
`159`	`159`	`):`
`160`	`160`	`env = {`
`161`		`- "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",`
	`161`	`+ "HF_MODEL_ID": "philschmid/tiny-distilbert-classification",`
`162`	`162`	`"HF_TASK": "text-classification",`
`163`	`163`	`}`
`164`	`164`	`endpoint_name = unique_name_from_base("test-hf-inference")`