Skip to content

Commit f6c3312

Browse files
committed
Merge remote-tracking branch 'upstream/master' into fix/fw-processor-normargs
2 parents a7b7783 + 99f023e commit f6c3312

File tree

5 files changed

+174
-15
lines changed

5 files changed

+174
-15
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## v2.69.0 (2021-11-12)
4+
5+
### Features
6+
7+
* Hugging Face Transformers 4.12 for PyTorch 1.9 / TensorFlow 2.5
8+
39
## v2.68.0 (2021-11-02)
410

511
### Features

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.68.1.dev0
1+
2.69.1.dev0

src/sagemaker/image_uri_config/huggingface.json

+145-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
"4.5": "4.5.0",
77
"4.6": "4.6.1",
88
"4.10": "4.10.2",
9-
"4.11": "4.11.0"
9+
"4.11": "4.11.0",
10+
"4.12": "4.12.3"
1011
},
1112
"versions": {
1213
"4.4.2": {
@@ -416,8 +417,7 @@
416417
"repository": "huggingface-tensorflow-training",
417418
"container_version": {"gpu": "cu112-ubuntu18.04"}
418419
}
419-
}
420-
,
420+
},
421421
"4.11.0": {
422422
"version_aliases": {
423423
"pytorch1.9": "pytorch1.9.0",
@@ -487,6 +487,76 @@
487487
"repository": "huggingface-tensorflow-training",
488488
"container_version": {"gpu": "cu112-ubuntu18.04"}
489489
}
490+
},
491+
"4.12.3": {
492+
"version_aliases": {
493+
"pytorch1.9": "pytorch1.9.1",
494+
"tensorflow2.5": "tensorflow2.5.1"
495+
},
496+
"pytorch1.9.1": {
497+
"py_versions": ["py38"],
498+
"registries": {
499+
"af-south-1": "626614931356",
500+
"ap-east-1": "871362719292",
501+
"ap-northeast-1": "763104351884",
502+
"ap-northeast-2": "763104351884",
503+
"ap-northeast-3": "364406365360",
504+
"ap-south-1": "763104351884",
505+
"ap-southeast-1": "763104351884",
506+
"ap-southeast-2": "763104351884",
507+
"ca-central-1": "763104351884",
508+
"cn-north-1": "727897471807",
509+
"cn-northwest-1": "727897471807",
510+
"eu-central-1": "763104351884",
511+
"eu-north-1": "763104351884",
512+
"eu-west-1": "763104351884",
513+
"eu-west-2": "763104351884",
514+
"eu-west-3": "763104351884",
515+
"eu-south-1": "692866216735",
516+
"me-south-1": "217643126080",
517+
"sa-east-1": "763104351884",
518+
"us-east-1": "763104351884",
519+
"us-east-2": "763104351884",
520+
"us-gov-west-1": "442386744353",
521+
"us-iso-east-1": "886529160074",
522+
"us-west-1": "763104351884",
523+
"us-west-2": "763104351884"
524+
},
525+
"repository": "huggingface-pytorch-training",
526+
"container_version": {"gpu": "cu111-ubuntu20.04"}
527+
},
528+
"tensorflow2.5.1": {
529+
"py_versions": ["py37"],
530+
"registries": {
531+
"af-south-1": "626614931356",
532+
"ap-east-1": "871362719292",
533+
"ap-northeast-1": "763104351884",
534+
"ap-northeast-2": "763104351884",
535+
"ap-northeast-3": "364406365360",
536+
"ap-south-1": "763104351884",
537+
"ap-southeast-1": "763104351884",
538+
"ap-southeast-2": "763104351884",
539+
"ca-central-1": "763104351884",
540+
"cn-north-1": "727897471807",
541+
"cn-northwest-1": "727897471807",
542+
"eu-central-1": "763104351884",
543+
"eu-north-1": "763104351884",
544+
"eu-south-1": "692866216735",
545+
"eu-west-1": "763104351884",
546+
"eu-west-2": "763104351884",
547+
"eu-west-3": "763104351884",
548+
"me-south-1": "217643126080",
549+
"sa-east-1": "763104351884",
550+
"us-east-1": "763104351884",
551+
"us-east-2": "763104351884",
552+
"us-gov-west-1": "442386744353",
553+
"us-iso-east-1": "886529160074",
554+
"us-west-1": "763104351884",
555+
"us-west-2": "763104351884"
556+
},
557+
"repository": "huggingface-tensorflow-training",
558+
"container_version": {"gpu": "cu112-ubuntu18.04"}
559+
}
490560
}
491561
}
492562
},
@@ -496,7 +566,8 @@
496566
"version_aliases": {
497567
"4.6": "4.6.1",
498568
"4.10": "4.10.2",
499-
"4.11": "4.11.0"
569+
"4.11": "4.11.0",
570+
"4.12": "4.12.3"
500571
},
501572
"versions": {
502573
"4.6.1": {
@@ -806,6 +877,76 @@
806877
"repository": "huggingface-tensorflow-inference",
807878
"container_version": {"gpu": "cu112-ubuntu18.04", "cpu": "ubuntu18.04" }
808879
}
880+
},
881+
"4.12.3": {
882+
"version_aliases": {
883+
"pytorch1.9": "pytorch1.9.1",
884+
"tensorflow2.5": "tensorflow2.5.1"
885+
},
886+
"pytorch1.9.1": {
887+
"py_versions": ["py38"],
888+
"registries": {
889+
"af-south-1": "626614931356",
890+
"ap-east-1": "871362719292",
891+
"ap-northeast-1": "763104351884",
892+
"ap-northeast-2": "763104351884",
893+
"ap-northeast-3": "364406365360",
894+
"ap-south-1": "763104351884",
895+
"ap-southeast-1": "763104351884",
896+
"ap-southeast-2": "763104351884",
897+
"ca-central-1": "763104351884",
898+
"cn-north-1": "727897471807",
899+
"cn-northwest-1": "727897471807",
900+
"eu-central-1": "763104351884",
901+
"eu-north-1": "763104351884",
902+
"eu-west-1": "763104351884",
903+
"eu-west-2": "763104351884",
904+
"eu-west-3": "763104351884",
905+
"eu-south-1": "692866216735",
906+
"me-south-1": "217643126080",
907+
"sa-east-1": "763104351884",
908+
"us-east-1": "763104351884",
909+
"us-east-2": "763104351884",
910+
"us-gov-west-1": "442386744353",
911+
"us-iso-east-1": "886529160074",
912+
"us-west-1": "763104351884",
913+
"us-west-2": "763104351884"
914+
},
915+
"repository": "huggingface-pytorch-inference",
916+
"container_version": {"gpu": "cu111-ubuntu20.04", "cpu": "ubuntu20.04" }
917+
},
918+
"tensorflow2.5.1": {
919+
"py_versions": ["py37"],
920+
"registries": {
921+
"af-south-1": "626614931356",
922+
"ap-east-1": "871362719292",
923+
"ap-northeast-1": "763104351884",
924+
"ap-northeast-2": "763104351884",
925+
"ap-northeast-3": "364406365360",
926+
"ap-south-1": "763104351884",
927+
"ap-southeast-1": "763104351884",
928+
"ap-southeast-2": "763104351884",
929+
"ca-central-1": "763104351884",
930+
"cn-north-1": "727897471807",
931+
"cn-northwest-1": "727897471807",
932+
"eu-central-1": "763104351884",
933+
"eu-north-1": "763104351884",
934+
"eu-south-1": "692866216735",
935+
"eu-west-1": "763104351884",
936+
"eu-west-2": "763104351884",
937+
"eu-west-3": "763104351884",
938+
"me-south-1": "217643126080",
939+
"sa-east-1": "763104351884",
940+
"us-east-1": "763104351884",
941+
"us-east-2": "763104351884",
942+
"us-gov-west-1": "442386744353",
943+
"us-iso-east-1": "886529160074",
944+
"us-west-1": "763104351884",
945+
"us-west-2": "763104351884"
946+
},
947+
"repository": "huggingface-tensorflow-inference",
948+
"container_version": {"gpu": "cu112-ubuntu18.04", "cpu": "ubuntu18.04" }
949+
}
809950
}
810951
}
811952
}

tests/data/huggingface/run_tf.py

+21-9
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,23 @@
44
import time
55

66
import tensorflow as tf
7+
import transformers
78
from datasets import load_dataset
8-
99
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
1010

1111

12+
def _version_tuple(version):
    """Return the leading dotted-numeric part of ``version`` as a tuple of ints.

    Any suffix that is not part of the dotted-numeric prefix (for example
    ``rc1`` or ``.dev0``) is ignored. An input with no numeric prefix yields
    an empty tuple.
    """
    import re  # local import: the rest of the file's import block is not visible here

    match = re.match(r"\d+(?:\.\d+)*", version)
    if match is None:
        return ()
    return tuple(int(part) for part in match.group().split("."))


def _get_dataset_features(dataset, tokenizer, columns=None):
    """Build the ``{column_name: tensor}`` feature mapping for a dataset.

    Args:
        dataset: Object indexable by column name (``dataset[name]``).
        tokenizer: Tokenizer whose ``model_max_length`` gives the padded
            width on the older-version code path.
        columns: Iterable of column names to extract. ``None`` (the default)
            is treated as no columns and yields an empty dict.

    Returns:
        dict: Maps each requested column name to its feature tensor.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    columns = [] if columns is None else columns

    # Compare versions numerically. A plain string comparison is lexicographic
    # and would rank e.g. "4.9.0" above "4.12.0", picking the wrong branch.
    if _version_tuple(transformers.__version__) > (4, 12, 0):
        features = {x: dataset[x] for x in columns}
    else:
        # NOTE(review): presumably column access yields ragged tensors with
        # older releases, hence the explicit densify/pad step -- confirm.
        features = {
            x: dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
            for x in columns
        }

    return features
22+
23+
1224
if __name__ == "__main__":
1325

1426
parser = argparse.ArgumentParser()
@@ -57,10 +69,10 @@
5769
)
5870
train_dataset.set_format(type="tensorflow", columns=["input_ids", "attention_mask", "label"])
5971

60-
train_features = {
61-
x: train_dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
62-
for x in ["input_ids", "attention_mask"]
63-
}
72+
train_features = _get_dataset_features(
73+
train_dataset, tokenizer, columns=["input_ids", "attention_mask"]
74+
)
75+
6476
tf_train_dataset = tf.data.Dataset.from_tensor_slices(
6577
(train_features, train_dataset["label"])
6678
).batch(args.per_device_train_batch_size)
@@ -71,10 +83,10 @@
7183
)
7284
test_dataset.set_format(type="tensorflow", columns=["input_ids", "attention_mask", "label"])
7385

74-
test_features = {
75-
x: test_dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
76-
for x in ["input_ids", "attention_mask"]
77-
}
86+
test_features = _get_dataset_features(
87+
test_dataset, tokenizer, columns=["input_ids", "attention_mask"]
88+
)
89+
7890
tf_test_dataset = tf.data.Dataset.from_tensor_slices(
7991
(test_features, test_dataset["label"])
8092
).batch(args.per_device_eval_batch_size)

tests/integ/test_huggingface.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def test_huggingface_inference(
158158
huggingface_pytorch_latest_inference_py_version,
159159
):
160160
env = {
161-
"HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
161+
"HF_MODEL_ID": "philschmid/tiny-distilbert-classification",
162162
"HF_TASK": "text-classification",
163163
}
164164
endpoint_name = unique_name_from_base("test-hf-inference")

0 commit comments

Comments
 (0)