@@ -7,6 +7,7 @@
 from datasets import load_dataset
 
 from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+import transformers
 
 
 if __name__ == "__main__":
@@ -57,8 +58,15 @@
     )
     train_dataset.set_format(type="tensorflow", columns=["input_ids", "attention_mask", "label"])
 
-    train_features = {x: train_dataset[x] for x in ["input_ids", "attention_mask"]}
-
+    if transformers.__version__ > "4.12.0":
+        train_features = {x: train_dataset[x] for x in ["input_ids", "attention_mask"]}
+    else:
+        train_features = {
+            x: train_dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
+            for x in ["input_ids", "attention_mask"]
+        }
+
+
     tf_train_dataset = tf.data.Dataset.from_tensor_slices(
         (train_features, train_dataset["label"])
     ).batch(args.per_device_train_batch_size)
@@ -69,7 +77,14 @@
     )
     test_dataset.set_format(type="tensorflow", columns=["input_ids", "attention_mask", "label"])
 
-    test_features = {x: test_dataset[x] for x in ["input_ids", "attention_mask"]}
+    if transformers.__version__ > "4.12.0":
+        test_features = {x: test_dataset[x] for x in ["input_ids", "attention_mask"]}
+    else:
+        test_features = {
+            x: test_dataset[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])
+            for x in ["input_ids", "attention_mask"]
+        }
+
 
     tf_test_dataset = tf.data.Dataset.from_tensor_slices(
         (test_features, test_dataset["label"])
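One note on the version gate above: `transformers.__version__` is a plain string, so `>` compares it lexicographically, and for example `"4.9.0" > "4.12.0"` evaluates to `True` even though 4.9.0 is the older release. A minimal sketch of a numerically correct check using the `packaging` library (the helper name is hypothetical, not part of this commit):

import transformers
from packaging import version


def newer_than_4_12() -> bool:
    # Hypothetical helper, not part of the commit above: parse both sides so
    # release segments are compared numerically ("4.9.0" < "4.12.0").
    return version.parse(transformers.__version__) > version.parse("4.12.0")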
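For context on the `else` branch: the diff suggests that on older versions, columns formatted with `set_format(type="tensorflow")` come back as `tf.RaggedTensor`s (rows of different token lengths), and `.to_tensor(default_value=0, shape=[None, tokenizer.model_max_length])` pads every row with zeros to a fixed width so the model sees dense, uniformly shaped `input_ids` and `attention_mask` tensors. A self-contained sketch of that conversion, where the width 8 stands in for `tokenizer.model_max_length`:

import tensorflow as tf

# Two tokenized examples of different lengths, as a ragged tensor.
ragged = tf.ragged.constant([[101, 2009, 3014, 102], [101, 102]])

# Pad each row with 0 up to a fixed width of 8 columns.
dense = ragged.to_tensor(default_value=0, shape=[None, 8])

print(dense.shape)       # (2, 8)
print(dense.numpy()[1])  # [101 102   0   0   0   0   0   0]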