Skip to content

Commit 182a9cb

Browse files
new: Add multitask config
1 parent b61de98 commit 182a9cb

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
[DEFAULT]
2+
version = 2020.3.18_multitask
3+
description = Multitask model trained on a combination of Reach and Rodrigues
4+
data. The Rodrigues data have been concatenated into a single continuous
5+
document and then cut into sequences of length=line_limit, so that the
6+
Rodrigues data and Reach data have the same lengths without need for much
7+
padding or truncating.
8+
deep_reference_parser_version = b61de984f95be36445287c40af4e65a403637692
9+
10+
[data]
11+
# Note that test_proportion and valid_proportion are only used for data creation steps,
12+
# not when running the train command.
13+
test_proportion = 0.25
14+
valid_proportion = 0.25
15+
data_path = data/
16+
respect_line_endings = 0
17+
respect_doc_endings = 1
18+
line_limit = 150
19+
policy_train = data/multitask/2020.3.18_multitask_train.tsv
20+
policy_test = data/multitask/2020.3.18_multitask_test.tsv
21+
policy_valid = data/multitask/2020.3.18_multitask_valid.tsv
22+
s3_slug = https://datalabs-public.s3.eu-west-2.amazonaws.com/deep_reference_parser/
23+
24+
[build]
25+
output_path = models/multitask/2020.3.18_multitask/
26+
output = crf
27+
word_embeddings = embeddings/2020.1.1-wellcome-embeddings-300.txt
28+
pretrained_embedding = 0
29+
dropout = 0.5
30+
lstm_hidden = 400
31+
word_embedding_size = 300
32+
char_embedding_size = 100
33+
char_embedding_type = BILSTM
34+
optimizer = adam
35+
36+
[train]
37+
epochs = 60
38+
batch_size = 100
39+
early_stopping_patience = 5
40+
metric = val_f1
41+

0 commit comments

Comments
 (0)