-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconfig.cfg
69 lines (62 loc) · 1.65 KB
/
config.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Pipeline
# Definition of the NLP pipeline object. The [pretrain] section refers back to
# this section through an interpolated reference.
[nlp]
lang = "eds"
# A single component is listed; its settings live in [components.linker].
pipeline = ["linker"]
# Interpolated reference to the [components] section tree of this file.
components = ${components}
# The "@tokenizers" key names the registry entry used to build the tokenizer.
tokenizer = {"@tokenizers": "eds.tokenizer"}
# Span linker component, built by the "eds.span_linker" factory.
# NOTE(review): the exact meaning of rescale, metric, threshold and the *_mode
# options is defined by that factory, not by this file; check its documentation
# before tuning them.
[components.linker]
@factory = "eds.span_linker"
rescale = 20
metric = "cosine"
# Written with an explicit fractional digit so strict number parsers accept it.
threshold = 0.0
reference_mode = "concept"
probability_mode = "sigmoid"
init_weights = true
# Embedding sub-component of the linker, built by the "eds.span_pooler" factory.
[components.linker.embedding]
@factory = "eds.span_pooler"
# Same value as span_getter in the nested transformer section and as
# span_setter in [val_docs].
span_getter = "entities"
pooling_mode = "mean"
# NOTE(review): presumably the size of the pooled span embedding. It differs
# from the usual 768-dim hidden state of bert-base models, which suggests a
# projection layer; confirm against the span_pooler documentation.
hidden_size = 350
# Inner embedding of the span pooler, built by the "eds.transformer" factory.
[components.linker.embedding.embedding]
@factory = "eds.transformer"
span_getter = "entities"
# NOTE(review): looks like a Hugging Face model identifier, so the weights are
# presumably fetched or read from a local cache at load time; confirm.
model = "bert-base-multilingual-uncased"
# Document source shared by [pretrain], [train_classifier] and [evaluate].
# The "@readers" key selects the "standoff" reader from the readers registry.
[val_docs]
@readers = "standoff"
# NOTE(review): this section is named val_docs and is used as validation and
# evaluation data, yet it points at a train/ directory. Confirm this is
# intentional and not a leftover from debugging.
path = "data/dataset/train/"
# Matches the span_getter = "entities" used by the linker's embedding sections.
span_setter = "entities"
# NOTE(review): presumably stores each annotation's note text in a span
# attribute named "cui"; verify against the standoff reader documentation.
notes_as_span_attribute = "cui"
split_fragments = false
# Scripts
# One section per script: [pretrain], [train_classifier], [evaluate].
# First stage. Its output_dir is the model path that [train_classifier] loads.
[pretrain]
# Interpolated reference to the [nlp] section defined in this file.
nlp = ${nlp}
dropout = 0.2
seed = 42
max_steps = 20000
# Two separate learning rates are configured.
# NOTE(review): presumably transformer_lr applies to the transformer weights
# and task_lr to the remaining parameters; confirm in the script.
transformer_lr = 5e-5
task_lr = 1e-4
batch_size = 512
max_grad_norm = 10.0
# NOTE(review): presumably a fraction of max_steps; confirm in the script.
warmup_rate = 0.1
# Interpolated reference to the [val_docs] section.
val_docs = ${val_docs}
umls_path = "data/umls/2014AB/"
# Filter expression keeping rows whose GRP is one of the ten listed values and
# whose LAT is FRE, ENG, ITA or SPA.
# NOTE(review): GRP and LAT look like UMLS semantic group and language
# columns; confirm against the loader.
query = "GRP IN ('ANAT','CHEM','DEVI','DISO','GEOG','LIVB','OBJC','PHEN','PHYS','PROC') and LAT in ('FRE', 'ENG', 'ITA', 'SPA')"
# Narrower than query: only LAT = 'FRE'.
cui_query = "LAT = 'FRE'"
output_dir = "artifacts/model-inter"
debug = false
# Second stage. Unlike [pretrain], this section sets no dropout, transformer_lr
# or max_grad_norm.
[train_classifier]
# Same path as output_dir in [pretrain]; that stage has to be run first.
nlp = "artifacts/model-inter"
seed = 42
task_lr = 1e-4
batch_size = 512
# Integer 0 here, whereas [pretrain] uses the float 0.1 for the same key.
warmup_rate = 0
training_top_k = 200
# Interpolated reference to the [val_docs] section.
val_docs = ${val_docs}
umls_path = "data/umls/2014AB/"
# Same GRP list as [pretrain]. The LAT condition differs: FRE rows are kept
# regardless of SAB, while ENG, ITA and SPA rows are kept only when SAB is one
# of the eight listed values.
query = "GRP IN ('ANAT','CHEM','DEVI','DISO','GEOG','LIVB','OBJC','PHEN','PHYS','PROC') and (LAT in ('FRE') or (LAT in ('ENG', 'ITA', 'SPA') AND SAB IN ('CHV', 'SNOMEDCT_US', 'MTH', 'NCI', 'MSH', 'MSHITA', 'MSHSPA', 'SCTSPA')))"
# Wider than in [pretrain]: ENG is accepted in addition to FRE.
cui_query = "LAT = 'FRE' or LAT = 'ENG'"
output_dir = "artifacts/model-last"
# Half of the max_steps value set in [pretrain].
max_steps = 10000
debug = false
# Evaluation script settings. Only the data source is set in this section.
[evaluate]
# Interpolated reference to [val_docs], the same documents used as val_docs in
# the two training sections.
data = ${val_docs}