-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtraineval_be_en.sh
55 lines (48 loc) · 1.64 KB
/
traineval_be_en.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
#
# Train a fairseq translation model on the binarized be_en data, then decode
# the test and valid subsets with the best checkpoint and score the output.
#
# Usage: run from the directory that contains score.py and the data/,
# checkpoints/ and COMET/ paths below (all paths are relative).
#
# Outputs (all under MODEL_DIR):
#   train.log                      combined stdout/stderr of fairseq-train
#   test_b5.pred,  test_b5.score   beam-5 hypotheses and score for test
#   valid_b5.pred, valid_b5.score  beam-5 hypotheses and score for valid
set -euo pipefail

readonly RAW_DATA=data/ted_raw/be_en/
readonly BINARIZED_DATA=data/ted_binarized/be_spm8000/be_en/
readonly MODEL_DIR=checkpoints/ted_be_spm8000/be_en/
readonly COMET_DIR=COMET/comet

#######################################
# Decode one subset with the best checkpoint and score the hypotheses.
# Globals:   BINARIZED_DATA, MODEL_DIR, RAW_DATA, COMET_DIR (read)
# Arguments: $1 - fairseq subset name passed to --gen-subset (test, valid)
#            $2 - basename of the raw reference/source files in RAW_DATA
#                 (the valid subset is stored there as dev.en / dev.be)
# Outputs:   writes ${MODEL_DIR}/$1_b5.pred and ${MODEL_DIR}/$1_b5.score;
#            the score is also echoed to stdout via tee
# Returns:   non-zero if any pipeline stage fails (pipefail is active)
#######################################
generate_and_score() {
  local subset=$1
  local raw_name=$2
  local pred_file="${MODEL_DIR}/${subset}_b5.pred"
  local score_file="${MODEL_DIR}/${subset}_b5.score"

  # Keep only lines starting with "H", drop their first two characters,
  # order numerically by what is then the leading field, and keep the third
  # tab-separated field onward.
  # NOTE(review): presumably these are fairseq's "H-<id>\t<score>\t<text>"
  # hypothesis lines being restored to corpus order - confirm against the
  # installed fairseq version.
  # With pipefail, grep matching nothing (exit 1) aborts the script.
  fairseq-generate "$BINARIZED_DATA" \
    --gen-subset "$subset" \
    --path "${MODEL_DIR}/checkpoint_best.pt" \
    --batch-size 32 \
    --remove-bpe sentencepiece \
    --beam 5 \
    | grep '^H' \
    | cut -c 3- \
    | sort -n \
    | cut -f3- > "$pred_file"

  echo "evaluating ${subset} set"
  python score.py "$pred_file" "${RAW_DATA}/${raw_name}.en" \
    --src "${RAW_DATA}/${raw_name}.be" \
    --comet-dir "$COMET_DIR" \
    | tee "$score_file"
}

# The directory must exist before tee opens train.log inside it.
mkdir -p "$MODEL_DIR"

# stderr is merged into stdout so train.log captures both streams.
fairseq-train \
  "$BINARIZED_DATA" \
  --task translation \
  --arch transformer_iwslt_de_en \
  --max-epoch 80 \
  --patience 5 \
  --distributed-world-size 1 \
  --share-all-embeddings \
  --no-epoch-checkpoints \
  --dropout 0.3 \
  --optimizer 'adam' --adam-betas '(0.9, 0.98)' --lr-scheduler 'inverse_sqrt' \
  --warmup-init-lr 1e-7 --warmup-updates 4000 --lr 2e-4 \
  --criterion 'label_smoothed_cross_entropy' --label-smoothing 0.1 \
  --max-tokens 4500 \
  --update-freq 2 \
  --seed 2 \
  --save-dir "$MODEL_DIR" \
  --log-interval 20 2>&1 | tee "${MODEL_DIR}/train.log"

# Translate and evaluate the test set first, then the valid set.
generate_and_score test test
generate_and_score valid dev