-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.yaml
56 lines (50 loc) · 1.51 KB
/
benchmark.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
---
# Data: input files, run settings, and output file names
train: data/train_processed.csv
test: data/test_processed.csv
train_use: data/train_use.joblib
test_use: data/test_use.joblib
resources: data/resources.csv
# Set to true to quickly test that the script runs smoothly.
quick: false
batch_size: 1024
epochs: 3
# Output file names; change these for each experiment.
train_output_csv: dpcnn_raw_train.csv
test_output_csv: dpcnn_raw_test.csv

# Seed
seed: 0

# Text - only set one of the embedding options to true.
# The options are nested under `embedding` so the key holds a mapping
# instead of parsing as null.
embedding:
  tfidf: false  # benchmark
  use: false  # ZW
  word_vector: true  # ZW
# NOTE(review): word_path is kept as a top-level key; confirm against the
# loading script whether it is expected under `embedding` instead.
word_path: data/crawl-300d-2M.vec

# Feature engineering
# For benchmark params, always set true.
# The trailing tag on each line is the original owner/contributor marker.
total_price: true  # benchmark
quantity: true  # benchmark
submission_time: true  # benchmark
polarity: true  # benchmark
subjectivity: false  # WH
sent_count: false  # WH
word_count: false  # WH
max_price: false  # WH
mean_price: false  # MR
min_price: false  # MR
pos_count: false  # GY
keyword_count: false  # GY
common_word_count: false  # GY

# Categorical Features - only set one of the encodings to true.
# The encodings are nested under `cat_encoding` so the key holds a mapping
# instead of parsing as null.
cat_encoding:
  one_hot_encoding: true  # benchmark
  mean_encoding: false  # Kennard
  count_encoding: false  # Kennard

# Numerical Features
# NOTE(review): the original note read "implemented yet, added by WH";
# confirm whether this option is actually implemented before enabling it.
polynomial: false

# Model architecture - only set one of the model types to true.
# The model flags are nested under `model_type` so the key holds a mapping
# instead of parsing as null.
model_type:
  nn: false  # benchmark
  dpcnn: true  # not fully implemented yet, to be added by Kennard
  xgb: false  # implemented, added by GY
  # NOTE(review): the original note read "implemented, to be added by MR";
  # the implementation status of lgbm is unclear - confirm.
  lgbm: false
  rfc: false  # implemented, added by Kennard
  ftrl: false  # implemented, added by Kennard