-
Notifications
You must be signed in to change notification settings - Fork 2
/
conf.yml
82 lines (65 loc) · 1.93 KB
/
conf.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Model selection; one of:
#   lstm, lstmattn, bert, lgbm, lstmroberta, lastquery, saint, lstmalbertattn
# (non-generalized): uses only the base features
#   - lstm, lstmattn, bert
# (generalized): also uses the added columns as categorical features
model: lstmattn
# wandb settings. The keys below are nested under `wandb`; if they sit at the
# same indentation as `wandb:`, the key parses as null and its settings become
# unrelated top-level keys.
wandb:
  using: false
  project: DKT
  # Enter your own wandb ID here.
  entity: pomdeyo
  tags:
    - baseline
## Main params
task_name: lstmattn_k-fold
seed: 42
device: cuda

# Input data directory and file names.
data_dir: /opt/ml/input/data/train_dataset
file_name: train_data.csv
test_file_name: test_data.csv

# Directories, given as relative paths.
asset_dir: asset/
model_dir: models/
output_dir: output/

max_seq_len: 20
num_workers: 1
## K-fold params
# Run k-fold using n folds.
use_kfold: true
use_stratify: false
n_fold: 5
# Split the dataset used for k-fold on a per-user basis.
split_by_user: false
## Model
hidden_dim: 128
n_layers: 4
n_heads: 2
drop_out: 0.2
## Train
n_epochs: 200
batch_size: 64
lr: 0.0001
clip_grad: 10
patience: 20
log_steps: 50
split_ratio: 0.7

## Important
optimizer: adamW
scheduler: linear_warmup
# Used only by the lgbm model.
# The settings below are nested under `lgbm`; at the same indentation as
# `lgbm:` the key would parse as null.
lgbm:
  model_params:
    objective: binary  # binary classification
    boosting_type: gbdt
    metric: auc  # evaluation metric
    feature_fraction: 0.8  # feature sampling ratio (originally 0.8)
    bagging_fraction: 0.8  # data sampling ratio (originally 0.8)
    bagging_freq: 1
    # NOTE(review): n_estimators is a LightGBM alias for the iteration count;
    # if these params are passed to lgb.train it may take precedence over
    # num_boost_round below - confirm against the training code.
    n_estimators: 10000  # number of trees
    early_stopping_rounds: 100
    seed: 42
    verbose: -1
    n_jobs: -1
  verbose_eval: 100  # originally 100
  num_boost_round: 500
  # Also set inside model_params above; keep the two values in sync.
  early_stopping_rounds: 100
## Args for LGBM feature engineering
# NOTE(review): kept as top-level keys, as in the visible text; confirm they
# are not meant to be nested under `lgbm`.
# Extract statistics features from train + test (except last rows).
make_sharing_feature: true
# Use test_data for training.
use_test_data: true