Skip to content

Commit d00b9e4

Browse files
committed
feat: support koelectra v3
1 parent a7be955 commit d00b9e4

File tree

11 files changed

+35114
-15
lines changed

11 files changed

+35114
-15
lines changed
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,23 @@
1313
"do_train": true,
1414
"do_eval": true,
1515
"max_seq_len": 50,
16-
"num_train_epochs": 8,
16+
"num_train_epochs": 15,
1717
"weight_decay": 0.0,
1818
"gradient_accumulation_steps": 1,
1919
"adam_epsilon": 1e-8,
2020
"warmup_steps": 0,
2121
"max_steps": -1,
2222
"max_grad_norm": 1.0,
2323
"no_cuda": false,
24-
"model_type": "koelectra-base",
24+
"model_type": "koelectra-base-v1",
2525
"model_name_or_path": "monologg/koelectra-base-discriminator",
26-
"output_dir": "koelectra-base-goemotions-ckpt",
26+
"output_dir": "koelectra-base-v1-goemotions-ckpt",
2727
"seed": 42,
2828
"train_batch_size": 32,
2929
"eval_batch_size": 64,
3030
"logging_steps": 500,
3131
"save_steps": 500,
3232
"learning_rate": 5e-5,
3333
"threshold": 0.3,
34-
"tokenizer_dir": "tokenizer"
34+
"tokenizer_dir": "tokenizer-v1"
3535
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"task": "goemotions",
3+
"data_dir": "data",
4+
"ckpt_dir": "ckpt",
5+
"train_file": "train.tsv",
6+
"dev_file": "dev.tsv",
7+
"test_file": "test.tsv",
8+
"label_file": "label.txt",
9+
"evaluate_test_during_training": false,
10+
"eval_all_checkpoints": true,
11+
"save_optimizer": false,
12+
"do_lower_case": false,
13+
"do_train": true,
14+
"do_eval": true,
15+
"max_seq_len": 50,
16+
"num_train_epochs": 15,
17+
"weight_decay": 0.0,
18+
"gradient_accumulation_steps": 1,
19+
"adam_epsilon": 1e-8,
20+
"warmup_steps": 0,
21+
"max_steps": -1,
22+
"max_grad_norm": 1.0,
23+
"no_cuda": false,
24+
"model_type": "koelectra-base-v3",
25+
"model_name_or_path": "monologg/koelectra-base-v3-discriminator",
26+
"output_dir": "koelectra-base-v3-goemotions-ckpt",
27+
"seed": 42,
28+
"train_batch_size": 32,
29+
"eval_batch_size": 64,
30+
"logging_steps": 500,
31+
"save_steps": 500,
32+
"learning_rate": 5e-5,
33+
"threshold": 0.3,
34+
"tokenizer_dir": "tokenizer-v3"
35+
}
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@
2121
"max_steps": -1,
2222
"max_grad_norm": 1.0,
2323
"no_cuda": false,
24-
"model_type": "koelectra-small",
24+
"model_type": "koelectra-small-v1",
2525
"model_name_or_path": "monologg/koelectra-small-discriminator",
26-
"output_dir": "koelectra-small-goemotions-ckpt",
27-
"seed": 777,
26+
"output_dir": "koelectra-small-v1-goemotions-ckpt",
27+
"seed": 42,
2828
"train_batch_size": 32,
2929
"eval_batch_size": 64,
3030
"logging_steps": 500,
3131
"save_steps": 500,
3232
"learning_rate": 5e-5,
3333
"threshold": 0.3,
34-
"tokenizer_dir": "tokenizer"
34+
"tokenizer_dir": "tokenizer-v1"
3535
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"task": "goemotions",
3+
"data_dir": "data",
4+
"ckpt_dir": "ckpt",
5+
"train_file": "train.tsv",
6+
"dev_file": "dev.tsv",
7+
"test_file": "test.tsv",
8+
"label_file": "label.txt",
9+
"evaluate_test_during_training": false,
10+
"eval_all_checkpoints": true,
11+
"save_optimizer": false,
12+
"do_lower_case": false,
13+
"do_train": true,
14+
"do_eval": true,
15+
"max_seq_len": 50,
16+
"num_train_epochs": 15,
17+
"weight_decay": 0.0,
18+
"gradient_accumulation_steps": 1,
19+
"adam_epsilon": 1e-8,
20+
"warmup_steps": 0,
21+
"max_steps": -1,
22+
"max_grad_norm": 1.0,
23+
"no_cuda": false,
24+
"model_type": "koelectra-small-v3",
25+
"model_name_or_path": "monologg/koelectra-small-v3-discriminator",
26+
"output_dir": "koelectra-small-v3-goemotions-ckpt",
27+
"seed": 42,
28+
"train_batch_size": 32,
29+
"eval_batch_size": 64,
30+
"logging_steps": 500,
31+
"save_steps": 500,
32+
"learning_rate": 5e-5,
33+
"threshold": 0.3,
34+
"tokenizer_dir": "tokenizer-v3"
35+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"unk_token": "[UNK]",
3+
"sep_token": "[SEP]",
4+
"pad_token": "[PAD]",
5+
"cls_token": "[CLS]",
6+
"mask_token": "[MASK]",
7+
"additional_special_tokens": [
8+
"[NAME]",
9+
"[RELIGION]"
10+
]
11+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"do_lower_case": false,
3+
"max_len": 512,
4+
"unk_token": "[UNK]",
5+
"sep_token": "[SEP]",
6+
"pad_token": "[PAD]",
7+
"cls_token": "[CLS]",
8+
"mask_token": "[MASK]",
9+
"additional_special_tokens": [
10+
"[NAME]",
11+
"[RELIGION]"
12+
]
13+
}

0 commit comments

Comments
(0)