Skip to content
4 changes: 2 additions & 2 deletions easyeditor/editors/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ def __init__(self, hparams: HyperParams):
else:
raise NotImplementedError

if self.tok is not None and (isinstance(self.tok, GPT2Tokenizer) or isinstance(self.tok, GPT2TokenizerFast) or isinstance(self.tok, LlamaTokenizer) or isinstance(self.tok, LlamaTokenizerFast) or isinstance(self.tok, PreTrainedTokenizerFast)) and (hparams.alg_name not in ['ROME', 'MEMIT', 'EMMET', 'R-ROME']):
if self.tok is not None and (isinstance(self.tok, GPT2Tokenizer) or isinstance(self.tok, GPT2TokenizerFast) or isinstance(self.tok, LlamaTokenizer) or isinstance(self.tok, LlamaTokenizerFast) or isinstance(self.tok, PreTrainedTokenizerFast)) and (hparams.alg_name not in ['ROME', 'MEMIT', 'EMMET', 'R-ROME','AlphaEdit']):
LOG.info('AutoRegressive Model detected, set the padding side of Tokenizer to left...')
self.tok.padding_side = 'left'
if self.tok is not None and ('mistral' in self.model_name.lower() or 'llama' in self.model_name.lower() or 'qwen' in self.model_name.lower()) and (hparams.alg_name in ['ROME', 'MEMIT', 'EMMET', 'R-ROME']):
if self.tok is not None and ('mistral' in self.model_name.lower() or 'llama' in self.model_name.lower() or 'qwen' in self.model_name.lower()) and (hparams.alg_name in ['ROME', 'MEMIT', 'EMMET', 'R-ROME','AlphaEdit']):
LOG.info('AutoRegressive Model detected, set the padding side of Tokenizer to right...')
self.tok.padding_side = 'right'
else:
Expand Down
2 changes: 1 addition & 1 deletion easyeditor/editors/steer_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoProcessor, LlavaForConditionalGeneration
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
from ..util.globals import *
from ..evaluate import compute_safety_edit_quality, ccks_compute_safety_edit_quality
from ..evaluate import compute_safety_edit_quality
from ..util import nethook
from ..util.hparams import HyperParams
from ..util.alg_dict import *
Expand Down
4 changes: 2 additions & 2 deletions hparams/AlphaEdit/llama3-8b.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
alg_name: "AlphaEdit"
model_name: "./hugging_cache/llama-3-8b"
model_name: "./hugging_cache/llama-3-8b-instruct"
stats_dir: "./data/stats"
# Make sure the projection matrix P has been downloaded from Baidu Netdisk beforehand (for details, see EasyEdit/easyeditor/models/alphaedit/README.md) to avoid recomputing it.
# If you do not need the projection matrix P that we provide, leave the P_loc field unchanged;
Expand Down Expand Up @@ -27,4 +27,4 @@ mom2_dataset: "wikipedia"
mom2_n_samples: 100000
mom2_dtype: "float32"
nullspace_threshold: 2e-2
L2: 10
L2: 1
30 changes: 30 additions & 0 deletions hparams/AlphaEdit/llama3.1-8b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
alg_name: "AlphaEdit"
model_name: "./hugging_cache/llama-3.1-8b-instruct"
stats_dir: "./data/stats"
# Make sure the projection matrix P has been downloaded from Baidu Netdisk beforehand (for details, see EasyEdit/easyeditor/models/alphaedit/README.md) to avoid recomputing it.
# If you do not need the projection matrix P that we provide, leave the P_loc field unchanged;
# just run the program, and it will compute P and save it locally automatically.
P_loc: "./null_space_project.pt"
device: 0
layers: [4, 5, 6, 7, 8]
clamp_norm_factor: 0.75
layer_selection: "all"
fact_token: "subject_last"
v_num_grad_steps: 25
v_lr: 1e-1
v_loss_layer: 31
v_weight_decay: 0.5
kl_factor: 0.0625
mom2_adjustment: true
mom2_update_weight: 15000
rewrite_module_tmp: "model.layers.{}.mlp.down_proj"
layer_module_tmp: "model.layers.{}"
mlp_module_tmp: "model.layers.{}.mlp"
attn_module_tmp: "model.layers.{}.self_attn"
ln_f_module: "model.norm"
lm_head_module: "lm_head"
mom2_dataset: "wikipedia"
mom2_n_samples: 100000
mom2_dtype: "float32"
nullspace_threshold: 2e-2
L2: 10
20 changes: 20 additions & 0 deletions hparams/DPO/llama-7b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
alg_name: "DPO"
model_name: "./hugging_cache/llama-2-7b"
device: 0

lora_type: "adalora"
layers: []
num_steps: 7
batch_size: 1
max_length: 30
lr: 5e-5
weight_decay: 0
kl_factor: 0
rank: 8
lora_alpha: 32
lora_dropout: 0.1
norm_constraint: false
target_modules: ["q_proj", "v_proj"] #["up_proj", "down_proj"] #["q_proj", "v_proj"]
model_parallel: False
alpha: 0.99
beta: 0.1
3 changes: 2 additions & 1 deletion hparams/DeCo/llama.yaml → hparams/DeCo/llama-7b.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
alg_name: "deco"
model_name: "./huggyllama-7b" or "./llava-7b-hf"
# model_name: "./huggyllama-7b" or "./llava-7b-hf"
model_name: "./hugging_cache/llama-2-7b"
device: 1
alpha: 0.6
threshold_top_p: 0.9
Expand Down
File renamed without changes.
18 changes: 18 additions & 0 deletions hparams/LoRA/llama3-8b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
alg_name: "LoRA"
model_name: "./hugging_cache/llama-3-8b-instruct"
device: 0

lora_type: "adalora"
layers: []
num_steps: 70
batch_size: 1
max_length: 50
lr: 5e-3
weight_decay: 0
kl_factor: 0
rank: 8
lora_alpha: 32
lora_dropout: 0.1
norm_constraint: false
target_modules: ["q_proj", "v_proj"] #["up_proj", "down_proj"] #["q_proj", "v_proj"]
model_parallel: false
18 changes: 18 additions & 0 deletions hparams/LoRA/llama3.1-8b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
alg_name: "LoRA"
model_name: "./hugging_cache/llama-3.1-8b-instruct"
device: 1

lora_type: "adalora"
layers: []
num_steps: 50
batch_size: 1
max_length: 30
lr: 5e-3
weight_decay: 0
kl_factor: 0
rank: 8
lora_alpha: 32
lora_dropout: 0.1
norm_constraint: false
target_modules: ["q_proj", "v_proj"] #["up_proj", "down_proj"] #["q_proj", "v_proj"]
model_parallel: false
25 changes: 25 additions & 0 deletions hparams/QLoRA/llama-7b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
alg_name: "QLoRA"
model_name: "./hugging_cache/llama-2-7b"
device: 1

# QLoRA specific settings
quantization_bit: 4
double_quant: true
quant_type: "nf4" # nf4, fp4, int4, int8

# LoRA settings
lora_type: "lora" # QLoRA typically uses standard LoRA, not AdaLoRA
lora_r: 8
lora_alpha: 32
lora_dropout: 0.1
target_modules: ["q_proj", "v_proj"]

# Training settings
num_steps: 1
batch_size: 1
max_length: 30
lr: 5e-3
weight_decay: 0.0

# Additional settings
model_parallel: false
35 changes: 35 additions & 0 deletions hparams/WISE/llama3-8b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
alg_name: "WISE"
model_name: "./hugging_cache/llama-3-8b-instruct"

device: 0

mask_ratio: 0.2
edit_lr: 0.9
n_iter: 30
norm_constraint: 1.0
act_margin: [2.0, 20.0, 10.0] # alpha, beta, gamma
act_ratio: 0.88
save_freq: 500
merge_freq: 1000
merge_alg: 'ties'
objective_optimization: 'only_label'
inner_params:
- model.layers[29].mlp.down_proj.weight


## alternative: WISE-Merge, WISE-Retrieve

# for merge (if using WISE-Merge)
densities: 0.53
weights: 1.0

# for retrieve (if using WISE-Retrieve, please set to True)
retrieve: True
replay: False # True --> will replay the past editing instances: see https://arxiv.org/abs/2405.14768 Appendix B.3

model_parallel: False
use_chat_template: True

# for save and load
# save_path: "./wise_checkpoint/wise.pt"
# load_path: "./wise_checkpoint/wise.pt"
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ model_name: "./hugging_cache/llama-3.1-8b-instruct"
device: 0

mask_ratio: 0.2
edit_lr: 1.0
n_iter: 70
edit_lr: 0.9
n_iter: 30
norm_constraint: 1.0
act_margin: [5.0, 20.0, 10.0] # alpha, beta, gamma
act_margin: [5.0, 10.0, 10.0] # alpha, beta, gamma
act_ratio: 0.88
save_freq: 500
merge_freq: 1000
merge_alg: 'ties'
objective_optimization: 'only_label'
inner_params:
- model.layers[27].mlp.down_proj.weight
- model.layers[29].mlp.down_proj.weight


## alternative: WISE-Merge, WISE-Retrieve
Expand Down
Loading