EasyEdit Example For US President #416

Merged: 12 commits, Nov 19, 2024
4 changes: 2 additions & 2 deletions easyeditor/editors/editor.py
@@ -128,10 +128,10 @@ def __init__(self, hparams: HyperParams):
else:
raise NotImplementedError

-if self.tok is not None and (isinstance(self.tok, GPT2Tokenizer) or isinstance(self.tok, GPT2TokenizerFast) or isinstance(self.tok, LlamaTokenizer) or isinstance(self.tok, LlamaTokenizerFast) or isinstance(self.tok, PreTrainedTokenizerFast)) and (hparams.alg_name not in ['ROME', 'MEMIT', 'EMMET', 'R-ROME']):
+if self.tok is not None and (isinstance(self.tok, GPT2Tokenizer) or isinstance(self.tok, GPT2TokenizerFast) or isinstance(self.tok, LlamaTokenizer) or isinstance(self.tok, LlamaTokenizerFast) or isinstance(self.tok, PreTrainedTokenizerFast)) and (hparams.alg_name not in ['ROME', 'MEMIT', 'EMMET', 'R-ROME','AlphaEdit']):
LOG.info('AutoRegressive Model detected, set the padding side of Tokenizer to left...')
self.tok.padding_side = 'left'
-if self.tok is not None and ('mistral' in self.model_name.lower() or 'llama' in self.model_name.lower() or 'qwen' in self.model_name.lower()) and (hparams.alg_name in ['ROME', 'MEMIT', 'EMMET', 'R-ROME']):
+if self.tok is not None and ('mistral' in self.model_name.lower() or 'llama' in self.model_name.lower() or 'qwen' in self.model_name.lower()) and (hparams.alg_name in ['ROME', 'MEMIT', 'EMMET', 'R-ROME','AlphaEdit']):
LOG.info('AutoRegressive Model detected, set the padding side of Tokenizer to right...')
self.tok.padding_side = 'right'
else:
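Distilled, the change above adds AlphaEdit to the group of locate-then-edit algorithms whose tokenizer must pad on the right. A small paraphrase of the updated logic (simplified for illustration, not verbatim repo code):

```python
# Paraphrase of the updated padding-side logic in editor.py; the names here
# are simplified for illustration and are not the repo's actual identifiers.
LOCATE_THEN_EDIT = ['ROME', 'MEMIT', 'EMMET', 'R-ROME', 'AlphaEdit']  # AlphaEdit is new

def padding_side(model_name: str, alg_name: str) -> str:
    causal_family = any(k in model_name.lower() for k in ('mistral', 'llama', 'qwen'))
    if causal_family and alg_name in LOCATE_THEN_EDIT:
        return 'right'  # locate-then-edit methods index subject tokens from the left
    return 'left'       # default for batched autoregressive generation
```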
2 changes: 1 addition & 1 deletion easyeditor/editors/steer_editor.py
@@ -16,7 +16,7 @@
from transformers import AutoProcessor, LlavaForConditionalGeneration
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
from ..util.globals import *
-from ..evaluate import compute_safety_edit_quality, ccks_compute_safety_edit_quality
+from ..evaluate import compute_safety_edit_quality
from ..util import nethook
from ..util.hparams import HyperParams
from ..util.alg_dict import *
4 changes: 2 additions & 2 deletions hparams/AlphaEdit/llama3-8b.yaml
@@ -1,5 +1,5 @@
alg_name: "AlphaEdit"
model_name: "./hugging_cache/llama-3-8b"
model_name: "./hugging_cache/llama-3-8b-instruct"
stats_dir: "./data/stats"
# Make sure the projection matrix P has been downloaded from the Baidu netdisk beforehand to avoid recomputing it (for details, see EasyEdit/easyeditor/models/alphaedit/README.md).
# If the precomputed P we provide is not needed, nothing has to be done to the P_loc field;
@@ -27,4 +27,4 @@ mom2_dataset: "wikipedia"
mom2_n_samples: 100000
mom2_dtype: "float32"
nullspace_threshold: 2e-2
-L2: 10
+L2: 1
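Putting this config to work, in the spirit of the PR title, might look like the following. This is a sketch based on EasyEdit's documented BaseEditor API; the prompt and target are illustrative, not taken from the PR, and the AlphaEditHyperParams class name is assumed from the repo's convention.

```python
# Illustrative usage of the AlphaEdit config above; the edit itself is made
# up for demonstration and follows EasyEdit's documented edit() API.
from easyeditor import BaseEditor, AlphaEditHyperParams

hparams = AlphaEditHyperParams.from_hparams('./hparams/AlphaEdit/llama3-8b.yaml')
editor = BaseEditor.from_hparams(hparams)

metrics, edited_model, _ = editor.edit(
    prompts=['Who is the president of the United States?'],
    subject=['United States'],    # must appear in the prompt
    target_new=['Donald Trump'],  # illustrative new target
)
print(metrics)
```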
30 changes: 30 additions & 0 deletions hparams/AlphaEdit/llama3.1-8b.yaml
@@ -0,0 +1,30 @@
alg_name: "AlphaEdit"
model_name: "./hugging_cache/llama-3.1-8b-instruct"
stats_dir: "./data/stats"
# Make sure the projection matrix P has been downloaded from the Baidu netdisk beforehand to avoid recomputing it (for details, see EasyEdit/easyeditor/models/alphaedit/README.md).
# If the precomputed P we provide is not needed, nothing has to be done to the P_loc field;
# just run the program and it will compute P and save it locally automatically.
P_loc: "./null_space_project.pt"
device: 0
layers: [4, 5, 6, 7, 8]
clamp_norm_factor: 0.75
layer_selection: "all"
fact_token: "subject_last"
v_num_grad_steps: 25
v_lr: 1e-1
v_loss_layer: 31
v_weight_decay: 0.5
kl_factor: 0.0625
mom2_adjustment: true
mom2_update_weight: 15000
rewrite_module_tmp: "model.layers.{}.mlp.down_proj"
layer_module_tmp: "model.layers.{}"
mlp_module_tmp: "model.layers.{}.mlp"
attn_module_tmp: "model.layers.{}.self_attn"
ln_f_module: "model.norm"
lm_head_module: "lm_head"
mom2_dataset: "wikipedia"
mom2_n_samples: 100000
mom2_dtype: "float32"
nullspace_threshold: 2e-2
L2: 10
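The nullspace_threshold above decides which directions count as the null space of the preserved-knowledge covariance. A rough sketch of how such a projection matrix P could be computed and saved to P_loc; this is an assumption about the method, the repo's actual implementation lives in easyeditor/models/alphaedit and may differ.

```python
# Rough sketch of an AlphaEdit-style null-space projector, not the repo's code.
import torch

def null_space_projection(cov: torch.Tensor, threshold: float = 2e-2) -> torch.Tensor:
    """Projector onto the null space of a symmetric key covariance K @ K.T."""
    eigvals, eigvecs = torch.linalg.eigh(cov)     # eigenvalues in ascending order
    null_basis = eigvecs[:, eigvals < threshold]  # near-zero directions only
    return null_basis @ null_basis.T              # P = U U^T

# P = null_space_projection(cov); torch.save(P, './null_space_project.pt')  # P_loc
```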
20 changes: 20 additions & 0 deletions hparams/DPO/llama-7b.yaml
@@ -0,0 +1,20 @@
alg_name: "DPO"
model_name: "./hugging_cache/llama-2-7b"
device: 0

lora_type: "adalora"
layers: []
num_steps: 7
batch_size: 1
max_length: 30
lr: 5e-5
weight_decay: 0
kl_factor: 0
rank: 8
lora_alpha: 32
lora_dropout: 0.1
norm_constraint: false
target_modules: ["q_proj", "v_proj"] #["up_proj", "down_proj"] #["q_proj", "v_proj"]
model_parallel: False
alpha: 0.99
beta: 0.1
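The beta field above is the standard DPO temperature. A minimal sketch of the loss it scales (textbook DPO, not code from this PR; how the alpha field is consumed is not visible in the diff):

```python
# Textbook DPO loss; beta matches the YAML's beta: 0.1. Log-probs are
# per-sequence sums under the policy and a frozen reference model.
import torch.nn.functional as F

def dpo_loss(pi_chosen_logps, pi_rejected_logps,
             ref_chosen_logps, ref_rejected_logps, beta=0.1):
    # beta controls how strongly the policy is pushed away from the reference.
    margin = (pi_chosen_logps - ref_chosen_logps) \
           - (pi_rejected_logps - ref_rejected_logps)
    return -F.logsigmoid(beta * margin).mean()
```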
3 changes: 2 additions & 1 deletion hparams/DeCo/llama.yaml → hparams/DeCo/llama-7b.yaml
@@ -1,5 +1,6 @@
alg_name: "deco"
model_name: "./huggyllama-7b" or "./llava-7b-hf"
# model_name: "./huggyllama-7b" or "./llava-7b-hf"
model_name: "./hugging_cache/llama-2-7b"
device: 1
alpha: 0.6
threshold_top_p: 0.9
File renamed without changes.
18 changes: 18 additions & 0 deletions hparams/LoRA/llama3-8b.yaml
@@ -0,0 +1,18 @@
alg_name: "LoRA"
model_name: "./hugging_cache/llama-3-8b-instruct"
device: 0

lora_type: "adalora"
layers: []
num_steps: 70
batch_size: 1
max_length: 50
lr: 5e-3
weight_decay: 0
kl_factor: 0
rank: 8
lora_alpha: 32
lora_dropout: 0.1
norm_constraint: false
target_modules: ["q_proj", "v_proj"] #["up_proj", "down_proj"] #["q_proj", "v_proj"]
model_parallel: false
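Usage mirrors the other methods. A hedged sketch, assuming EasyEdit's LoRAHyperParams class name and echoing the Watts Humphrey example from EasyEdit's README:

```python
# Sketch: running an edit with the LoRA config above. The class name is
# assumed from EasyEdit's naming convention; the edit follows the README.
from easyeditor import BaseEditor, LoRAHyperParams

hparams = LoRAHyperParams.from_hparams('./hparams/LoRA/llama3-8b.yaml')
editor = BaseEditor.from_hparams(hparams)
metrics, edited_model, _ = editor.edit(
    prompts=['What university did Watts Humphrey attend?'],
    target_new=['University of Michigan'],
)
```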
18 changes: 18 additions & 0 deletions hparams/LoRA/llama3.1-8b.yaml
@@ -0,0 +1,18 @@
alg_name: "LoRA"
model_name: "./hugging_cache/llama-3.1-8b-instruct"
device: 1

lora_type: "adalora"
layers: []
num_steps: 50
batch_size: 1
max_length: 30
lr: 5e-3
weight_decay: 0
kl_factor: 0
rank: 8
lora_alpha: 32
lora_dropout: 0.1
norm_constraint: false
target_modules: ["q_proj", "v_proj"] #["up_proj", "down_proj"] #["q_proj", "v_proj"]
model_parallel: false
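Since lora_type is "adalora", these fields roughly map onto peft's AdaLoraConfig. A sketch of that mapping; EasyEdit builds the config internally, so treat the exact wiring as an assumption:

```python
# Approximate peft-side equivalent of the YAML above (assumed mapping).
from peft import AdaLoraConfig, TaskType

peft_config = AdaLoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                                  # rank
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    total_step=50,                        # num_steps; newer peft requires this for AdaLoRA
)
```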
25 changes: 25 additions & 0 deletions hparams/QLoRA/llama-7b.yaml
@@ -0,0 +1,25 @@
alg_name: "QLoRA"
model_name: "./hugging_cache/llama-2-7b"
device: 1

# QLoRA specific settings
quantization_bit: 4
double_quant: true
quant_type: "nf4" # nf4, fp4, int4, int8

# LoRA settings
lora_type: "lora" # QLoRA typically uses standard LoRA, not AdaLoRA
lora_r: 8
lora_alpha: 32
lora_dropout: 0.1
target_modules: ["q_proj", "v_proj"]

# Training settings
num_steps: 1
batch_size: 1
max_length: 30
lr: 5e-3
weight_decay: 0.0

# Additional settings
model_parallel: false
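The quantization block at the top corresponds closely to transformers' BitsAndBytesConfig. For reference (the compute dtype is an assumption, the YAML does not specify one):

```python
# How the QLoRA quantization fields map onto transformers' BitsAndBytesConfig.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantization_bit: 4
    bnb_4bit_use_double_quant=True,        # double_quant: true
    bnb_4bit_quant_type="nf4",             # quant_type: "nf4"
    bnb_4bit_compute_dtype=torch.bfloat16, # assumption; not set in the YAML
)
model = AutoModelForCausalLM.from_pretrained(
    "./hugging_cache/llama-2-7b", quantization_config=bnb_config
)
```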
35 changes: 35 additions & 0 deletions hparams/WISE/llama3-8b.yaml
@@ -0,0 +1,35 @@
alg_name: "WISE"
model_name: "./hugging_cache/llama-3-8b-instruct"

device: 0

mask_ratio: 0.2
edit_lr: 0.9
n_iter: 30
norm_constraint: 1.0
act_margin: [2.0, 20.0, 10.0] # alpha, beta, gamma
act_ratio: 0.88
save_freq: 500
merge_freq: 1000
merge_alg: 'ties'
objective_optimization: 'only_label'
inner_params:
- model.layers[29].mlp.down_proj.weight


## alternative: WISE-Merge, WISE-Retrieve

# for merge (if merge)
densities: 0.53
weights: 1.0

# for retrieve (if using retrieval, set retrieve to True)
retrieve: True
replay: False # True --> will replay the past editing instances: see https://arxiv.org/abs/2405.14768 Appendix B.3

model_parallel: False
use_chat_template: True

# for save and load
# save_path: "./wise_checkpoint/wise.pt"
# load_path: "./wise_checkpoint/wise.pt"
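With retrieve: True this config runs WISE-Retrieve, which routes each query to the closest stored side memory instead of merging all edits. A hedged usage sketch; the class name and the loc_prompts argument follow EasyEdit's WISE examples, and all strings are illustrative:

```python
# Sketch of a WISE edit with the config above; WISE additionally expects
# locality prompts (loc_prompts) to anchor unrelated behavior.
from easyeditor import BaseEditor, WISEHyperParams

hparams = WISEHyperParams.from_hparams('./hparams/WISE/llama3-8b.yaml')
editor = BaseEditor.from_hparams(hparams)
metrics, edited_model, _ = editor.edit(
    prompts=['Who is the president of the United States?'],
    target_new=['Donald Trump'],                      # illustrative target
    loc_prompts=['The capital of France is Paris.'],  # illustrative locality prompt
)
```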
hparams/WISE/llama3.1-8b.yaml
@@ -4,17 +4,17 @@ model_name: "./hugging_cache/llama-3.1-8b-instruct"
device: 0

mask_ratio: 0.2
-edit_lr: 1.0
-n_iter: 70
+edit_lr: 0.9
+n_iter: 30
norm_constraint: 1.0
-act_margin: [5.0, 20.0, 10.0] # alpha, beta, gamma
+act_margin: [5.0, 10.0, 10.0] # alpha, beta, gamma
act_ratio: 0.88
save_freq: 500
merge_freq: 1000
merge_alg: 'ties'
objective_optimization: 'only_label'
inner_params:
-- model.layers[27].mlp.down_proj.weight
+- model.layers[29].mlp.down_proj.weight


## alternative: WISE-Merge, WISE-Retrieve