zjunlp · zxlzr · Nov 19, 2024 · Nov 10, 2024 · Nov 11, 2024 · Nov 11, 2024
diff --git a/easyeditor/editors/editor.py b/easyeditor/editors/editor.py
@@ -128,10 +128,10 @@ def __init__(self, hparams: HyperParams):
             else:
                 raise NotImplementedError
 
-            if self.tok is not None and (isinstance(self.tok, GPT2Tokenizer) or isinstance(self.tok, GPT2TokenizerFast) or isinstance(self.tok, LlamaTokenizer) or isinstance(self.tok, LlamaTokenizerFast) or isinstance(self.tok, PreTrainedTokenizerFast)) and (hparams.alg_name not in ['ROME', 'MEMIT', 'EMMET', 'R-ROME']):
+            if self.tok is not None and (isinstance(self.tok, GPT2Tokenizer) or isinstance(self.tok, GPT2TokenizerFast) or isinstance(self.tok, LlamaTokenizer) or isinstance(self.tok, LlamaTokenizerFast) or isinstance(self.tok, PreTrainedTokenizerFast)) and (hparams.alg_name not in ['ROME', 'MEMIT', 'EMMET', 'R-ROME','AlphaEdit']):
                 LOG.info('AutoRegressive Model detected, set the padding side of Tokenizer to left...')
                 self.tok.padding_side = 'left'
-            if self.tok is not None and ('mistral' in self.model_name.lower() or 'llama' in self.model_name.lower() or 'qwen' in self.model_name.lower()) and (hparams.alg_name in ['ROME', 'MEMIT', 'EMMET', 'R-ROME']):
+            if self.tok is not None and ('mistral' in self.model_name.lower() or 'llama' in self.model_name.lower() or 'qwen' in self.model_name.lower()) and (hparams.alg_name in ['ROME', 'MEMIT', 'EMMET', 'R-ROME','AlphaEdit']):
                 LOG.info('AutoRegressive Model detected, set the padding side of Tokenizer to right...')
                 self.tok.padding_side = 'right'
         else:

diff --git a/easyeditor/editors/steer_editor.py b/easyeditor/editors/steer_editor.py
@@ -16,7 +16,7 @@
 from transformers import AutoProcessor, LlavaForConditionalGeneration
 from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
 from ..util.globals import *
-from ..evaluate import compute_safety_edit_quality, ccks_compute_safety_edit_quality
+from ..evaluate import compute_safety_edit_quality
 from ..util import nethook
 from ..util.hparams import HyperParams
 from ..util.alg_dict import *

diff --git a/hparams/AlphaEdit/llama3-8b.yaml b/hparams/AlphaEdit/llama3-8b.yaml
@@ -1,5 +1,5 @@
 alg_name: "AlphaEdit"
-model_name: "./hugging_cache/llama-3-8b"
+model_name: "./hugging_cache/llama-3-8b-instruct"
 stats_dir: "./data/stats"
 # Make sure that the projection matrix P has been downloaded from the baidu netdisk (For details, please refer to the EasyEdit/easyeditor/models/alphaedit/README.md) beforehand to avoid double computation.
 # But if the projection matrix P which we provided is not needed, then nothing needs to be done to the P_loc field; 
@@ -27,4 +27,4 @@ mom2_dataset: "wikipedia"
 mom2_n_samples: 100000
 mom2_dtype: "float32"
 nullspace_threshold: 2e-2
-L2: 10
+L2: 1
diff --git a/hparams/AlphaEdit/llama3.1-8b.yaml b/hparams/AlphaEdit/llama3.1-8b.yaml
@@ -0,0 +1,30 @@
+alg_name: "AlphaEdit"
+model_name: "./hugging_cache/llama-3.1-8b-instruct"
+stats_dir: "./data/stats"
+# To avoid recomputing it, download the projection matrix P from Baidu Netdisk beforehand (for details, see EasyEdit/easyeditor/models/alphaedit/README.md).
+# If you do not need the projection matrix P that we provide, leave the P_loc field unchanged;
+# just run the program, and it will compute P and save it locally automatically.
+P_loc: "./null_space_project.pt"
+device: 0
+layers: [4, 5, 6, 7, 8]
+clamp_norm_factor: 0.75
+layer_selection: "all"
+fact_token: "subject_last"
+v_num_grad_steps: 25
+v_lr: 1e-1
+v_loss_layer: 31
+v_weight_decay: 0.5
+kl_factor: 0.0625
+mom2_adjustment: true
+mom2_update_weight: 15000
+rewrite_module_tmp: "model.layers.{}.mlp.down_proj"
+layer_module_tmp: "model.layers.{}"
+mlp_module_tmp: "model.layers.{}.mlp"
+attn_module_tmp: "model.layers.{}.self_attn"
+ln_f_module: "model.norm"
+lm_head_module: "lm_head"
+mom2_dataset: "wikipedia"
+mom2_n_samples: 100000
+mom2_dtype: "float32"
+nullspace_threshold: 2e-2
+L2: 10
diff --git a/hparams/DPO/llama-7b.yaml b/hparams/DPO/llama-7b.yaml
@@ -0,0 +1,20 @@
+alg_name: "DPO"
+model_name: "./hugging_cache/llama-2-7b"
+device: 0
+
+lora_type: "adalora"
+layers: []
+num_steps: 7
+batch_size: 1
+max_length: 30
+lr: 5e-5
+weight_decay: 0
+kl_factor: 0
+rank: 8
+lora_alpha: 32
+lora_dropout: 0.1
+norm_constraint: false
+target_modules: ["q_proj", "v_proj"]  #["up_proj", "down_proj"] #["q_proj", "v_proj"]
+model_parallel: False
+alpha: 0.99
+beta: 0.1
diff --git a/hparams/DeCo/llama.yaml → hparams/DeCo/llama-7b.yaml b/hparams/DeCo/llama.yaml → hparams/DeCo/llama-7b.yaml
@@ -1,5 +1,6 @@
 alg_name: "deco"
-model_name: "./huggyllama-7b" or "./llava-7b-hf"
+# model_name: "./huggyllama-7b" or "./llava-7b-hf"
+model_name: "./hugging_cache/llama-2-7b" 
 device: 1
 alpha: 0.6
 threshold_top_p: 0.9

diff --git a/hparams/GRACE/llama-7B.yaml → hparams/GRACE/llama-7b.yaml b/hparams/GRACE/llama-7B.yaml → hparams/GRACE/llama-7b.yaml
diff --git a/hparams/LoRA/llama3-8b.yaml b/hparams/LoRA/llama3-8b.yaml
@@ -0,0 +1,18 @@
+alg_name: "LoRA"
+model_name: "./hugging_cache/llama-3-8b-instruct"
+device: 0
+
+lora_type: "adalora"
+layers: []
+num_steps: 70
+batch_size: 1
+max_length: 50
+lr: 5e-3
+weight_decay: 0
+kl_factor: 0
+rank: 8
+lora_alpha: 32
+lora_dropout: 0.1
+norm_constraint: false
+target_modules: ["q_proj", "v_proj"]  #["up_proj", "down_proj"] #["q_proj", "v_proj"]
+model_parallel: false
diff --git a/hparams/LoRA/llama3.1-8b.yaml b/hparams/LoRA/llama3.1-8b.yaml
@@ -0,0 +1,18 @@
+alg_name: "LoRA"
+model_name: "./hugging_cache/llama-3.1-8b-instruct"
+device: 1
+
+lora_type: "adalora"
+layers: []
+num_steps: 50
+batch_size: 1
+max_length: 30
+lr: 5e-3
+weight_decay: 0
+kl_factor: 0
+rank: 8
+lora_alpha: 32
+lora_dropout: 0.1
+norm_constraint: false
+target_modules: ["q_proj", "v_proj"]  #["up_proj", "down_proj"] #["q_proj", "v_proj"]
+model_parallel: false
diff --git a/hparams/QLoRA/llama-7b.yaml b/hparams/QLoRA/llama-7b.yaml
@@ -0,0 +1,25 @@
+alg_name: "QLoRA"
+model_name: "./hugging_cache/llama-2-7b"
+device: 1
+
+# QLoRA specific settings
+quantization_bit: 4
+double_quant: true
+quant_type: "nf4" # nf4, fp4, int4, int8
+
+# LoRA settings
+lora_type: "lora"  # QLoRA typically uses standard LoRA, not AdaLoRA
+lora_r: 8
+lora_alpha: 32
+lora_dropout: 0.1
+target_modules: ["q_proj", "v_proj"]
+
+# Training settings
+num_steps: 1
+batch_size: 1
+max_length: 30
+lr: 5e-3
+weight_decay: 0.0
+
+# Additional settings
+model_parallel: false
diff --git a/hparams/WISE/llama3-8b.yaml b/hparams/WISE/llama3-8b.yaml
@@ -0,0 +1,35 @@
+alg_name: "WISE"
+model_name: "./hugging_cache/llama-3-8b-instruct"
+
+device: 0
+
+mask_ratio: 0.2
+edit_lr: 0.9
+n_iter: 30
+norm_constraint: 1.0
+act_margin: [2.0, 20.0, 10.0] # alpha, beta, gamma
+act_ratio: 0.88
+save_freq: 500
+merge_freq: 1000
+merge_alg: 'ties'
+objective_optimization: 'only_label'
+inner_params:
+- model.layers[29].mlp.down_proj.weight
+
+
+## alternative: WISE-Merge, WISE-Retrieve
+
+# for merge (if using merge)
+densities: 0.53
+weights: 1.0
+
+# for retrieve (if using retrieve, please set to True)
+retrieve: True
+replay: False # True --> will replay the past editing instances: see https://arxiv.org/abs/2405.14768 Appendix B.3
+
+model_parallel: False
+use_chat_template: True
+
+# for save and load
+# save_path: "./wise_checkpoint/wise.pt"
+# load_path: "./wise_checkpoint/wise.pt"
diff --git a/hparams/WISE/llama-3-8b.yaml → hparams/WISE/llama3.1-8b.yaml b/hparams/WISE/llama-3-8b.yaml → hparams/WISE/llama3.1-8b.yaml
@@ -4,17 +4,17 @@ model_name: "./hugging_cache/llama-3.1-8b-instruct"
 device: 0
 
 mask_ratio: 0.2
-edit_lr: 1.0
-n_iter: 70
+edit_lr: 0.9
+n_iter: 30
 norm_constraint: 1.0
-act_margin: [5.0, 20.0, 10.0] # alpha, beta, gamma
+act_margin: [5.0, 10.0, 10.0] # alpha, beta, gamma
 act_ratio: 0.88
 save_freq: 500
 merge_freq: 1000
 merge_alg: 'ties'
 objective_optimization: 'only_label'
 inner_params:
-- model.layers[27].mlp.down_proj.weight
+- model.layers[29].mlp.down_proj.weight
 
 
 ## alternative: WISE-Merge, WISE-Retrieve