@@ -233,7 +233,7 @@ class LoraConfig(PeftConfig):
233233 Otherwise, it will use the original default value of `lora_alpha/r`.
234234 modules_to_save (`List[str]`):
235235 List of modules apart from adapter layers to be set as trainable and saved in the final checkpoint.
236- init_lora_weights (`bool` | `Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq"]`):
236+ init_lora_weights (`bool` | `Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq", "orthogonal"]`):
237237 How to initialize the weights of the adapter layers. Passing True (default) results in the default
238238 initialization from the reference implementation from Microsoft, with the LoRA B weight being set to 0.
239239 This means that without further training, the LoRA adapter will be a no-op. Setting the initialization to
@@ -252,7 +252,9 @@ class LoraConfig(PeftConfig):
252252 a 7B model within seconds, and the training effect is approximately equivalent to using SVD. Passing
253253 `'corda'` results in the initialization of <a href='https://arxiv.org/abs/2406.05223'>Context-Oriented
254254 Decomposition Adaptation</a>, which converges even more rapidly than PiSSA in Instruction-Previewed Mode,
255- and preserves world knowledge better than LoRA in Knowledge-Preserved Mode.
255+ and preserves world knowledge better than LoRA in Knowledge-Preserved Mode. Passing `"orthogonal"` results
256+ in LoRA A and B being initialized orthogonally; in this, it resembles `"olora"`, but the base weights are
257+ left untouched (requires `r` to be even, only supported for linear layers for now).
256258 layers_to_transform (`Union[List[int], int]`):
257259 The layer indices to transform. If a list of ints is passed, it will apply the adapter to the layer indices
258260 that are specified in this list. If a single integer is passed, it will apply the transformations on the
@@ -356,7 +358,8 @@ class LoraConfig(PeftConfig):
356358 },
357359 )
358360 init_lora_weights: (
359- bool | Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq"]
361+ bool
362+ | Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq", "orthogonal"]
360363 ) = field(
361364 default=True,
362365 metadata={
@@ -375,7 +378,8 @@ class LoraConfig(PeftConfig):
375378 "[number of iters] indicates the number of subspace iterations to perform fsvd, and must be a "
376379 "nonnegative integer. "
377380 "Passing `'corda'` results in CorDA initialization. "
378- "Pass `'loftq'` to use LoftQ initialization."
381+ "Pass `'loftq'` to use LoftQ initialization. "
382+ "Pass `'orthogonal'` for orthogonal initialization of LoRA A and B."
379383 ),
380384 },
381385 )
0 commit comments