# See the License for the specific language governing permissions and
# limitations under the License.

- import logging
- from typing import Any, Dict, List, Optional, Union
+ from typing import Dict, List, Optional, Union

- from sparseml.core.factory import ModifierFactory
+ from sparseml.core import Modifier
+ from sparseml.core.model.base import ModifiableModel
from sparseml.core.state import State
- from sparseml.modifiers.pruning.wanda.base import WandaPruningModifier


__all__ = ["SparseGPTModifier"]

- _LOGGER = logging.getLogger(__name__)

-
- class SparseGPTModifier(WandaPruningModifier):
+ class SparseGPTModifier(Modifier):
    """
    Modifier for applying the one-shot OBCQ algorithm to a model

@@ -41,84 +38,91 @@ class SparseGPTModifier(WandaPruningModifier):
        - on_finalize
            - LayerCompressor.revert_layer_wrappers()

-    :param block_size: Used to determine number of columns to compress in one pass
-    :param quantize: Whether or not to quantize weights during SparseGPT. Set to
-        True to quantize using an existing quantization modifier, or pass in the
-        configuration for a quantization modifier if one does not already exist
-        in the recipe
    :param sparsity: Sparsity to compress model to
+    :param sparsity_profile: Can be set to 'owl' to use Outlier Weighed
+        Layerwise Sparsity (OWL); more information can be found
+        in the paper https://arxiv.org/pdf/2310.05175
+    :param owl_m: Number of outliers to use for OWL
+    :param owl_lmbda: Lambda value to use for OWL
+    :param mask_structure: String to define the structure of the mask to apply.
+        Must be of the form N:M where N, M are integers that define a custom block
+        shape. Defaults to 0:0 which represents an unstructured mask.
+    :param sequential_update: Whether or not to update weights sequentially by layer;
+        True saves on GPU memory
+    :param targets: list of layer names to compress during OBCQ, or '__ALL__'
+        to compress every layer in the model
+    :param block_size: Used to determine number of columns to compress in one pass
    :param dampening_frac: Amount of dampening to apply to H, as a fraction of the
        diagonal norm
+    :param preserve_sparsity_mask: Whether or not to preserve the sparsity mask
+        when applying SparseGPT; this is useful when starting from a
+        previously pruned model. Defaults to False.
    """

-    block_size: int = 128
-    quantize: Union[bool, Dict] = False
    sparsity: Union[float, List[float]] = 0.0
+    sparsity_profile: Optional[str] = None
+    owl_m: Optional[int] = None
+    owl_lmbda: Optional[float] = None
+    mask_structure: str = "0:0"
+    sequential_update: Optional[bool] = False
+    targets: Union[str, List[str], None] = None
+    block_size: int = 128
    dampening_frac: Optional[float] = 0.01
-    quantization_modifier_: Any = None
+    preserve_sparsity_mask: bool = False
+    prunen_: Optional[int] = None
+    prunem_: Optional[int] = None
+    compressible_layers_: Optional[List] = None

    def on_initialize_structure(self, state: State, **kwargs):
        """
-        Check the model's quantization state matches that expected by this modifier,
-        adding a default quantization scheme if needed
+        Initialize the structure of the model for compression.
+        This modifier does not modify the model structure, so this method
+        is a no-op.
+
+        :param state: session state storing input model and calibration data
+        """
+        return True
+
+    def compressible_layers(self) -> Dict:
+        """
+        Retrieves the modules corresponding to a list of
+        compressible layer names
+
+        :precondition: self.model is set and is a `ModifiableModel`
+        :precondition: The `ModifiableModel` implements a `get_layers`
+            method
+        :return: dictionary of modules to compress
+        """
+        if not isinstance(self.model, ModifiableModel):
+            raise ValueError(
+                "`self.model` must be a ModifiableModel to use "
+                f"the {self.__class__.__qualname__} modifier but got "
+                f"{type(self.model)} instead"
+            )
+
+        return self.model.get_layers(self.targets)
+
+    def _validate_layerwise_sparsity(self):
+        if isinstance(self.sparsity, float):
+            # a single sparsity value is applied to all layers
+            return
+
+        target_layers = list(self.compressible_layers_.keys())
+
+        if len(target_layers) != len(self.sparsity):
+            raise ValueError(
+                "Number of layer targets must match the number of "
+                f"sparsities. Got {len(target_layers)} layers and "
+                f"{len(self.sparsity)} sparsities"
+            )
+
+    def on_finalize(self, state: State, **kwargs):
+        """
+        Nothing to do on finalize at this level; a quantization modifier,
+        if any, is finalized in the subclass.

        :param state: session state storing input model and calibration data
+        :param kwargs: additional arguments
+        :return: True
        """
-        quantization_already_active = state.model.qat_active()
-        if isinstance(self.quantize, bool):
-            if not self.quantize and quantization_already_active:
-                _LOGGER.warning(
-                    "SparseGPT quantization is set to False, but a "
-                    "quantization modifier is already active on the model "
-                    "resetting quantize to True"
-                )
-                self.quantize = True
-            elif self.quantize and not quantization_already_active:
-                _LOGGER.warning(
-                    "SparseGPT quantization is set to True without an "
-                    "active quantization modifier. Creating a default "
-                    "8-bit quantization modifier"
-                )
-                default_quant_config = {"QuantizationModifier": {}}
-                self._build_quant_modifier_from_dict(
-                    default_quant_config, state.framework
-                )
-            return  # use existing quantization modifier if there is one
-        else:
-            if not isinstance(self.quantize, Dict):
-                raise ValueError(
-                    "SparseGPTModifier.quantize accepts only a single "
-                    "quantization modifier or a boolean. Found "
-                    f"type {type(self.quantize)}"
-                )
-            if len(self.quantize) != 1:
-                raise ValueError(
-                    "SparseGPTModifier.quantize accepts only a single "
-                    "quantization modifier or a boolean. Found "
-                    f"{len(self.quantize)} modifiers"
-                )
-            if quantization_already_active:
-                _LOGGER.warning(
-                    "Attempting to initialize quantization for SparseGPT "
-                    "but a quantization modifier has already been applied. "
-                    "The quantization configuration defined under the "
-                    "SparseGPT modifier will be ignored."
-                )
-                self.quantize = True
-                return
-            self._build_quant_modifier_from_dict(self.quantize, state.framework)
-            self.quantize = True
-
-        if self.quantization_modifier_:
-            self.quantization_modifier_.on_initialize_structure(state, **kwargs)
-
-    def _build_quant_modifier_from_dict(self, quant_config, framework):
-        modifier_type = list(quant_config.keys())[0]
-        modifier_args = quant_config[modifier_type]
-        self.quantization_modifier_ = ModifierFactory.create(
-            modifier_type,
-            framework=framework,
-            allow_registered=True,
-            allow_experimental=True,
-            **modifier_args,
-        )
+        return True
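

# --- Editor's sketch (not part of the diff): mask_structure parsing ---------
# The prunen_/prunem_ fields declared above suggest the "N:M" mask_structure
# string is split into two integers before pruning. This standalone helper is
# hypothetical and only illustrates the expected semantics; the actual parsing
# happens inside the modifier.
def parse_mask_structure(mask_structure: str) -> tuple:
    """Split "N:M" into (n, m); "0:0" denotes an unstructured mask."""
    n, m = (int(part) for part in mask_structure.split(":"))
    return n, m


assert parse_mask_structure("2:4") == (2, 4)  # prune 2 of every block of 4 weights
assert parse_mask_structure("0:0") == (0, 0)  # unstructured mask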
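
# --- Editor's sketch (not part of the diff): constructing the modifier ------
# One plausible way to exercise the new fields; the import path is assumed
# from this file's package. When `sparsity` is a list, it must line up
# one-to-one with `targets`, which _validate_layerwise_sparsity enforces.
from sparseml.modifiers.obcq.base import SparseGPTModifier  # path assumed

modifier = SparseGPTModifier(
    sparsity=[0.5, 0.6],  # one sparsity per target layer
    targets=["model.layers.0", "model.layers.1"],
    mask_structure="2:4",  # semi-structured N:M mask
    sequential_update=True,  # update layer-by-layer to save GPU memory
    block_size=128,
    dampening_frac=0.01,
)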