56
56
from torchao .experimental .quant_api import (
57
57
int8_dynamic_activation_intx_weight ,
58
58
IntxWeightEmbeddingQuantizer ,
59
+ UIntxWeightOnlyLinearQuantizer ,
59
60
)
60
61
from torchao .quantization .granularity import (
61
62
PerGroup ,
@@ -137,12 +138,12 @@ def quantize_model(
137
138
group_size = q_kwargs ["groupsize" ]
138
139
bit_width = q_kwargs ["bitwidth" ]
139
140
has_weight_zeros = q_kwargs ["has_weight_zeros" ]
140
- granularity = PerRow () if group_size == - 1 else PerGroup (group_size )
141
+ granularity = PerRow () if group_size == - 1 else PerGroup (group_size )
141
142
weight_dtype = getattr (torch , f"int{ bit_width } " )
142
143
143
144
try :
144
145
quantize_ (
145
- model ,
146
+ model ,
146
147
int8_dynamic_activation_intx_weight (
147
148
weight_dtype = weight_dtype ,
148
149
granularity = granularity ,
@@ -154,7 +155,7 @@ def quantize_model(
154
155
# f-string so the caught exception is interpolated instead of printing the literal text "{e}".
# NOTE(review): assumes the enclosing handler (outside this view) binds the exception as `e` — confirm.
print (f"Encountered error during quantization: {e}" )
155
156
print ("Trying with PlainLayout" )
156
157
quantize_ (
157
- model ,
158
+ model ,
158
159
int8_dynamic_activation_intx_weight (
159
160
weight_dtype = weight_dtype ,
160
161
granularity = granularity ,
@@ -946,38 +947,5 @@ def quantized_model(self) -> nn.Module:
946
947
"linear:int4" : Int4WeightOnlyQuantizer ,
947
948
"linear:a8wxdq" : None , # uses quantize_ API
948
949
"linear:a8w4dq" : Int8DynActInt4WeightQuantizer ,
950
+ "linear:afpwx" : UIntxWeightOnlyLinearQuantizer ,
949
951
}
950
-
951
- try :
952
- import importlib .util
953
- import os
954
- import sys
955
-
956
- torchao_build_path = f"{ os .getcwd ()} /torchao-build"
957
-
958
- # Try loading quantizer
959
- torchao_experimental_quant_api_spec = importlib .util .spec_from_file_location (
960
- "torchao_experimental_quant_api" ,
961
- f"{ torchao_build_path } /src/ao/torchao/experimental/quant_api.py" ,
962
- )
963
- torchao_experimental_quant_api = importlib .util .module_from_spec (
964
- torchao_experimental_quant_api_spec
965
- )
966
- sys .modules ["torchao_experimental_quant_api" ] = torchao_experimental_quant_api
967
- torchao_experimental_quant_api_spec .loader .exec_module (
968
- torchao_experimental_quant_api
969
- )
970
- from torchao_experimental_quant_api import UIntxWeightOnlyLinearQuantizer
971
- quantizer_class_dict ["linear:afpwx" ] = UIntxWeightOnlyLinearQuantizer
972
-
973
- # Try loading custom op
974
- try :
975
- libname = "libtorchao_ops_mps_aten.dylib"
976
- libpath = f"{ torchao_build_path } /cmake-out/lib/{ libname } "
977
- torch .ops .load_library (libpath )
978
- print ("Loaded torchao mps ops." )
979
- except Exception as e :
980
- print ("Unable to load torchao mps ops library." )
981
-
982
- except Exception as e :
983
- print ("Unable to import torchao experimental quant_api with error: " , e )
0 commit comments