@@ -309,6 +309,7 @@ def model_save_quantized_weights(model, filename=None, custom_objects={}):
        # Weights store the weight in the format that software inference uses.
        weights.append(weight)

+        q_name = ""
        if quantizer:
          if isinstance(quantizer, six.string_types):
            q_name = quantizer
@@ -318,11 +319,10 @@ def model_save_quantized_weights(model, filename=None, custom_objects={}):
          q_name = quantizer.name
        elif hasattr(quantizer, "__class__"):
          q_name = quantizer.__class__.__name__
-          else:
-            q_name = ""
+
        if quantizer and ("_po2" in q_name):
          # Quantized_relu_po2 does not have a sign.
-          if isinstance(quantizer, quantized_po2):
+          if q_name == "quantized_po2":
            has_sign = True
          sign = np.sign(weight)
          # Makes sure values are -1 or +1 only
@@ -332,7 +332,7 @@ def model_save_quantized_weights(model, filename=None, custom_objects={}):
          hw_weight = np.round(np.log2(np.abs(weight)))
          signs.append(sign)
          scales.append([])
-        elif (isinstance(quantizer, quantized_bits) and
+        elif (q_name == "quantized_bits" and
              quantizer.alpha == "auto_po2"):
          unsigned_bits = quantizer.bits - quantizer.keep_negative
          m = K.cast_to_floatx(pow(2, unsigned_bits))
@@ -1352,3 +1352,181 @@ def quantized_model_dump(model,
      print("writing the layer output tensor to ", filename)
      with open(filename, "w") as fid:
        tensor_data.astype(np.float32).tofile(fid)
+
+
+def clone_model_and_freeze_auto_po2_scale(
+    orig_model=None, orig_model_path=None, quantize_model_weights=False):
+  """Clone model and freeze the scale value of auto_po2 type quantizers.
+
+  Args:
+    orig_model: Original model which will be used to clone the new model.
+      If set to None, the function will load the original model
+      from the orig_model_path argument.
+    orig_model_path: The path to the original model file.
+      If set to None, the function will load the original model from the
+      orig_model argument.
+    quantize_model_weights: Bool to quantize weights to HW format.
+      If set to False, the model weights will be in float format.
+      If set to True, the model weights will be in HW format and the function
+      will also check whether the HW weights extracted from the new model
+      match those of the original model.
+
+  Returns:
+    A tuple of the new model and the new model's HW weights.
+
+  Note:
+    + When using this function to retrain a model with fixed scale values,
+      set quantize_model_weights to False.
+    + This function only supports a collection of common layers that use
+      auto_po2 quantizers. For less common layers, it will raise errors and we
+      will add more support case by case.
+
+  Example usage:
+    model, _ = clone_model_and_freeze_auto_po2_scale(
+        orig_model_path="path/to/model",
+        quantize_model_weights=False)
+  """
+
+  def _create_bn_layer(layer_cfg, bn_inv_quantizer):
+    # Clone batch normalization layer with the new inverse quantizer.
+    if bn_inv_quantizer is not None:
+      layer_cfg["inverse_quantizer"]["config"] = bn_inv_quantizer.get_config()
+    return QBatchNormalization(**layer_cfg)
+
+  def _create_qconv2d_layer(layer_cfg, kernel_quantizer):
+    # Clone QConv2D layer with the new kernel quantizer.
+    if kernel_quantizer is not None:
+      layer_cfg["kernel_quantizer"]["config"] = kernel_quantizer.get_config()
+    return QConv2D(**layer_cfg)
+
+  def _create_qdepthwise_conv2d_layer(layer_cfg, depthwise_quantizer):
+    # Clone QDepthwiseConv2D layer with the new depthwise quantizer.
+    if depthwise_quantizer is not None:
+      layer_cfg["depthwise_quantizer"][
+          "config"] = depthwise_quantizer.get_config()
+    return QDepthwiseConv2D(**layer_cfg)
+
+  def _create_qdense_layer(layer_cfg, kernel_quantizer):
+    # Clone QDense layer with the new kernel quantizer.
+    if kernel_quantizer is not None:
+      layer_cfg["kernel_quantizer"]["config"] = kernel_quantizer.get_config()
+    return QDense(**layer_cfg)
+
+  def _create_other_layer(orig_layer):
+    # Clone other layers.
+    config = orig_layer.get_config()
+    return orig_layer.__class__.from_config(config)
+
+  def _create_quantized_bits_with_post_training_scale(q):
+    # Create a new quantized_bits instance with the fixed scale value.
+    if q is not None:
+      q_cfg = q.get_config()
+      q_cfg["post_training_scale"] = q.scale.numpy()
+      q = quantized_bits(**q_cfg)
+    return q
+
+  def _find_auto_po2_quantizer(layer):
+    # Find the auto_po2 quantizer in the layer. Note that we allow at
+    # most one auto_po2 quantizer in each layer due to the limitation of
+    # the current HW implementation.
+    num_auto_po2_quantizers = 0
+    auto_po2_quantizer = None
+    if hasattr(layer, "quantizers"):
+      for q in layer.quantizers:
+        if hasattr(q, "alpha") and q.alpha == "auto_po2":
+          num_auto_po2_quantizers += 1
+          auto_po2_quantizer = q
+    if num_auto_po2_quantizers > 1:
+      raise ValueError(
+          f"{layer.name} has more than one auto_po2 quantizer. "
+          "Please check if this is expected.")
+    else:
+      return auto_po2_quantizer
+
+  def _check_hw_weights_equal(hw_weights_1, hw_weights_2):
+    # Check if the hw weights extracted from the new model match the
+    # original model.
+    for layer_name in hw_weights_2.keys():
+      for key in hw_weights_2[layer_name].keys():
+        val1 = hw_weights_2[layer_name][key]
+        val2 = hw_weights_1[layer_name][key]
+        if isinstance(val1, list):
+          for (v1, v2) in zip(val1, val2):
+            if not np.all(v1 == v2):
+              raise ValueError(
+                  f"{layer_name}/{key}: No Match! v1={v1}, v2={v2}")
+        else:
+          if not np.all(val1 == val2):
+            raise ValueError(
+                f"{layer_name}/{key}: No Match! val1={val1}, val2={val2}")
+
+  # Load the original model with float weights.
+  # Note: weights will be quantized later in the silicon flow by calling
+  # model_save_quantized_weights.
+  if orig_model is not None and orig_model_path is not None:
+    raise ValueError(
+        "Only one of orig_model and orig_model_path can be set.")
+  elif orig_model is None and orig_model_path is None:
+    raise ValueError(
+        "One of orig_model and orig_model_path must be set.")
+  elif orig_model_path is not None:
+    orig_model = load_qmodel(orig_model_path, compile=False)
+
+  # Quantize model weights and compute quantizer scale values.
+  quantized_model = tf.keras.models.clone_model(orig_model)
+  quantized_model.set_weights(orig_model.get_weights())
+  # In the silicon flow, weight binary files are generated from hw weights.
+  orig_hw_weights = model_save_quantized_weights(quantized_model)
+
+  # Create a new model with fixed scale quantizers.
+  x = inputs = tf.keras.Input(
+      shape=orig_model.input_shape[1:], name=orig_model.layers[0].name)
+  for layer in quantized_model.layers[1:]:
+    layer_class = layer.__class__.__name__
+    auto_po2_quantizer = _find_auto_po2_quantizer(layer)
+    auto_po2_quantizer_with_frozen_scale = (
+        _create_quantized_bits_with_post_training_scale(auto_po2_quantizer))
+    layer_cfg = layer.get_config()
+
+    # To be compatible with different Python versions, we do not use
+    # match-case style here.
+    if layer_class == "QConv2D":
+      x = _create_qconv2d_layer(layer_cfg,
+                                auto_po2_quantizer_with_frozen_scale)(x)
+    elif layer_class == "QDepthwiseConv2D":
+      x = _create_qdepthwise_conv2d_layer(
+          layer_cfg, auto_po2_quantizer_with_frozen_scale)(x)
+    elif layer_class == "QBatchNormalization":
+      x = _create_bn_layer(layer_cfg,
+                           auto_po2_quantizer_with_frozen_scale)(x)
+    elif layer_class == "QDense":
+      x = _create_qdense_layer(layer_cfg,
+                               auto_po2_quantizer_with_frozen_scale)(x)
+    else:
+      x = _create_other_layer(layer)(x)
+
+  new_model = tf.keras.Model(inputs, x)
+  # Set the weights of the new model to the original model's (float) weights.
+  new_model.set_weights(orig_model.get_weights())
+
+  # Check if the new model still has an auto_po2 quantizer with adaptive
+  # scales. This function only supports a collection of common layers that
+  # use auto_po2 quantizers. For less common layers, we need to add extra
+  # support in the future.
+  for layer in new_model.layers:
+    q = _find_auto_po2_quantizer(layer)
+    if q is not None and q.post_training_scale is None:
+      raise ValueError(
+          f"{layer.name} in the new model still has an auto_po2 quantizer "
+          "with adaptive scales. Please check if this is expected!")
+
+  new_hw_weights = None
+  if quantize_model_weights:
+    new_hw_weights = model_save_quantized_weights(new_model)
+    # Check if the hw weights extracted from the new model match the
+    # original model.
+    _check_hw_weights_equal(orig_hw_weights, new_hw_weights)
+
+  return new_model, new_hw_weights
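
A minimal usage sketch of the retraining flow this function enables, assuming the patched qkeras.utils module is importable; the model path, optimizer, and loss below are placeholders and are not part of the patch:

from qkeras.utils import clone_model_and_freeze_auto_po2_scale
from qkeras.utils import model_save_quantized_weights

# Clone with auto_po2 scales frozen into post_training_scale; keep float
# weights (quantize_model_weights=False) so the model can be fine-tuned.
new_model, _ = clone_model_and_freeze_auto_po2_scale(
    orig_model_path="path/to/model",  # placeholder path
    quantize_model_weights=False)

# Fine-tune with the scales fixed; optimizer and loss are placeholders.
new_model.compile(optimizer="adam", loss="mse")
# new_model.fit(x_train, y_train, epochs=5)

# Afterwards, quantize the retrained weights to HW format for the silicon flow.
hw_weights = model_save_quantized_weights(new_model)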