keras-team · fchollet · Jan 15, 2025 · Dec 13, 2024 · Dec 16, 2024 · Dec 16, 2024
diff --git a/keras/api/_tf_keras/keras/quantizers/__init__.py b/keras/api/_tf_keras/keras/quantizers/__init__.py
@@ -12,4 +12,9 @@
 from keras.src.quantizers.quantizers import abs_max_quantize
 from keras.src.quantizers.quantizers import compute_float8_amax_history
 from keras.src.quantizers.quantizers import compute_float8_scale
+from keras.src.quantizers.quantizers import fake_quant_with_min_max_args
+from keras.src.quantizers.quantizers import fake_quant_with_min_max_vars
+from keras.src.quantizers.quantizers import (
+    fake_quant_with_min_max_vars_per_channel,
+)
 from keras.src.quantizers.quantizers import quantize_and_dequantize
diff --git a/keras/api/quantizers/__init__.py b/keras/api/quantizers/__init__.py
@@ -12,4 +12,9 @@
 from keras.src.quantizers.quantizers import abs_max_quantize
 from keras.src.quantizers.quantizers import compute_float8_amax_history
 from keras.src.quantizers.quantizers import compute_float8_scale
+from keras.src.quantizers.quantizers import fake_quant_with_min_max_args
+from keras.src.quantizers.quantizers import fake_quant_with_min_max_vars
+from keras.src.quantizers.quantizers import (
+    fake_quant_with_min_max_vars_per_channel,
+)
 from keras.src.quantizers.quantizers import quantize_and_dequantize
diff --git a/keras/src/quantizers/__init__.py b/keras/src/quantizers/__init__.py
@@ -6,6 +6,11 @@
 from keras.src.quantizers.quantizers import abs_max_quantize
 from keras.src.quantizers.quantizers import compute_float8_amax_history
 from keras.src.quantizers.quantizers import compute_float8_scale
+from keras.src.quantizers.quantizers import fake_quant_with_min_max_args
+from keras.src.quantizers.quantizers import fake_quant_with_min_max_vars
+from keras.src.quantizers.quantizers import (
+    fake_quant_with_min_max_vars_per_channel,
+)
 from keras.src.quantizers.quantizers import quantize_and_dequantize
 from keras.src.saving import serialization_lib
 from keras.src.utils.naming import to_snake_case

diff --git a/keras/src/quantizers/quantizers.py b/keras/src/quantizers/quantizers.py
@@ -127,6 +127,155 @@ def get_config(self):
         }
 
 
+def adjust_and_nudge(min_range, max_range, num_bits, narrow_range):
+    """Compute a nudged quantization range together with its step size.
+
+    The requested `[min_range, max_range]` interval is shifted so that its
+    zero point falls on an integer quantization level.
+
+    Args:
+        min_range: Lower bound(s) of the range to quantize.
+        max_range: Upper bound(s) of the range to quantize.
+        num_bits: Bit width; the highest level is `2**num_bits - 1`.
+        narrow_range: If truthy, the lowest level is 1 instead of 0.
+
+    Returns:
+        Tuple `(nudged_min, nudged_max, scale, inv_scale)`, where `scale` is
+        the step between adjacent levels and `inv_scale` its reciprocal.
+    """
+    # Lowest and highest quantization levels, both as float32.
+    level_low = ops.cast(1.0 if narrow_range else 0.0, "float32")
+    level_high = ops.cast(
+        ops.subtract(ops.power(2, num_bits), 1.0), "float32"
+    )
+
+    # Step size between adjacent levels and its reciprocal. Nothing here
+    # guards against `max_range == min_range`, which makes the step zero.
+    scale = ops.divide(
+        ops.subtract(max_range, min_range),
+        ops.subtract(level_high, level_low),
+    )
+    inv_scale = ops.reciprocal(scale)
+
+    # Zero point derived from the lower bound, clamped into
+    # [level_low, level_high] and then always rounded to an integer level.
+    raw_zero_level = level_low - ops.divide(min_range, scale)
+    zero_level = ops.round(ops.clip(raw_zero_level, level_low, level_high))
+
+    # Range limits re-derived from the rounded zero point.
+    nudged_min = ops.multiply(ops.subtract(level_low, zero_level), scale)
+    nudged_max = ops.multiply(ops.subtract(level_high, zero_level), scale)
+
+    return nudged_min, nudged_max, scale, inv_scale
+
+
+@keras_export("keras.quantizers.fake_quant_with_min_max_vars_per_channel")
+def fake_quant_with_min_max_vars_per_channel(
+    inputs,
+    min_vals,
+    max_vals,
+    num_bits,
+    narrow_range=False,
+    axis=None,
+):
+    """
+    Perform per-channel fake quantization.
+
+    `inputs` is clipped to the nudged `[min_vals, max_vals]` range and each
+    value is rounded to the nearest quantization level. A custom gradient
+    passes the upstream gradient to `inputs` where the input lies inside the
+    nudged range, and to `min_vals` / `max_vals` where the input lies at or
+    beyond the corresponding bound.
+
+    Args:
+        inputs: Input tensor of float type
+        min_vals: Per-channel minimum values
+        max_vals: Per-channel maximum values
+        num_bits: Quantization bit width (e.g., 8 for int8)
+        narrow_range: Whether to use narrow quantization range
+        axis: Axis along which to perform per-channel quantization. It is
+            only consulted when reducing the gradients of `min_vals` and
+            `max_vals`: every other axis is summed over. Negative values
+            count from the last axis. If `None`, those gradients are summed
+            over all elements.
+
+    Returns:
+        Fake-quantized tensor
+    """
+    inputs = ops.convert_to_tensor(inputs)
+    min_vals = ops.convert_to_tensor(min_vals)
+    max_vals = ops.convert_to_tensor(max_vals)
+
+    @ops.custom_gradient
+    def _fake_quant_with_min_max_vars_per_channel(x, min_val, max_val):
+        # Calculate quantization parameters for all channels at once
+        nudged_min, nudged_max, scale, inv_scale = adjust_and_nudge(
+            min_val, max_val, num_bits, narrow_range
+        )
+
+        # Rounded level offset of the nudged minimum; subtracting it below
+        # re-centres the quantized levels before scaling back.
+        quant_zero = ops.floor(
+            ops.add(ops.multiply(-nudged_min, inv_scale), 0.5)
+        )
+        x_clamped = ops.clip(x, nudged_min, nudged_max)
+        x_clamped_shifted = ops.subtract(x_clamped, nudged_min)
+        # floor(v + 0.5) rounds to the nearest level; multiplying by `scale`
+        # maps the level index back to a float value.
+        result = ops.multiply(
+            ops.floor(
+                ops.add(
+                    ops.subtract(
+                        ops.multiply(x_clamped_shifted, inv_scale), quant_zero
+                    ),
+                    0.5,
+                )
+            ),
+            scale,
+        )
+
+        # Create gradient mask for all channels: 1.0 where x lies inside the
+        # nudged range (bounds included), 0.0 elsewhere.
+        masks = ops.cast(
+            (x >= nudged_min) & (x <= nudged_max),
+            dtype="float32",
+        )
+
+        def grad(*args, upstream=None):
+            # The upstream gradient arrives either positionally or through
+            # the `upstream` keyword; accept both.
+            if upstream is None:
+                (upstream,) = args
+
+            # Gradient for x: passes through only inside the nudged range.
+            dx = ops.multiply(upstream, masks)
+
+            # Axes to sum over so the min/max gradients keep only the channel
+            # axis. A negative `axis` is normalized first: compared directly
+            # against the non-negative indices it would never match, and the
+            # channel axis would be summed away as well.
+            if axis is None:
+                reduce_axes = None
+            else:
+                ndim = len(dx.shape)
+                channel_axis = axis + ndim if axis < 0 else axis
+                reduce_axes = [i for i in range(ndim) if i != channel_axis]
+
+            def _sum_over_non_channel_axes(g):
+                # Reduce a per-element gradient to the shape of min/max.
+                if reduce_axes is None:
+                    return ops.sum(g)
+                return ops.sum(g, axis=reduce_axes)
+
+            # Gradient for min_val
+            # When x is clipped to min, the gradient flows to min_val
+            # NOTE(review): an input exactly equal to a nudged bound passes
+            # both `masks` and this mask, so its upstream gradient is counted
+            # for x and for the bound - confirm this is intended.
+            min_mask = ops.cast(x <= nudged_min, dtype="float32")
+            grad_min = _sum_over_non_channel_axes(
+                ops.multiply(upstream, min_mask)
+            )
+
+            # Gradient for max_val
+            # When x is clipped to max, the gradient flows to max_val
+            max_mask = ops.cast(x >= nudged_max, dtype="float32")
+            grad_max = _sum_over_non_channel_axes(
+                ops.multiply(upstream, max_mask)
+            )
+
+            return dx, grad_min, grad_max
+
+        return result, grad
+
+    return _fake_quant_with_min_max_vars_per_channel(inputs, min_vals, max_vals)
+
+
+@keras_export("keras.quantizers.fake_quant_with_min_max_args")
+def fake_quant_with_min_max_args(
+    inputs,
+    min_vals,
+    max_vals,
+    num_bits=8,
+    narrow_range=False,
+    axis=None,
+):
+    """Fake quantization operation matching TensorFlow's implementation.
+
+    Thin wrapper: every argument is forwarded unchanged to
+    `fake_quant_with_min_max_vars_per_channel`. The only difference in the
+    signature is that `num_bits` defaults to 8 here.
+
+    Args:
+        inputs: Input tensor to fake-quantize.
+        min_vals: Minimum value(s) of the quantization range.
+        max_vals: Maximum value(s) of the quantization range.
+        num_bits: Quantization bit width. Defaults to 8.
+        narrow_range: Whether to use the narrow quantization range.
+        axis: Channel axis passed through to the per-channel function.
+
+    Returns:
+        Whatever `fake_quant_with_min_max_vars_per_channel` returns for
+        these arguments.
+    """
+    quantized = fake_quant_with_min_max_vars_per_channel(
+        inputs,
+        min_vals,
+        max_vals,
+        num_bits=num_bits,
+        narrow_range=narrow_range,
+        axis=axis,
+    )
+    return quantized
+
+
+@keras_export("keras.quantizers.fake_quant_with_min_max_vars")
+def fake_quant_with_min_max_vars(
+    inputs,
+    min_vals,
+    max_vals,
+    num_bits=8,
+    narrow_range=False,
+    axis=None,
+):
+    """Fake quantization operation matching TensorFlow's implementation.
+
+    Thin wrapper: every argument is forwarded unchanged to
+    `fake_quant_with_min_max_vars_per_channel`. The only difference in the
+    signature is that `num_bits` defaults to 8 here.
+
+    Args:
+        inputs: Input tensor to fake-quantize.
+        min_vals: Minimum value(s) of the quantization range.
+        max_vals: Maximum value(s) of the quantization range.
+        num_bits: Quantization bit width. Defaults to 8.
+        narrow_range: Whether to use the narrow quantization range.
+        axis: Channel axis passed through to the per-channel function.
+
+    Returns:
+        Whatever `fake_quant_with_min_max_vars_per_channel` returns for
+        these arguments.
+    """
+    quantized = fake_quant_with_min_max_vars_per_channel(
+        inputs,
+        min_vals,
+        max_vals,
+        num_bits=num_bits,
+        narrow_range=narrow_range,
+        axis=axis,
+    )
+    return quantized
+
+
 """Float8-related methods"""