Add some basic xnnpack recipes #10035

Open · wants to merge 1 commit into base: gh/tarun292/5/base
8 changes: 8 additions & 0 deletions backends/transforms/duplicate_dynamic_quant_chain.py

@@ -8,6 +8,7 @@
import operator

import torch
from executorch.exir.program._program import _update_exported_program_graph_module

from torch.ao.quantization.pt2e.utils import (
    _filter_sym_size_users,

@@ -194,3 +195,10 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        graph_module.graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)


def duplicate_dynamic_quant_chain_pass(
    ep: torch.export.ExportedProgram,
) -> torch.export.ExportedProgram:
    res = DuplicateDynamicQuantChainPass()(ep.graph_module)
    assert res is not None
    return _update_exported_program_graph_module(ep, res.graph_module)
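For reference, a minimal sketch of how the new wrapper could be applied directly to a torch.export-produced program; the TinyLinear module and its input shape are hypothetical placeholders, not part of this PR:

# Minimal usage sketch; TinyLinear and its input shape are hypothetical.
import torch
from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
    duplicate_dynamic_quant_chain_pass,
)

class TinyLinear(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(8, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)

ep = torch.export.export(TinyLinear().eval(), (torch.randn(2, 8),))
# The pass runs on ep.graph_module and returns an updated ExportedProgram.
ep = duplicate_dynamic_quant_chain_pass(ep)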
1 change: 1 addition & 0 deletions backends/xnnpack/TARGETS

@@ -37,5 +37,6 @@ runtime.python_library(
        ":xnnpack_preprocess",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
        "//executorch/backends/xnnpack/utils:xnnpack_utils",
        "//executorch/backends/xnnpack/recipes:xnnpack_recipes",
    ],
)
3 changes: 2 additions & 1 deletion backends/xnnpack/__init__.py

@@ -22,13 +22,14 @@

# XNNPACK Backend
from .xnnpack_preprocess import XnnpackBackend

from .recipes.recipes import get_xnnpack_recipe

__all__ = [
    "XnnpackDynamicallyQuantizedPartitioner",
    "XnnpackPartitioner",
    "XnnpackBackend",
    "capture_graph_for_xnnpack",
    "get_xnnpack_recipe",
    "get_xnnpack_capture_config",
    "get_xnnpack_edge_compile_config",
    "get_xnnpack_executorch_backend_config",
18 changes: 18 additions & 0 deletions backends/xnnpack/recipes/TARGETS

@@ -0,0 +1,18 @@
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")


oncall("executorch")

python_library(
    name = "xnnpack_recipes",
    srcs = [
        "recipes.py",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/exir:lib",
        "//executorch/backends/transforms:duplicate_dynamic_quant_chain",
        "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
    ],
)
51 changes: 51 additions & 0 deletions backends/xnnpack/recipes/recipes.py

@@ -0,0 +1,51 @@
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

# pyre-strict
from typing import Any, Callable

from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
    duplicate_dynamic_quant_chain_pass,
    DuplicateDynamicQuantChainPass,
)

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner

from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)
from executorch.exir import ExportRecipe


def get_generic_fp32_cpu_recipe() -> ExportRecipe:

[Contributor comment] if namespaced to XNNPACK then cpu may not be needed?
Suggested change:
-def get_generic_fp32_cpu_recipe() -> ExportRecipe:
+def get_fp32_recipe() -> ExportRecipe:

    quantizer = XNNPACKQuantizer()
    operator_config = get_symmetric_quantization_config(is_per_channel=False)
    quantizer.set_global(operator_config)

[Contributor comment on lines +20 to +22] Suggested change (the quantizer is unused in this fp32 recipe, so drop these lines):
-    quantizer = XNNPACKQuantizer()
-    operator_config = get_symmetric_quantization_config(is_per_channel=False)
-    quantizer.set_global(operator_config)

    return ExportRecipe(
        name="fp32_recipe",

[Contributor comment] nit
Suggested change:
-        name="fp32_recipe",
+        name="fp32",

        quantizer=None,
        partitioners=[XnnpackPartitioner()],

[Contributor comment] Suggested change: remove the trailing blank line before the closing parenthesis.

    )


def get_dynamic_quant_recipe() -> ExportRecipe:

[Contributor comment] organizationally maybe quant recipes can be in a separate folder?

    quantizer = XNNPACKQuantizer()
    operator_config = get_symmetric_quantization_config(
        is_per_channel=True, is_dynamic=True
    )
    quantizer.set_global(operator_config)
    DuplicateDynamicQuantChainPass

[Contributor comment] i've actually done some work to remove the need for this. I guess it's ok to have for now as it should still work.
[Contributor comment] Let's remove it if not needed?

    return ExportRecipe(
        name="dynamic_quant_recipe",
        quantizer=quantizer,
        partitioners=[XnnpackPartitioner()],
        pre_edge_transform_passes=duplicate_dynamic_quant_chain_pass,
    )


RECIPE_MAP: dict[str, Callable[[], ExportRecipe]] = {
    "FP32_CPU_ACCELERATED_RECIPE": get_generic_fp32_cpu_recipe,
    "DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE": get_dynamic_quant_recipe,

[Contributor comment on lines +45 to +46] nit
Suggested change:
-    "FP32_CPU_ACCELERATED_RECIPE": get_generic_fp32_cpu_recipe,
-    "DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE": get_dynamic_quant_recipe,
+    "FP32_RECIPE": get_fp32_recipe,
+    "DYNAMIC_QUANT_RECIPE": get_dynamic_quant_recipe,

}


def get_xnnpack_recipe(recipe_name: str, **kwargs: Any) -> ExportRecipe:
    assert recipe_name in RECIPE_MAP, f"Recipe {recipe_name} not found."
    return RECIPE_MAP[recipe_name](**kwargs)
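A short usage sketch of the lookup helper, going through the package-level re-export added in __init__.py; the assumption that ExportRecipe exposes the name it was constructed with is mine, not something this diff shows:

# Usage sketch; assumes ExportRecipe keeps its constructor's `name` attribute.
from executorch.backends.xnnpack import get_xnnpack_recipe

recipe = get_xnnpack_recipe("FP32_CPU_ACCELERATED_RECIPE")
print(recipe.name)  # expected: "fp32_recipe"

# An unknown name trips the assert inside get_xnnpack_recipe.
try:
    get_xnnpack_recipe("NO_SUCH_RECIPE")
except AssertionError as e:
    print(e)  # "Recipe NO_SUCH_RECIPE not found."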
3 changes: 2 additions & 1 deletion backends/xnnpack/targets.bzl

@@ -64,7 +64,8 @@ def define_common_targets():
            "//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header",
            "//executorch/extension/threadpool:threadpool",
            "//executorch/runtime/core/exec_aten/util:tensor_util",
-           "//executorch/runtime/executor:pte_data_map"
+           "//executorch/runtime/executor:pte_data_map",
+           "//executorch/backends/xnnpack/recipes:xnnpack_recipes",
        ],
        # XnnpackBackend.cpp needs to compile with executor as whole
        # @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
10 changes: 10 additions & 0 deletions backends/xnnpack/test/TARGETS

@@ -93,3 +93,13 @@ runtime.python_test(
        "libtorch",
    ],
)

runtime.python_test(
    name = "test_xnnpack_recipes",
    srcs = glob([
        "recipes/*.py",
    ]),
    deps = [
        "//executorch/backends/xnnpack:xnnpack_delegate",
    ],
)
37 changes: 37 additions & 0 deletions backends/xnnpack/test/recipes/test_xnnpack_recipes.py

@@ -0,0 +1,37 @@
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

# pyre-strict

import unittest

import torch
from executorch.backends.xnnpack import get_xnnpack_recipe
from executorch.export import export
from torch.testing._internal.common_quantization import TestHelperModules


class TestXnnpackRecipes(unittest.TestCase):
    def setUp(self) -> None:
        super().setUp()

    def tearDown(self) -> None:
        super().tearDown()

[Contributor comment on lines +13 to +17] do we need these?

    def test_basic_recipe(self) -> None:
        m_eager = TestHelperModules.TwoLinearModule().eval()
        example_inputs = [(torch.randn(9, 8),)]
        export_session = export(
            model=m_eager,
            example_inputs=example_inputs,
            export_recipe=get_xnnpack_recipe("FP32_CPU_ACCELERATED_RECIPE"),
        )
        export_session.export()

[Contributor comment] will this raise? should we catch it?

    def test_dynamic_quant_recipe(self) -> None:
        m_eager = TestHelperModules.TwoLinearModule().eval()
        example_inputs = [(torch.randn(9, 8),)]
        export_session = export(
            model=m_eager,
            example_inputs=example_inputs,
            export_recipe=get_xnnpack_recipe("DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE"),
        )
        export_session.export()