Add some basic xnnpack recipes #10035

Open · wants to merge 1 commit into base: gh/tarun292/5/base
8 changes: 8 additions & 0 deletions backends/transforms/duplicate_dynamic_quant_chain.py

@@ -8,6 +8,7 @@
import operator

import torch
from executorch.exir.program._program import _update_exported_program_graph_module

from torch.ao.quantization.pt2e.utils import (
    _filter_sym_size_users,

@@ -194,3 +195,10 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        graph_module.graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)


def duplicate_dynamic_quant_chain_pass(
    ep: torch.export.ExportedProgram,
) -> torch.export.ExportedProgram:
    res = DuplicateDynamicQuantChainPass()(ep.graph_module)
    assert res is not None
    return _update_exported_program_graph_module(ep, res.graph_module)
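For reference, a minimal sketch of how the new wrapper could be applied directly to a torch.export-produced program; the TinyLinear module and its input shape are hypothetical placeholders, not part of this PR:

# Minimal usage sketch; TinyLinear and its input shape are hypothetical.
import torch
from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
    duplicate_dynamic_quant_chain_pass,
)

class TinyLinear(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(8, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)

ep = torch.export.export(TinyLinear().eval(), (torch.randn(2, 8),))
# The pass runs on ep.graph_module and returns an updated ExportedProgram.
ep = duplicate_dynamic_quant_chain_pass(ep)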
1 change: 1 addition & 0 deletions backends/xnnpack/TARGETS

@@ -37,5 +37,6 @@ runtime.python_library(
        ":xnnpack_preprocess",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
        "//executorch/backends/xnnpack/utils:xnnpack_utils",
        "//executorch/backends/xnnpack/recipes:xnnpack_recipes",
    ],
)
3 changes: 2 additions & 1 deletion backends/xnnpack/__init__.py

@@ -22,13 +22,14 @@

# XNNPACK Backend
from .xnnpack_preprocess import XnnpackBackend

from .recipes.recipes import get_xnnpack_recipe

__all__ = [
    "XnnpackDynamicallyQuantizedPartitioner",
    "XnnpackPartitioner",
    "XnnpackBackend",
    "capture_graph_for_xnnpack",
    "get_xnnpack_recipe",
    "get_xnnpack_capture_config",
    "get_xnnpack_edge_compile_config",
    "get_xnnpack_executorch_backend_config",
18 changes: 18 additions & 0 deletions backends/xnnpack/recipes/TARGETS

@@ -0,0 +1,18 @@
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")


oncall("executorch")

python_library(
    name = "xnnpack_recipes",
    srcs = [
        "recipes.py",
    ],
    deps = [
        "//caffe2:torch",
        "//executorch/exir:lib",
        "//executorch/backends/transforms:duplicate_dynamic_quant_chain",
        "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
    ],
)
51 changes: 51 additions & 0 deletions backends/xnnpack/recipes/recipes.py

@@ -0,0 +1,51 @@
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

# pyre-strict
from typing import Any, Callable

from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
    duplicate_dynamic_quant_chain_pass,
    DuplicateDynamicQuantChainPass,
)

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner

from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)
from executorch.exir import ExportRecipe


def get_generic_fp32_cpu_recipe() -> ExportRecipe:

[Contributor comment] if namespaced to XNNPACK then cpu may not be needed?
Suggested change:
-def get_generic_fp32_cpu_recipe() -> ExportRecipe:
+def get_fp32_recipe() -> ExportRecipe:

    quantizer = XNNPACKQuantizer()
    operator_config = get_symmetric_quantization_config(is_per_channel=False)
    quantizer.set_global(operator_config)

[Contributor comment on lines +20 to +22] Suggested change (the quantizer is unused in this fp32 recipe, so drop these lines):
-    quantizer = XNNPACKQuantizer()
-    operator_config = get_symmetric_quantization_config(is_per_channel=False)
-    quantizer.set_global(operator_config)

    return ExportRecipe(
        name="fp32_recipe",

[Contributor comment] nit
Suggested change:
-        name="fp32_recipe",
+        name="fp32",

        quantizer=None,
        partitioners=[XnnpackPartitioner()],

[Contributor comment] Suggested change: remove the trailing blank line before the closing parenthesis.

    )


def get_dynamic_quant_recipe() -> ExportRecipe:

[Contributor comment] organizationally maybe quant recipes can be in a separate folder?

    quantizer = XNNPACKQuantizer()
    operator_config = get_symmetric_quantization_config(
        is_per_channel=True, is_dynamic=True
    )
    quantizer.set_global(operator_config)
    DuplicateDynamicQuantChainPass

[Contributor comment] i've actually done some work to remove the need for this. I guess it's ok to have for now as it should still work.
[Contributor comment] Let's remove it if not needed?

    return ExportRecipe(
        name="dynamic_quant_recipe",
        quantizer=quantizer,
        partitioners=[XnnpackPartitioner()],
        pre_edge_transform_passes=duplicate_dynamic_quant_chain_pass,
    )


RECIPE_MAP: dict[str, Callable[[], ExportRecipe]] = {
    "FP32_CPU_ACCELERATED_RECIPE": get_generic_fp32_cpu_recipe,
    "DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE": get_dynamic_quant_recipe,

[Contributor comment on lines +45 to +46] nit
Suggested change:
-    "FP32_CPU_ACCELERATED_RECIPE": get_generic_fp32_cpu_recipe,
-    "DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE": get_dynamic_quant_recipe,
+    "FP32_RECIPE": get_fp32_recipe,
+    "DYNAMIC_QUANT_RECIPE": get_dynamic_quant_recipe,

}


def get_xnnpack_recipe(recipe_name: str, **kwargs: Any) -> ExportRecipe:
    assert recipe_name in RECIPE_MAP, f"Recipe {recipe_name} not found."
    return RECIPE_MAP[recipe_name](**kwargs)
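A short usage sketch of the lookup helper, going through the package-level re-export added in __init__.py; the assumption that ExportRecipe exposes the name it was constructed with is mine, not something this diff shows:

# Usage sketch; assumes ExportRecipe keeps its constructor's `name` attribute.
from executorch.backends.xnnpack import get_xnnpack_recipe

recipe = get_xnnpack_recipe("FP32_CPU_ACCELERATED_RECIPE")
print(recipe.name)  # expected: "fp32_recipe"

# An unknown name trips the assert inside get_xnnpack_recipe.
try:
    get_xnnpack_recipe("NO_SUCH_RECIPE")
except AssertionError as e:
    print(e)  # "Recipe NO_SUCH_RECIPE not found."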
3 changes: 2 additions & 1 deletion backends/xnnpack/targets.bzl

@@ -64,7 +64,8 @@ def define_common_targets():
            "//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header",
            "//executorch/extension/threadpool:threadpool",
            "//executorch/runtime/core/exec_aten/util:tensor_util",
-           "//executorch/runtime/executor:pte_data_map"
+           "//executorch/runtime/executor:pte_data_map",
+           "//executorch/backends/xnnpack/recipes:xnnpack_recipes",
        ],
        # XnnpackBackend.cpp needs to compile with executor as whole
        # @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
10 changes: 10 additions & 0 deletions backends/xnnpack/test/TARGETS

@@ -93,3 +93,13 @@ runtime.python_test(
        "libtorch",
    ],
)

runtime.python_test(
    name = "test_xnnpack_recipes",
    srcs = glob([
        "recipes/*.py",
    ]),
    deps = [
        "//executorch/backends/xnnpack:xnnpack_delegate",
    ],
)
37 changes: 37 additions & 0 deletions backends/xnnpack/test/recipes/test_xnnpack_recipes.py

@@ -0,0 +1,37 @@
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

# pyre-strict

import unittest

import torch
from executorch.backends.xnnpack import get_xnnpack_recipe
from executorch.export import export
from torch.testing._internal.common_quantization import TestHelperModules


class TestXnnpackRecipes(unittest.TestCase):
    def setUp(self) -> None:
        super().setUp()

    def tearDown(self) -> None:
        super().tearDown()

[Contributor comment on lines +13 to +17] do we need these?

    def test_basic_recipe(self) -> None:
        m_eager = TestHelperModules.TwoLinearModule().eval()
        example_inputs = [(torch.randn(9, 8),)]
        export_session = export(
            model=m_eager,
            example_inputs=example_inputs,
            export_recipe=get_xnnpack_recipe("FP32_CPU_ACCELERATED_RECIPE"),
        )
        export_session.export()

[Contributor comment] will this raise? should we catch it?

    def test_dynamic_quant_recipe(self) -> None:
        m_eager = TestHelperModules.TwoLinearModule().eval()
        example_inputs = [(torch.randn(9, 8),)]
        export_session = export(
            model=m_eager,
            example_inputs=example_inputs,
            export_recipe=get_xnnpack_recipe("DYNAMIC_QUANT_CPU_ACCELERATED_RECIPE"),
        )
        export_session.export()