
Commit e8db72a

Merge branch 'main' into bump_ao_2262025

2 parents 96afe1f + 8f509e1

File tree: 2 files changed, +2 -3 lines changed


docs/source/backends-xnnpack.md
Lines changed: 0 additions & 1 deletion

@@ -121,4 +121,3 @@ target_link_libraries(
 ```
 
 No additional steps are necessary to use the backend beyond linking the target. Any XNNPACK-delegated .pte file will automatically run on the registered backend.
-
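For context on the paragraph above: backend dispatch for a delegated .pte file happens when the program is loaded, so application code needs no XNNPACK-specific calls. A minimal Python sketch, assuming an ExecuTorch build whose pybindings include the XNNPACK backend; the model path and input shape are hypothetical:

```python
# Minimal sketch: running an XNNPACK-delegated .pte file.
# Assumes the Python runtime was built with the XNNPACK backend
# registered; dispatch to the backend is automatic at load time.
import torch
from executorch.extension.pybindings.portable_lib import _load_for_executorch

module = _load_for_executorch("model_xnnpack.pte")  # hypothetical path
outputs = module.forward([torch.randn(1, 3, 224, 224)])  # hypothetical input shape
print(outputs[0].shape)
```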

examples/models/llama/source_transformation/quantize.py
Lines changed: 2 additions & 2 deletions

@@ -14,8 +14,6 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer
-
 from executorch.extension.llm.export.builder import DType
 
 from sentencepiece import SentencePieceProcessor
@@ -180,6 +178,8 @@ def quantize( # noqa C901
         model = gptq_quantizer.quantize(model, inputs)
         return model
     elif qmode == "vulkan_4w":
+        from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer
+
         q_group_size = 256 if group_size is None else group_size
         model = VkInt4WeightOnlyQuantizer(groupsize=q_group_size).quantize(model)
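The net effect of this hunk: the VkInt4WeightOnlyQuantizer import moves from module scope into the `vulkan_4w` branch, so importing quantize.py no longer requires the Vulkan backend to be importable. A minimal sketch of the deferred-import pattern, with simplified stand-in names around the lines taken from the diff:

```python
import torch.nn as nn


def quantize_model(model: nn.Module, qmode: str, group_size: int | None = None) -> nn.Module:
    """Simplified stand-in for the quantize() entry point in the diff."""
    if qmode == "vulkan_4w":
        # Deferred import: only users of the vulkan_4w mode need the
        # Vulkan backend installed; other modes never trigger it.
        from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer

        q_group_size = 256 if group_size is None else group_size
        return VkInt4WeightOnlyQuantizer(groupsize=q_group_size).quantize(model)
    raise ValueError(f"Unsupported qmode: {qmode}")
```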
