
Commit 662f285

Update on "[XNNPACK][Partitioner] SDPA Config"
We add the SDPA config for the partitioner here. There is currently an issue with SDPA when it is used from the FairSeq multihead attention models, so the config is disabled in the base partitioner until that is resolved; otherwise, our tests can use SDPA correctly from there. This is tracked in D60553559 and will be followed up on later.

Differential Revision: [D60323285](https://our.internmc.facebook.com/intern/diff/D60323285/)

[ghstack-poisoned]
2 parents 3c540f5 + e6f5435 commit 662f285

File tree

1 file changed: +2 −2 lines changed


backends/xnnpack/operators/op_sdpa.py

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Dict
+from typing import cast, Dict
 
 import torch
 from executorch.backends.transforms import get_shape
@@ -69,7 +69,7 @@ def define_node(
         embedding_dim = q_shape[-1]
         scale = 1 / (embedding_dim**0.5)
         if "scale" in node.kwargs and node.kwargs["scale"]:
-            scale = node.kwargs["scale"]
+            scale = cast(float, node.kwargs["scale"])
 
         t = torch.full((embedding_dim,), scale, dtype=mask_dtype)
         scale_node = self.get_fake_attr("scale", t)
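
For context on the second hunk, here is a minimal, self-contained sketch of why the `cast` is useful: `node.kwargs["scale"]` is an untyped FX argument, while the scale value is consumed as a float, so `typing.cast` narrows the static type for the checker without changing runtime behavior. The function name `resolve_sdpa_scale` and the plain dict standing in for `node.kwargs` are illustrative, not part of the actual serializer.

# Sketch: default SDPA scale vs. an explicit "scale" kwarg, narrowed with cast().
from typing import Any, Dict, cast

import torch


def resolve_sdpa_scale(kwargs: Dict[str, Any], embedding_dim: int) -> torch.Tensor:
    # Default matches scaled dot-product attention: 1 / sqrt(embedding_dim).
    scale = 1 / (embedding_dim**0.5)
    if "scale" in kwargs and kwargs["scale"]:
        # kwargs values carry no static type; cast() only informs the type
        # checker and is a no-op at runtime.
        scale = cast(float, kwargs["scale"])
    # Materialize the scale as a constant tensor, as the serializer does.
    return torch.full((embedding_dim,), scale, dtype=torch.float32)


# Example: an explicit scale kwarg overrides the 1/sqrt(E) default.
print(resolve_sdpa_scale({"scale": 0.125}, embedding_dim=64))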
