pytorch
diff --git a/‎.gitignore
Lines changed: 3 additions & 0 deletions b/‎.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/cadence/aot/TARGETS
Lines changed: 1 addition & 0 deletions b/‎backends/cadence/aot/TARGETS
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/cadence/aot/simplify_ops.py
Lines changed: 4 additions & 25 deletions b/‎backends/cadence/aot/simplify_ops.py
Lines changed: 4 additions & 25 deletions
diff --git a/‎backends/cadence/aot/utils.py
Lines changed: 29 additions & 0 deletions b/‎backends/cadence/aot/utils.py
Lines changed: 29 additions & 0 deletions
diff --git a/‎examples/models/llama/model_args.py
Lines changed: 1 addition & 1 deletion b/‎examples/models/llama/model_args.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.java
Lines changed: 0 additions & 126 deletions b/‎extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.java
Lines changed: 0 additions & 126 deletions
diff --git a/‎extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.kt
Lines changed: 115 additions & 0 deletions b/‎extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/LlmModuleInstrumentationTest.kt
Lines changed: 115 additions & 0 deletions
@@ -40,3 +40,6 @@ xcuserdata/
 .swiftpm/
 *.xcworkspace/
 *.xcframework/
+
+# Android
+*.aar
@@ -211,6 +211,7 @@ python_library(
     typing = True,
     deps = [
         ":pass_utils",
+        ":utils",
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
 
@@ -16,10 +16,10 @@
     CadencePassAttribute,
     register_cadence_pass,
 )
+from executorch.backends.cadence.aot.utils import rebind
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, ProxyValue
-from torch.fx.operator_schemas import get_signature_for_torch_op
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
@@ -117,32 +117,11 @@ class BindOptionalArgsPass(ExportPass):
     def call_operator(self, op, args, kwargs, meta):
         if not isinstance(op, EdgeOpOverload):
             return super().call_operator(op, args, kwargs, meta)
-        assert callable(op)
 
-        torch_op_schemas = get_signature_for_torch_op(op._op)
-        if len(torch_op_schemas) == 0:
-            return super().call_operator(op, args, kwargs, meta)
-
-        matched_schemas = []
-        # Iterate through all of the schema until we find one that matches
-        # If one matches, populate `new_args_and_kwargs` with the new args/kwargs
-        # values. If none matches, `new_args_and_kwargs` will be None
-        for candidate_signature in torch_op_schemas:
-            try:
-                candidate_signature.bind(*args, **kwargs)
-                matched_schemas.append(candidate_signature)
-            except TypeError:
-                continue
-
-        if len(matched_schemas) != 1:
-            # Did not match any schema. Cannot normalize
-            return super().call_operator(op, args, kwargs, meta)
-
-        sig = matched_schemas[0]
-        bound_args = sig.bind(*args, **kwargs)
-        bound_args.apply_defaults()
+        if (updated_args := rebind(op, args, kwargs)) is not None:
+            args, kwargs = updated_args
 
-        return super().call_operator(op, bound_args.args, bound_args.kwargs, meta)
+        return super().call_operator(op, args, kwargs, meta)
 
 
 # This class encapsulates all the functions that simplify the op's args
 
@@ -18,7 +18,9 @@
 from executorch.exir import ExecutorchProgramManager, memory
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket
+from executorch.exir.pass_base import Argument
 from tabulate import tabulate
+from torch.fx.operator_schemas import get_signature_for_torch_op
 
 from torch.utils._pytree import tree_flatten
 
@@ -308,3 +310,30 @@ def get_size(self, exir_id: int) -> int:
 # Return default memory config for the backend
 def get_default_memory_config() -> MemoryConfig:
     return MemoryConfig(memory_sizes=[0x1000000000])
+
+
+def rebind(
+    op: EdgeOpOverload, args: tuple[Argument, ...], kwargs: dict[str, Argument]
+) -> Optional[tuple[tuple[Argument, ...], dict[str, Argument]]]:
+    """Populates optional args and binds args/kwargs based on schema."""
+    torch_op_schemas = get_signature_for_torch_op(op._op)
+
+    matched_schemas = []
+    # Iterate through all of the schema until we find one that matches
+    # If one matches, populate `new_args_and_kwargs` with the new args/kwargs
+    # values. If none matches, `new_args_and_kwargs` will be None
+    for candidate_signature in torch_op_schemas:
+        try:
+            candidate_signature.bind(*args, **kwargs)
+            matched_schemas.append(candidate_signature)
+        except TypeError:
+            continue
+
+    if len(matched_schemas) != 1:
+        # Did not match any schema. Cannot normalize
+        return None
+
+    bound_args = matched_schemas[0].bind(*args, **kwargs)
+    bound_args.apply_defaults()
+
+    return bound_args.args, bound_args.kwargs
@@ -14,7 +14,7 @@ class ModelArgs:
     multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
     ffn_dim_multiplier: Optional[float] = None
     norm_eps: float = 1e-5
-    max_batch_size: int = 32
+    max_batch_size: int = 1
     max_seq_len: int = 2048
     max_context_len: int = 2048
     moe: bool = False  # True to enable the MoE (Mixture of Experts)
 
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+package org.pytorch.executorch
+
+import android.Manifest
+import androidx.test.InstrumentationRegistry
+import androidx.test.ext.junit.runners.AndroidJUnit4
+import androidx.test.rule.GrantPermissionRule
+import org.apache.commons.io.FileUtils
+import org.json.JSONException
+import org.json.JSONObject
+import org.junit.Assert
+import org.junit.Before
+import org.junit.Rule
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.pytorch.executorch.extension.llm.LlmCallback
+import org.pytorch.executorch.extension.llm.LlmModule
+import java.io.File
+import java.io.IOException
+import java.net.URISyntaxException
+
+/**
+ * On-device instrumentation tests for [org.pytorch.executorch.extension.llm.LlmModule].
+ *
+ * The test class implements [LlmCallback] itself so that every generated result and every stats
+ * report is collected into [results] and [tokensPerSecond] for the assertions.
+ */
+@RunWith(AndroidJUnit4::class)
+class LlmModuleInstrumentationTest : LlmCallback {
+    private val results: MutableList<String> = ArrayList()
+    private val tokensPerSecond: MutableList<Float> = ArrayList()
+
+    // Assigned in setUp(), which is annotated @Before.
+    private lateinit var llmModule: LlmModule
+
+    @get:Rule
+    val runtimePermissionRule: GrantPermissionRule =
+        GrantPermissionRule.grant(Manifest.permission.READ_EXTERNAL_STORAGE)
+
+    /** Copies the bundled model and tokenizer to the device and creates the module under test. */
+    @Before
+    @Throws(IOException::class)
+    fun setUp() {
+        val modelPath = copyResourceToDevice(TEST_FILE_NAME)
+        val tokenizerPath = copyResourceToDevice(TOKENIZER_FILE_NAME)
+
+        llmModule = LlmModule(modelPath, tokenizerPath, 0.0f)
+    }
+
+    /** Loads the model, generates [SEQ_LEN] tokens and checks the result count and throughput. */
+    @Test
+    @Throws(IOException::class, URISyntaxException::class)
+    fun testGenerate() {
+        val loadResult = llmModule.load()
+        // Check that the model can be loaded successfully
+        Assert.assertEquals(OK.toLong(), loadResult.toLong())
+
+        llmModule.generate(TEST_PROMPT, SEQ_LEN, this)
+        Assert.assertEquals(SEQ_LEN.toLong(), results.size.toLong())
+        // onStats() skips unparsable reports, so make an empty list an explicit failure.
+        Assert.assertTrue("No parsable stats were reported", tokensPerSecond.isNotEmpty())
+        Assert.assertTrue(tokensPerSecond.last() > 0)
+    }
+
+    /** Stops generation from inside the result callback and expects fewer than [SEQ_LEN] results. */
+    @Test
+    @Throws(IOException::class, URISyntaxException::class)
+    fun testGenerateAndStop() {
+        llmModule.generate(TEST_PROMPT, SEQ_LEN, object : LlmCallback {
+            override fun onResult(result: String) {
+                this@LlmModuleInstrumentationTest.onResult(result)
+                llmModule.stop()
+            }
+
+            override fun onStats(stats: String) {
+                this@LlmModuleInstrumentationTest.onStats(stats)
+            }
+        })
+
+        val stoppedResultSize = results.size
+        Assert.assertTrue(stoppedResultSize < SEQ_LEN)
+    }
+
+    /** Records one generated result. */
+    override fun onResult(result: String) {
+        results.add(result)
+    }
+
+    /**
+     * Parses a stats JSON report and records the tokens-per-second value derived from it.
+     *
+     * Reports that are not valid JSON, or that lack one of the expected fields, are skipped.
+     */
+    override fun onStats(stats: String) {
+        try {
+            val jsonObject = JSONObject(stats)
+            val numGeneratedTokens = jsonObject.getInt("generated_tokens")
+            // Read the millisecond fields as Long so large values are not truncated to Int.
+            val inferenceEndMs = jsonObject.getLong("inference_end_ms")
+            val promptEvalEndMs = jsonObject.getLong("prompt_eval_end_ms")
+            val tps = numGeneratedTokens.toFloat() / (inferenceEndMs - promptEvalEndMs) * 1000
+            tokensPerSecond.add(tps)
+        } catch (_: JSONException) {
+            // Best effort: a malformed report is ignored here; testGenerate() asserts that at
+            // least one report was parsed.
+        }
+    }
+
+    /**
+     * Copies the classpath resource [resourceName] into the external cache directory.
+     *
+     * @return the path of the copied file on the device.
+     * @throws IllegalStateException if the resource is not bundled with the test APK.
+     */
+    @Throws(IOException::class)
+    private fun copyResourceToDevice(resourceName: String): String {
+        val devicePath = getTestFilePath(resourceName)
+        val resource = checkNotNull(javaClass.getResourceAsStream(resourceName)) {
+            "Test resource $resourceName is missing from the test APK"
+        }
+        // use {} closes the stream even if the copy throws.
+        resource.use { stream -> FileUtils.copyInputStreamToFile(stream, File(devicePath)) }
+        return devicePath
+    }
+
+    companion object {
+        private const val TEST_FILE_NAME = "/stories.pte"
+        private const val TOKENIZER_FILE_NAME = "/tokenizer.bin"
+        private const val TEST_PROMPT = "Hello"
+        private const val OK = 0x00
+        private const val SEQ_LEN = 32
+
+        /**
+         * Builds the on-device path for [fileName] (which starts with '/') under the target
+         * context's external cache directory.
+         *
+         * @throws IllegalStateException if the external cache directory is unavailable.
+         */
+        private fun getTestFilePath(fileName: String): String {
+            val cacheDir = checkNotNull(
+                InstrumentationRegistry.getInstrumentation().targetContext.externalCacheDir
+            ) { "External cache directory is not available" }
+            return "$cacheDir$fileName"
+        }
+    }
+}