
Commit 1933dae

larryliu0820 authored and facebook-github-bot committed
Add Llava model definition (#4259)
Summary: Pull Request resolved: #4259
Reviewed By: helunwencser
Differential Revision: D59759978
fbshipit-source-id: 8ff8a5b24481b28e0814b45f60b4b0fdbfd47e4e
1 parent c757499 commit 1933dae

7 files changed (+441, -29 lines)

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@
     "w2l": "linux.12xlarge",
     "ic4": "linux.12xlarge",
     "resnet50": "linux.12xlarge",
-    "llava": "linux.4xlarge",
+    "llava": "linux.12xlarge",
     # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
     "dl3": "linux.12xlarge",
     "emformer_join": "linux.12xlarge",

.ci/scripts/test.sh

Lines changed: 3 additions & 1 deletion
@@ -67,12 +67,14 @@ test_model() {
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
   fi
+  STRICT="--strict"
   if [[ "${MODEL_NAME}" == "llava" ]]; then
     # Install requirements for llava
     bash examples/models/llava/install_requirements.sh
+    STRICT="--no-strict"
   fi
   # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
-  "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}"
+  "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
   run_portable_executor_runner
 }
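
By default torch.export traces in strict mode (TorchDynamo-based); the llava export needs the more permissive non-strict mode, hence the new STRICT toggle above. A minimal sketch of how an export entry point can wire such a flag through to torch.export.export; the argparse wiring and dummy module are illustrative, not the actual examples.portable.scripts.export source:

# Sketch: map the --strict / --no-strict CLI flags onto torch.export's
# `strict` kwarg. The module being exported here is a stand-in.
import argparse

import torch


class Dummy(torch.nn.Module):
    def forward(self, x):
        return x + 1


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--strict", dest="strict", action="store_true", default=True)
    parser.add_argument("--no-strict", dest="strict", action="store_false")
    args = parser.parse_args()

    # strict=False falls back to non-strict tracing, which tolerates
    # constructs that TorchDynamo-based strict tracing rejects.
    ep = torch.export.export(Dummy(), (torch.randn(2),), strict=args.strict)
    print(ep)


if __name__ == "__main__":
    main()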

examples/models/llava/install_requirements.sh

Lines changed: 7 additions & 1 deletion
@@ -17,8 +17,14 @@ pip install protobuf
 # Reinstall bitsandbytes to make it compatible.
 pip install bitsandbytes -I
 
+# numpy needs to be pin to 1.24. 1.26.4 will error out
+pip install numpy==1.24
+
+# Newer transformer will give TypeError: LlavaLlamaForCausalLM.forward() got an unexpected keyword argument 'cache_position'
+pip install transformers==4.37.2
+
 # The deps of llava can have different versions than deps of ExecuTorch.
 # For example, torch version required from llava is older than ExecuTorch.
 # To make both work, recover ExecuTorch's original dependencies by rerunning
 # the install_requirements.sh.
-bash -x ./install_requirements.sh
+bash -x ./install_requirements.sh --pybind xnnpack
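
Because llava's own requirements overwrite some ExecuTorch dependencies and the final install_requirements.sh rerun restores them, a quick post-install check helps confirm the two pins above survived. A minimal sanity-check sketch (not part of the commit):

# Run after install_requirements.sh finishes; asserts the pins above held.
import numpy
import transformers

assert numpy.__version__.startswith("1.24"), f"numpy is {numpy.__version__}"
assert transformers.__version__ == "4.37.2", (
    f"transformers is {transformers.__version__}"
)
print("numpy and transformers pins look good")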

examples/models/llava/main.py

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+
+from model import LlavaModel
+
+
+def main():
+
+    llava_model = LlavaModel()
+    llava = llava_model.get_eager_model()
+
+    prompt_before_image, resized, prompt_after_image = llava_model.get_example_inputs()
+    logging.info(f"Prompt: {llava_model.prompt}")
+    preprocessed = llava.image_preprocess(resized)
+    with torch.inference_mode():
+        output_ids = llava_model.model.generate(
+            llava_model.input_ids,
+            images=preprocessed,
+            image_sizes=[preprocessed.size],
+            do_sample=False,
+            num_beams=1,
+            max_new_tokens=10,
+            use_cache=True,
+        )
+
+    outputs = llava_model.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[
+        0
+    ].strip()
+    logging.info(f"Reference output: {outputs}")
+
+    # comparing with llava result
+    # prefill_logits = llava.prefill(prompt_before_image, resized, prompt_after_image)
+    # prefill_logits_ref = llava.prefill_ref(*inputs)[0]
+    # print(f"Prefill logits all close? {torch.allclose(prefill_logits, prefill_logits_ref, atol=1e-3)}")
+
+    # prefill_logits = llava.prefill(*inputs)
+    # context_len = prefill_logits.shape[1]
+    # print(prefill_logits)
+    # # first token
+    # new_tokens = [torch.argmax(prefill_logits[..., -1, :]).item()]
+    # # print(tokenizer.decode(new_tokens))
+    # for i in range(llava_model.args.max_new_tokens):
+    #     print(i, llava_model.tokenizer.decode(new_tokens[i]))
+    #     logits = llava.forward(
+    #         torch.tensor([new_tokens[i]]), torch.tensor([context_len + i])
+    #     )
+    #     new_tokens.append(torch.argmax(logits[-1, :]))
+    prefill_logits = llava.prefill(prompt_before_image, resized, prompt_after_image)
+    context_len = prefill_logits.shape[1]
+    logging.info(prefill_logits)
+    new_tokens = [torch.argmax(prefill_logits[..., -1, :]).item()]
+    i = 0
+    logging.info(i, llava_model.tokenizer.decode(new_tokens[i]))
+    logits = llava.step(torch.tensor([new_tokens[i]]), torch.tensor([context_len + i]))
+    logging.info(logits)
+
+
+if __name__ == "__main__":
+    main()
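
main() above finishes with a single prefill plus one decode step as a sanity check. Extending that to a full greedy decode follows the commented-out loop in the same file; a minimal sketch reusing the prefill/step interface (the helper itself is illustrative, not part of the commit):

# Sketch: greedy decoding on top of the prefill/step interface used in
# main.py. Assumes `llava` and `llava_model` are built exactly as in main().
import torch


def greedy_decode(
    llava, llava_model, prompt_before_image, resized, prompt_after_image,
    max_new_tokens=10,
):
    # Prefill consumes the full multimodal prompt; the logits at the last
    # position predict the first new token.
    prefill_logits = llava.prefill(prompt_before_image, resized, prompt_after_image)
    context_len = prefill_logits.shape[1]
    new_tokens = [torch.argmax(prefill_logits[..., -1, :]).item()]

    for i in range(max_new_tokens):
        # step() takes one token id plus its KV-cache position and returns
        # logits for the next position.
        logits = llava.step(
            torch.tensor([new_tokens[i]]), torch.tensor([context_len + i])
        )
        new_tokens.append(torch.argmax(logits[-1, :]).item())

    return llava_model.tokenizer.decode(new_tokens)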
