
Commit d37117a

Merge branch 'main' into unsupported-llama2.c

Author: Michael Gschwind (committed)
Parents: 385ef02 + a276b5f

3 files changed: +23 −32 lines


.ci/scripts/run-docs
Lines changed: 14 additions & 4 deletions

@@ -55,13 +55,23 @@ if [ "$1" == "gguf" ]; then
 echo "*******************************************"
 bash -x ./run-gguf.sh
 echo "::endgroup::"
-<<<<<<< HEAD
 fi


 if [ "$1" == "advanced" ]; then
+echo "::group::Create script to run advanced"
+python3 scripts/updown.py --file docs/ADVANCED-USERS.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh
+# for good measure, if something happened to updown processor,
+# and it did not error out, fail with an exit 1
+echo "exit 1" >> ./run-advanced.sh
+echo "::endgroup::"
+
+echo "::group::Run advanced"
+echo "*******************************************"
+cat ./run-advanced.sh
+echo "*******************************************"
+bash -x ./run-advanced.sh
+echo "::endgroup::"
 echo "TBD"
 fi
-=======
-fi
->>>>>>> e3db2486f80b71b3143945a44f58d50c02488c90
+
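The advanced branch added above follows the same generate-then-run pattern as the existing gguf branch: extract the runnable commands embedded in a markdown doc with scripts/updown.py, append a fail-safe, then execute the result. A minimal standalone sketch of that pattern follows; the command lines are taken from the diff above, while the shebang and set -e are additions here to make the sketch self-contained.

#!/usr/bin/env bash
# Sketch of the generate-then-run pattern used by .ci/scripts/run-docs above.
set -e

# Extract the commands embedded in the markdown doc into a throwaway script,
# substituting the small stories15M test model for the full Llama checkpoints
# and suppressing steps that need Hugging Face credentials.
python3 scripts/updown.py --file docs/ADVANCED-USERS.md \
  --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' \
  --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh

# Fail-safe from the script above: if the updown processor stopped early
# without erroring out, execution falls through to this trailing exit 1.
echo "exit 1" >> ./run-advanced.sh

# Show the generated script, then run it with command tracing.
cat ./run-advanced.sh
bash -x ./run-advanced.sh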

.github/workflows/run-readme-pr.yml
Lines changed: 6 additions & 16 deletions

@@ -154,6 +154,7 @@ jobs:
 echo "*******************************************"
 echo "::endgroup::"

+
 test-advanced-any:
 uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
 secrets: inherit
@@ -173,27 +174,15 @@ jobs:
 export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
 echo "::endgroup::"

-echo "::group::Create script to run advanced"
-python3 scripts/updown.py --file docs/ADVANCED-USERS.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh
-# for good measure, if something happened to updown processor,
-# and it did not error out, fail with an exit 1
-echo "exit 1" >> ./run-advanced.sh
-echo "::endgroup::"
-
-echo "::group::Run advanced"
-echo "*******************************************"
-cat ./run-advanced.sh
-echo "*******************************************"
-bash -x ./run-advanced.sh
-=======
+.ci/scripts/run-docs advanced

 echo "::group::Completion"
 echo "tests complete"
 echo "*******************************************"
->>>>>>> e3db2486f80b71b3143945a44f58d50c02488c90
 echo "::endgroup::"

-test-gguf-cpu:
+
+test-advanced-cpu:
 uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
 secrets: inherit
 with:
@@ -212,9 +201,10 @@ jobs:
 export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
 echo "::endgroup::"

-TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
+TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced

 echo "::group::Completion"
 echo "tests complete"
 echo "*******************************************"
 echo "::endgroup::"
+
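With the inline steps folded into .ci/scripts/run-docs, both workflow jobs reduce to a single call; the CPU job differs only in pinning the device through an environment variable. A minimal sketch of what the two jobs now execute, using the paths and the TORCHCHAT_DEVICE variable shown in the diff above (running these outside the CI container is the assumption here):

# Default job (test-advanced-any): run the advanced docs tests as-is.
.ci/scripts/run-docs advanced

# CPU job (test-advanced-cpu): same entry point, device pinned to CPU.
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced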

unsupported/llama2.c/runner/run.cpp
Lines changed: 3 additions & 12 deletions

@@ -151,31 +151,22 @@ float* forward(Transformer* transformer, int token, int pos) {
 torch::Tensor token_tensor = torch::from_blob(token_buffer, {1, 1}, torch::kLong);
 torch::Tensor pos_tensor = torch::from_blob(pos_buffer, {1}, torch::kLong);
 std::vector<torch::Tensor> inputs{token_tensor, pos_tensor};
-
+// call AOTI model
 torch::Tensor result = transformer->runner->run(inputs)[0];
 auto logits = result[0].data_ptr();
-
 #else // __ET_MODEL__
 ManagedTensor pos_managed(
 pos_buffer, sizeof(int64_t), { 1 }, ScalarType::Long);
-#ifndef __KV_CACHE__
-// @lint-ignore CLANGTIDY facebook-hte-LocalUncheckedArrayBounds
-ManagedTensor tokens_managed(&(s->toks[pos]), /*ignored*/sizeof(int64_t)*(pos+1), {1, 1}, ScalarType::Long);
-#else // __KV_CACHE__
 ManagedTensor tokens_managed(
 token_buffer, sizeof(int64_t), {1, 1}, ScalarType::Long);
-#endif
 std::vector<EValue> inputs;
 auto tmp1 = EValue(tokens_managed.get_aliasing_tensor());
 auto tmp2 = EValue(pos_managed.get_aliasing_tensor());
-
 inputs.push_back(tmp1);
 inputs.push_back(tmp2);
+// call ET model
 Result<std::vector<EValue>> outputs_res = transformer->runner->forward(inputs);
-if (!outputs_res.ok()) {
-fprintf(stderr, "Executorch forward() failed.");
-exit(EXIT_FAILURE);
-}
+assert(outputs_res.ok());
 std::vector<EValue> result = outputs_res.get();
 auto logits = result[0].toTensor().const_data_ptr();
 #endif
