
Commit c0e420e

Merge branch 'main' into op_softmax

2 parents de30991 + ca47839

100 files changed: +2743 additions, -471 deletions

.ci/scripts/test_eval_llama_mmlu.sh

Lines changed: 64 additions & 0 deletions
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run eval_llama at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --tasks mmlu \
    -f 5 \
    --max_seq_length 2048 \
    --limit 5 > result.txt

  # Verify result.txt
  RESULT=$(cat result.txt)
  EXPECTED_TASK="mmlu"
  EXPECTED_RESULT="acc"
  if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify
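
Note that run_and_verify checks result.txt with bash glob matching rather than an exact comparison: an unquoted * on the right-hand side of [[ ... == ... ]] matches any run of characters, so the check passes whenever the eval output starts with the task name and mentions the metric key anywhere after it. A minimal standalone sketch of the same match, using a hypothetical line of eval output:

#!/bin/bash
# Hypothetical eval output; the real result.txt content comes from eval_llama.
RESULT='mmlu: {"acc": 0.23, "acc_stderr": 0.04}'
EXPECTED_TASK="mmlu"
EXPECTED_RESULT="acc"
# Inside [[ ]], an unquoted * in the pattern matches any character sequence,
# so this succeeds for any accuracy value as long as the "acc" key appears.
if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
  echo "pattern matched"
fi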
.ci/scripts/test_eval_llama_wikitext.sh

Lines changed: 62 additions & 0 deletions

#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run eval_llama at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.eval_llama \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 2048 \
    --limit 5 > result.txt

  # Verify result.txt
  RESULT=$(cat result.txt)
  EXPECTED_TASK="wikitext"
  EXPECTED_RESULT="word_perplexity"
  if [[ "${RESULT}" == "${EXPECTED_TASK}: {"*"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify
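
This script differs from the mmlu one only in omitting --tasks and -f (presumably falling back to eval_llama's default wikitext task, given the expected output) and in the metric it looks for. A hedged local invocation, assuming an executorch checkout with the llama example requirements installed:

# Run from the repository root; downloads the stories110M checkpoint and
# tokenizer into the current directory before evaluating.
PYTHON_EXECUTABLE=python3 bash .ci/scripts/test_eval_llama_wikitext.sh
# Exit code is 0 when result.txt contains the wikitext word_perplexity metric, 1 otherwise.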
.ci/scripts/test_llama_runner_eager.sh

Lines changed: 62 additions & 0 deletions

#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Download and prepare stories model artifacts
prepare_model_artifacts() {
  echo "Preparing stories model artifacts"
  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}

run_and_verify() {
  NOW=$(date +"%H:%M:%S")
  echo "Starting to run eval_llama at ${NOW}"
  if [[ ! -f "stories110M.pt" ]]; then
    echo "stories110M.pt is missing."
    exit 1
  fi
  if [[ ! -f "tokenizer.model" ]]; then
    echo "tokenizer.model is missing."
    exit 1
  fi
  if [[ ! -f "params.json" ]]; then
    echo "params.json is missing."
    exit 1
  fi
  $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
    -c stories110M.pt \
    -p params.json \
    -t tokenizer.model \
    -kv \
    -d fp32 \
    --max_seq_length 32 \
    --temperature 0 \
    --prompt "Once upon a time," > result.txt

  # Verify result.txt
  RESULT=$(cat result.txt)
  EXPECTED_RESULT="there was a little girl"
  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
    echo "Actual result: ${RESULT}"
    echo "Success"
    exit 0
  else
    echo "Actual result: ${RESULT}"
    echo "Failure; results not the same"
    exit 1
  fi
}

prepare_model_artifacts
run_and_verify
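
With --temperature 0 the runner samples greedily, so for a fixed checkpoint the completion should be deterministic and the substring check against "there was a little girl" stays stable across runs. The runner can also be invoked directly with the same flags the script passes:

# Direct invocation, run from the repo root after prepare_model_artifacts
# has downloaded the checkpoint, tokenizer, and params.json.
python -m examples.models.llama.runner.eager \
  -c stories110M.pt \
  -p params.json \
  -t tokenizer.model \
  -kv \
  -d fp32 \
  --max_seq_length 32 \
  --temperature 0 \
  --prompt "Once upon a time,"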

.github/scripts/propose_ghstack_orig_pr.py

Lines changed: 14 additions & 7 deletions
@@ -26,9 +26,9 @@ def parse_args():
         required=True,
     )
     parser.add_argument(
-        "--pr",
-        type=int,
-        help="Number of the PR in the stack to check and create corresponding PR",
+        "--ref",
+        type=str,
+        help="Ref of the PR in the stack to check and create corresponding PR",
         required=True,
     )
     return parser.parse_args()
@@ -68,12 +68,18 @@ def extract_stack_from_body(pr_body: str) -> List[int]:
     return list(reversed(prs))


-def get_pr_stack_from_number(pr_number: int, repo: Repository) -> List[int]:
+def get_pr_stack_from_number(ref: str, repo: Repository) -> List[int]:
+    if ref.isnumeric():
+        pr_number = int(ref)
+    else:
+        branch_name = ref.replace("refs/heads/", "")
+        pr_number = repo.get_branch(branch_name).commit.get_pulls()[0].number
+
     pr_stack = extract_stack_from_body(repo.get_pull(pr_number).body)

     if not pr_stack:
         raise Exception(
-            f"Could not find PR stack in body of #{pr_number}. "
+            f"Could not find PR stack in body of ref. "
             + "Please make sure that the PR was created with ghstack."
         )
@@ -100,7 +106,8 @@ def create_prs_for_orig_branch(pr_stack: List[int], repo: Repository):
     ghstack PR base: https://github.com/pytorch/executorch/tree/{pr.base.ref}
     ghstack PR head: https://github.com/pytorch/executorch/tree/{pr.head.ref}
     Merge bot PR base: https://github.com/pytorch/executorch/tree/{orig_branch_merge_base}
-    Merge bot PR head: https://github.com/pytorch/executorch/tree/{orig_branch_merge_head}"""
+    Merge bot PR head: https://github.com/pytorch/executorch/tree/{orig_branch_merge_head}
+    @diff-train-skip-merge"""

     existing_orig_pr = repo.get_pulls(
         head="pytorch:" + orig_branch_merge_head,
@@ -128,7 +135,7 @@ def main():

     with Github(auth=Auth.Token(os.environ["GITHUB_TOKEN"])) as gh:
         repo = gh.get_repo(args.repo)
-        create_prs_for_orig_branch(get_pr_stack_from_number(args.pr, repo), repo)
+        create_prs_for_orig_branch(get_pr_stack_from_number(args.ref, repo), repo)


 if __name__ == "__main__":
.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 3 deletions
@@ -32,9 +32,7 @@ jobs:
         run: |
           pip install pygithub

-          PR_NUMBER=$(echo "$GITHUB_REF" | grep -oE '[0-9]+')
-
-          python .github/scripts/propose_ghstack_orig_pr.py --pr $PR_NUMBER --repo pytorch/executorch
+          python .github/scripts/propose_ghstack_orig_pr.py --ref $GITHUB_REF --repo pytorch/executorch
         env:
           GITHUB_TOKEN: ${{ secrets.GH_PYTORCHBOT_CHERRY_PICK_TOKEN }}
           GITHUB_REF: ${{ github.ref }}
.github/workflows/pull.yml

Lines changed: 81 additions & 0 deletions
@@ -447,3 +447,84 @@ jobs:

         # run e2e (export, tokenizer and runner)
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh
+
+  test-eval_llama-wikitext-linux:
+    name: test-eval_llama-wikitext-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run eval_llama wikitext task
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh
+
+  test-eval_llama-mmlu-linux:
+    name: test-eval_llama-mmlu-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run eval_llama mmlu task
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+
+  test-llama_runner_eager-linux:
+    name: test-llama_runner_eager-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run llama runner in eager mode
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

.github/workflows/update-viablestrict.yml

Lines changed: 3 additions & 1 deletion
@@ -22,4 +22,6 @@ jobs:
       stable-branch: viable/strict
       requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"^Android$\", \"^Apple$\"]'
       secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }}
-      rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}
+      clickhouse-url: ${{ secrets.CLICKHOUSE_URL }}
+      clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }}
+      clickhouse-password: ${{ secrets.CLICKHOUSE_VIABLESTRICT_PASSWORD }}
