
Commit 7493aae

helunwencser authored and facebook-github-bot committed
fix llama eager runner and add ci (#6344)
Summary:
Pull Request resolved: #6344

imported-using-ghimport

Test Plan: Imported from OSS

Reviewed By: dvorjackz

Differential Revision: D64567826

Pulled By: helunwencser

fbshipit-source-id: 5b9f72c4f691ca3888c8cc5e4ab934a764ba7d18
1 parent 179fd69 commit 7493aae

File tree

.ci/scripts/test_llama_runner_eager.sh
.github/workflows/pull.yml
examples/models/llama/runner/eager.py
examples/models/llama/runner/generation.py

4 files changed: +96 -5 lines changed
.ci/scripts/test_llama_runner_eager.sh

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+# Download and prepare stories model artifacts
+prepare_model_artifacts() {
+  echo "Preparing stories model artifacts"
+  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
+  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
+  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+}
+
+run_and_verify() {
+  NOW=$(date +"%H:%M:%S")
+  echo "Starting to run eval_llama at ${NOW}"
+  if [[ ! -f "stories110M.pt" ]]; then
+    echo "stories110M.pt is missing."
+    exit 1
+  fi
+  if [[ ! -f "tokenizer.model" ]]; then
+    echo "tokenizer.model is missing."
+    exit 1
+  fi
+  if [[ ! -f "params.json" ]]; then
+    echo "params.json is missing."
+    exit 1
+  fi
+  $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
+    -c stories110M.pt \
+    -p params.json \
+    -t tokenizer.model \
+    -kv \
+    -d fp32 \
+    --max_seq_length 32 \
+    --temperature 0 \
+    --prompt "Once upon a time," > result.txt
+
+  # Verify result.txt
+  RESULT=$(cat result.txt)
+  EXPECTED_RESULT="there was a little girl"
+  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
+    echo "Actual result: ${RESULT}"
+    echo "Success"
+    exit 0
+  else
+    echo "Actual result: ${RESULT}"
+    echo "Failure; results not the same"
+    exit 1
+  fi
+}
+
+prepare_model_artifacts
+run_and_verify
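
Because the runner is invoked with --temperature 0, decoding is greedy and the output is deterministic for a fixed checkpoint and prompt, so a plain substring check on result.txt is enough to verify the run. A minimal Python sketch of that verification step (the file name and expected substring mirror the script above; everything else is illustrative):

# Minimal sketch of run_and_verify's check, assuming result.txt was written
# by the eager runner CLI above (greedy decoding via --temperature 0).
import sys
from pathlib import Path

EXPECTED_RESULT = "there was a little girl"

def verify(result_path: str = "result.txt") -> None:
    result = Path(result_path).read_text()
    print(f"Actual result: {result}")
    if EXPECTED_RESULT in result:
        print("Success")
        sys.exit(0)
    print("Failure; results not the same")
    sys.exit(1)

if __name__ == "__main__":
    verify()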

.github/workflows/pull.yml

Lines changed: 27 additions & 0 deletions
@@ -501,3 +501,30 @@ jobs:

         # run eval_llama mmlu task
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+
+  test-llama_runner_eager-linux:
+    name: test-llama_runner_eager-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run llama runner in eager mode
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

examples/models/llama/runner/eager.py

Lines changed: 2 additions & 2 deletions
@@ -11,11 +11,11 @@
 import torch

 from examples.models.llama.llama_transformer import ModelArgs
-from executorch.examples.models.llama2.export_llama_lib import (
+from executorch.examples.models.llama.export_llama_lib import (
     _prepare_for_llama_export,
     build_args_parser as _build_args_parser,
 )
-from executorch.examples.models.llama2.runner.generation import LlamaRunner
+from executorch.examples.models.llama.runner.generation import LlamaRunner
 from executorch.extension.llm.export import LLMEdgeManager

examples/models/llama/runner/generation.py

Lines changed: 5 additions & 3 deletions
@@ -10,7 +10,7 @@
 import torch

 from executorch.examples.models.llama.llama_transformer import ModelArgs
-from executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer
+from executorch.extension.llm.tokenizer.utils import get_tokenizer


 class CompletionPrediction(TypedDict, total=False):
@@ -53,7 +53,7 @@ def next_token(logits: torch.Tensor, temperature: float, top_p: float) -> int:
 class LlamaRunner(ABC):
     def __init__(self, tokenizer_path: str, model_args: ModelArgs):
         self.params = model_args
-        self.tokenizer = Tokenizer(tokenizer_path)
+        self.tokenizer = get_tokenizer(tokenizer_path)
         assert model_args.vocab_size == self.tokenizer.n_words

     @abstractmethod
@@ -93,7 +93,9 @@ def generate( # noqa: C901
             else:
                 logits = self.forward(tokens=torch.tensor([tokens], dtype=torch.long))
             current_token = next_token(logits, temperature, top_p)
-            if current_token in self.tokenizer.stop_tokens:
+            if current_token == self.tokenizer.eos_id or (
+                hasattr(self, "stop_tokens") and current_token in self.stop_tokens
+            ):
                 break
             tokens.append(current_token)

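
The reworked stop condition in generate accommodates both tokenizer families that get_tokenizer can return: a SentencePiece tokenizer exposes only eos_id and has no stop_tokens, while a runner subclass built around a Tiktoken-style tokenizer can define its own stop_tokens set. A self-contained sketch of the exit logic (DummyTokenizer and the token ids are illustrative, not from the source):

# Hedged sketch of the new stop condition: stop on the tokenizer's EOS id,
# or on a runner-level stop_tokens set when a subclass defines one.
class DummyTokenizer:
    eos_id = 2  # SentencePiece-style tokenizers expose a single EOS id

class DummyRunner:
    tokenizer = DummyTokenizer()
    # A Tiktoken-backed subclass might additionally set, e.g.:
    # stop_tokens = {128001, 128009}

    def should_stop(self, current_token: int) -> bool:
        return current_token == self.tokenizer.eos_id or (
            hasattr(self, "stop_tokens") and current_token in self.stop_tokens
        )

runner = DummyRunner()
assert runner.should_stop(2)       # EOS always ends generation
assert not runner.should_stop(42)  # an ordinary token does not

The hasattr check keeps the base class agnostic: it only consults stop_tokens on runners that declare it, so SentencePiece-backed runners never touch an attribute they lack.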
