
Commit 2545c9f (parent 64840c3)

ci: Add llama3 workflow in periodic

Adds a llama3 testing workflow to the periodic job; the checkpoint is downloaded using huggingface-cli. This is somewhat of a working prototype; I left a couple of TODOs in places where things could be done better if given more time.

Signed-off-by: Eli Uriegas <[email protected]>

File tree: 3 files changed, +66 −3 lines
.ci/scripts/download_llama.sh

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

set -euo pipefail

install_huggingface_cli() {
  pip install -U "huggingface_hub[cli]"
}

download_checkpoint() {
  # This function is "technically re-usable but ymmv"
  # includes org name, like <org>/<repo>
  local repo_name=$1
  local include=$2
  # basically just removes the org in <org>/<repo>
  local local_dir=${repo_name##*/}

  mkdir -p "${local_dir}"
  huggingface-cli download \
    "${repo_name}" \
    --include "${include}" \
    --local-dir "${local_dir}"
}

normalize_llama_checkpoint() {
  # normalizes the checkpoint layout into something that the rest of
  # the testing scripts understand
  local repo_name=$1
  local local_dir=${repo_name##*/}
  mkdir -p "${local_dir}"
  # the glob must sit outside the quotes so it expands
  mv "${local_dir}"/original/* "${local_dir}"
  mv "${local_dir}/consolidated.00.pth" "${local_dir}/model.pth"
  rmdir "${local_dir}/original"
}

# install huggingface-cli if not already installed
if ! command -v huggingface-cli > /dev/null; then
  install_huggingface_cli
fi

# TODO: Eventually you could extend this to download different models
# taking in some arguments similar to .ci/scripts/wget_checkpoint.sh
download_checkpoint "meta-llama/Meta-Llama-3-8B" "original/*"
normalize_llama_checkpoint "meta-llama/Meta-Llama-3-8B"
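For reference, the same download-and-flatten flow can be expressed through huggingface_hub's Python API. A minimal sketch, not part of the commit; it assumes huggingface_hub is installed and a token with access to the gated repo is configured:

# mirrors download_checkpoint + normalize_llama_checkpoint (illustrative)
from pathlib import Path
from huggingface_hub import snapshot_download

repo_id = "meta-llama/Meta-Llama-3-8B"
local_dir = Path(repo_id.split("/")[-1])  # same org-stripping as the shell script

snapshot_download(repo_id=repo_id, allow_patterns=["original/*"], local_dir=str(local_dir))

# flatten original/ and rename the weights file the way the test scripts expect
original = local_dir / "original"
for f in original.iterdir():
    f.rename(local_dir / f.name)
(local_dir / "consolidated.00.pth").rename(local_dir / "model.pth")
original.rmdir()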

.ci/scripts/gather_test_models.py

Lines changed: 17 additions & 3 deletions
@@ -19,6 +19,10 @@
     "mistralai/Mistral-7B-v0.1": "https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model-00001-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model-00002-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer_config.json",
     "mistralai/Mistral-7B-Instruct-v0.1": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model-00001-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model-00002-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer_config.json",
     "mistralai/Mistral-7B-Instruct-v0.2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00001-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00002-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00003-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer_config.json",
+    # huggingface-cli prefixed models will download using the huggingface-cli tool
+    # TODO: Convert all of the MODEL_REPOS with a NamedTuple that includes the install_method
+    "huggingface-cli/meta-llama/Meta-Llama-3-8B": "",
 }

 JOB_RUNNERS = {
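The TODO above hints at replacing the flat URL strings with a structured entry. One possible shape, purely illustrative and not part of the commit:

# hypothetical refactor sketched from the TODO; names are illustrative
from typing import NamedTuple

class ModelRepo(NamedTuple):
    resources: str       # comma-separated URLs for wget, empty for huggingface-cli
    install_method: str  # "wget" or "huggingface-cli"

MODEL_REPOS = {
    "meta-llama/Meta-Llama-3-8B": ModelRepo(resources="", install_method="huggingface-cli"),
}

Something like this would remove the need for the "huggingface-cli/" key prefix and the string stripping below in export_models_for_ci.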
@@ -67,7 +71,7 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     elif event == "push":
         return model in []
     elif event == "periodic":
-        return model in ["openlm-research/open_llama_7b"]
+        return model in ["openlm-research/open_llama_7b", "huggingface-cli/meta-llama/Meta-Llama-3-8B"]
     else:
         return False

@@ -105,12 +109,22 @@ def export_models_for_ci() -> dict[str, dict]:
         if not model_should_run_on_event(repo_name, event):
             continue

+        # This is mostly temporary to get this finished quickly while
+        # doing minimal changes; see the TODO at the top of the file for
+        # how this should probably be done
+        install_method = "wget"
+        final_repo_name = repo_name
+        if repo_name.startswith("huggingface-cli"):
+            install_method = "huggingface-cli"
+            final_repo_name = repo_name.replace("huggingface-cli/", "")
+
         record = {
-            "repo_name": repo_name,
-            "model_name": repo_name.split("/")[-1],
+            "repo_name": final_repo_name,
+            "model_name": final_repo_name.split("/")[-1],
             "resources": MODEL_REPOS[repo_name],
             "runner": runner[0],
             "platform": runner[1],
+            "install_method": install_method,
             "timeout": 90,
         }
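For the new entry, the record that lands in the CI matrix would look roughly like this; a sketch, with runner and platform values illustrative rather than taken from the commit:

# illustrative output of export_models_for_ci for the llama3 entry
record = {
    "repo_name": "meta-llama/Meta-Llama-3-8B",  # "huggingface-cli/" prefix already stripped
    "model_name": "Meta-Llama-3-8B",
    "resources": "",                            # nothing for wget to fetch
    "runner": "linux.g5.4xlarge.nvidia.gpu",    # assumed runner label
    "platform": "linux",
    "install_method": "huggingface-cli",
    "timeout": 90,
}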

.github/workflows/periodic.yml

Lines changed: 6 additions & 0 deletions
@@ -113,10 +113,12 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
+    secrets: inherit
     strategy:
       matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
       fail-fast: false
     with:
+      secrets-env: "HF_TOKEN_PERIODIC"
       runner: ${{ matrix.runner }}
       gpu-arch-type: cuda
       gpu-arch-version: "12.1"
@@ -134,6 +136,10 @@

         echo "::group::Download checkpoint"
         export REPO_NAME=${{ matrix.repo_name }}
+        case "${{ matrix.install_method }}" in
+          wget) bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }} ;;
+          huggingface-cli) bash .ci/scripts/download_llama.sh ;;
+        esac
         bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
         echo "::endgroup::"
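The case statement dispatches on the install_method field that gather_test_models.py now writes into each matrix entry. The matrix the fromJSON expression above consumes would look roughly like this; a sketch assuming the usual GitHub Actions include-list shape, with field values illustrative:

# assumed shape of the gather-models-gpu "models" output (illustrative)
models_output = {
    "include": [
        {"repo_name": "openlm-research/open_llama_7b",
         "install_method": "wget"},             # plus resources, runner, platform, timeout
        {"repo_name": "meta-llama/Meta-Llama-3-8B",
         "install_method": "huggingface-cli"},
    ]
}

As recorded, the pre-existing unconditional wget_checkpoint.sh call still follows the case statement, so the wget path fetches twice; that reads like one of the rough edges the commit message alludes to.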
