Commit 525acfb

ci: Add llama3 gpu workflow in periodic (#399)
1 parent ea62e84 commit 525acfb

File tree

3 files changed: +72 −8 lines changed

  .ci/scripts/download_llama.sh
  .ci/scripts/gather_test_models.py
  .github/workflows/periodic.yml

.ci/scripts/download_llama.sh

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+set -xeou pipefail
+
+shopt -s globstar
+
+install_huggingface_cli() {
+  pip install -U "huggingface_hub[cli]"
+}
+
+download_checkpoint() {
+  # This function is "technically re-usable but ymmv"
+  # includes org name, like <org>/<repo>
+  local repo_name=$1
+  local include=$2
+  # basically just removes the org in <org>/<repo>
+  local local_dir="checkpoints/${repo_name}"
+
+  mkdir -p "${local_dir}"
+  huggingface-cli download \
+    "${repo_name}" \
+    --quiet \
+    --include "${include}" \
+    --local-dir "${local_dir}"
+}
+
+# install huggingface-cli if not already installed
+if ! command -v huggingface-cli; then
+  install_huggingface_cli
+fi
+
+# TODO: Eventually you could extend this to download different models
+# taking in some arguments similar to .ci/scripts/wget_checkpoint.sh
+download_checkpoint "meta-llama/Meta-Llama-3-8B" "original/*"

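For reference, the download this script performs can also be expressed through the huggingface_hub Python API rather than the CLI. A minimal sketch, assuming huggingface_hub is installed and an HF_TOKEN with access to the gated repo is set in the environment (this sketch is not part of the commit):

    # Rough Python equivalent of download_checkpoint() above (sketch only).
    from huggingface_hub import snapshot_download

    repo_name = "meta-llama/Meta-Llama-3-8B"
    snapshot_download(
        repo_id=repo_name,
        allow_patterns="original/*",           # same filter as --include "original/*"
        local_dir=f"checkpoints/{repo_name}",  # same layout the script creates
    )
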
.ci/scripts/gather_test_models.py

Lines changed: 23 additions & 5 deletions
@@ -19,6 +19,10 @@
     "mistralai/Mistral-7B-v0.1": "https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model-00001-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model-00002-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/tokenizer_config.json",
     "mistralai/Mistral-7B-Instruct-v0.1": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model-00001-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model-00002-of-00002.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/tokenizer_config.json",
     "mistralai/Mistral-7B-Instruct-v0.2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/generation_config.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00001-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00002-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model-00003-of-00003.bin,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/pytorch_model.bin.index.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/special_tokens_map.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer.json,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer.model,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/tokenizer_config.json",
+
+    # huggingface-cli prefixed Models will download using the huggingface-cli tool
+    # TODO: Convert all of the MODEL_REPOS with a NamedTuple that includes the install_method
+    "huggingface-cli/meta-llama/Meta-Llama-3-8B": "",
 }

 JOB_RUNNERS = {

@@ -57,7 +61,7 @@ def parse_args() -> Any:
     return parser.parse_args()


-def model_should_run_on_event(model: str, event: str) -> bool:
+def model_should_run_on_event(model: str, event: str, backend: str) -> bool:
     """
     A helper function to decide whether a model should be tested on an event (pull_request/push)
     We put higher priority and fast models to pull request and rest to push.

@@ -67,7 +71,11 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     elif event == "push":
         return model in []
     elif event == "periodic":
-        return model in ["openlm-research/open_llama_7b"]
+        # test llama3 on gpu only, see description in https://github.com/pytorch/torchchat/pull/399 for reasoning
+        if backend == "gpu":
+            return model in ["openlm-research/open_llama_7b", "huggingface-cli/meta-llama/Meta-Llama-3-8B"]
+        else:
+            return model in ["openlm-research/open_llama_7b"]
     else:
         return False

@@ -102,15 +110,25 @@ def export_models_for_ci() -> dict[str, dict]:
         MODEL_REPOS.keys(),
         JOB_RUNNERS[backend].items(),
     ):
-        if not model_should_run_on_event(repo_name, event):
+        if not model_should_run_on_event(repo_name, event, backend):
             continue

+        # This is mostly temporary to get this finished quickly while
+        # doing minimal changes, see TODO at the top of the file to
+        # see how this should probably be done
+        install_method = "wget"
+        final_repo_name = repo_name
+        if repo_name.startswith("huggingface-cli"):
+            install_method = "huggingface-cli"
+            final_repo_name = repo_name.replace("huggingface-cli/", "")
+
         record = {
-            "repo_name": repo_name,
-            "model_name": repo_name.split("/")[-1],
+            "repo_name": final_repo_name,
+            "model_name": final_repo_name.split("/")[-1],
             "resources": MODEL_REPOS[repo_name],
             "runner": runner[0],
             "platform": runner[1],
+            "install_method": install_method,
             "timeout": 90,
         }

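The TODO in the first hunk gestures at the intended cleanup: carry the install method next to the resources instead of encoding it in the repo-name key. A minimal sketch of that idea, where ModelSpec and its fields are hypothetical names, not part of this commit:

    # Hypothetical shape for MODEL_REPOS per the TODO above (sketch only).
    from typing import NamedTuple

    class ModelSpec(NamedTuple):
        resources: str                 # comma-separated URLs for wget, or "" when not needed
        install_method: str = "wget"   # "wget" or "huggingface-cli"

    MODEL_REPOS = {
        # URLs elided here; see the real entries above.
        "mistralai/Mistral-7B-v0.1": ModelSpec(resources="https://huggingface.co/..."),
        "meta-llama/Meta-Llama-3-8B": ModelSpec(resources="", install_method="huggingface-cli"),
    }

With a shape like this, export_models_for_ci() could read the install method off the spec directly instead of prefix-matching and rewriting the repo name.
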
.github/workflows/periodic.yml

Lines changed: 15 additions & 3 deletions
@@ -113,10 +113,12 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-gpu
+    secrets: inherit
     strategy:
       matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
       fail-fast: false
     with:
+      secrets-env: "HF_TOKEN_PERIODIC"
       runner: ${{ matrix.runner }}
       gpu-arch-type: cuda
       gpu-arch-version: "12.1"

@@ -126,15 +128,25 @@
         echo "::endgroup::"

         echo "::group::Install required packages"
-        pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
+        pip install --progress-bar off --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
         pip install -r ./requirements.txt
         pip list
         python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
         echo "::endgroup::"

         echo "::group::Download checkpoint"
-        export REPO_NAME=${{ matrix.repo_name }}
-        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
+        export REPO_NAME="${{ matrix.repo_name }}"
+        case "${{ matrix.install_method }}" in
+          wget)
+            bash .ci/scripts/wget_checkpoint.sh "${REPO_NAME}" "${{ matrix.resources }}"
+            ;;
+          huggingface-cli)
+            (
+              set +x
+              HF_TOKEN="${SECRET_HF_TOKEN_PERIODIC}" bash .ci/scripts/download_llama.sh
+            )
+            ;;
+        esac
         echo "::endgroup::"

         echo "::group::Convert checkpoint"

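For context on how the pieces connect: the test-gpu job consumes the JSON matrix emitted by gather_test_models.py, so for the new model each matrix entry looks roughly like the record below. This is a sketch; the runner and platform values are illustrative, since in reality they come from JOB_RUNNERS:

    # Approximate matrix entry for the new periodic GPU model (illustrative values).
    record = {
        "repo_name": "meta-llama/Meta-Llama-3-8B",  # "huggingface-cli/" prefix stripped
        "model_name": "Meta-Llama-3-8B",
        "resources": "",                            # no wget URLs; fetched via huggingface-cli
        "runner": "linux.g5.4xlarge.nvidia.gpu",    # assumption; real value comes from JOB_RUNNERS
        "platform": "linux",                        # assumption
        "install_method": "huggingface-cli",
        "timeout": 90,
    }

On the secrets plumbing: listing HF_TOKEN_PERIODIC under secrets-env is what makes it available to the job script, which reads it as SECRET_HF_TOKEN_PERIODIC (the reusable linux_job.yml workflow appears to expose secrets with a SECRET_ prefix), and the set +x subshell keeps the token out of the xtrace log.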