Skip to content

Commit 4efed41

Browse files
mikekgfbmalfet
andcommitted
rm kludge for 4b float32 (#271)
* rm kludge for 4b float32 * fix comments * print version * remove tv ta which hold back progress * remove more torchaudio torchvision * add version info * tab->spc, again * Uninstall torch on M1 before re-installing it As machine is dirty * Update pull.yml * Copy-pasting the same code around But again, I have not started that --------- Co-authored-by: Nikita Shulga <[email protected]>
1 parent 87a7311 commit 4efed41

File tree

4 files changed

+105
-47
lines changed

4 files changed

+105
-47
lines changed

.github/workflows/periodic.yml

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
echo "$(uname -a)"
4848
- name: Install dependencies
4949
run: |
50-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
50+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
5151
pip install -r requirements.txt
5252
pip list
5353
- name: Download checkpoints
@@ -80,7 +80,7 @@ jobs:
8080
echo "$(uname -a)"
8181
- name: Install dependencies
8282
run: |
83-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
83+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
8484
pip install -r requirements.txt
8585
pip list
8686
- name: Download checkpoints
@@ -124,7 +124,7 @@ jobs:
124124
echo "::endgroup::"
125125
126126
echo "::group::Install required packages"
127-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
127+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
128128
pip install -r ./requirements.txt
129129
pip list
130130
echo "::endgroup::"
@@ -140,5 +140,37 @@ jobs:
140140
141141
echo "::group::Run inference"
142142
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
143+
test-gpu-aoti:
144+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
145+
name: test-gpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
146+
needs: gather-models-gpu
147+
strategy:
148+
matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
149+
fail-fast: false
150+
with:
151+
runner: linux.g5.4xlarge.nvidia.gpu
152+
gpu-arch-type: cuda
153+
gpu-arch-version: "12.1"
154+
script: |
155+
echo "::group::Print machine info"
156+
nvidia-smi
157+
echo "::endgroup::"
158+
159+
echo "::group::Install required packages"
160+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
161+
pip install -r ./requirements.txt
162+
pip list
163+
echo "::endgroup::"
164+
165+
echo "::group::Download checkpoint"
166+
export REPO_NAME=${{ matrix.repo_name }}
167+
bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
168+
echo "::endgroup::"
169+
170+
echo "::group::Convert checkpoint"
171+
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
172+
echo "::endgroup::"
173+
174+
echo "::group::Run inference"
143175
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti"
144176
echo "::endgroup::"

.github/workflows/pull.yml

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,15 @@ jobs:
4646
echo "$(uname -a)"
4747
- name: Install dependencies
4848
run: |
49-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
49+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
5050
pip install -r requirements.txt
5151
pip list
5252
- name: Download checkpoints
5353
run: |
5454
bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
5555
- name: Run validation
5656
run: |
57+
python3 -c "import torch;print(torch.__version__, torch.version.git_version)"
5758
pushd ${TORCHCHAT_ROOT}
5859
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
5960
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "compile"
@@ -79,7 +80,7 @@ jobs:
7980
echo "$(uname -a)"
8081
- name: Install dependencies
8182
run: |
82-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
83+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
8384
pip install -r requirements.txt
8485
pip list
8586
- name: Download checkpoints
@@ -123,7 +124,7 @@ jobs:
123124
echo "::endgroup::"
124125
125126
echo "::group::Install required packages"
126-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
127+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
127128
pip install -r ./requirements.txt
128129
pip list
129130
echo "::endgroup::"
@@ -157,7 +158,7 @@ jobs:
157158
echo "::endgroup::"
158159
159160
echo "::group::Install required packages"
160-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
161+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
161162
pip install -r ./requirements.txt
162163
pip list
163164
echo "::endgroup::"
@@ -326,7 +327,7 @@ jobs:
326327
- name: Install requirements
327328
run: |
328329
echo "Installing pip packages"
329-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
330+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
330331
pip install -r requirements.txt
331332
332333
- name: Download Stories files
@@ -372,7 +373,7 @@ jobs:
372373
fi
373374
- name: Install requirements
374375
run: |
375-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
376+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
376377
pip install -r requirements.txt
377378
- name: Download checkpoints
378379
run: |
@@ -436,19 +437,21 @@ jobs:
436437
with:
437438
runner: macos-m1-stable
438439
script: |
440+
set -x
441+
# NS: Remove previous installation of torch first
442+
# as this script does not install anything into conda env but rather as system dep
443+
pip uninstall -y torch || true
439444
set -eou pipefail
440445
441446
echo "::group::Print machine info"
442447
uname -a
443-
if [ $(uname -s) == Darwin ]; then
444-
sysctl machdep.cpu.brand_string
445-
sysctl machdep.cpu.core_count
446-
fi
448+
sysctl machdep.cpu.brand_string
449+
sysctl machdep.cpu.core_count
447450
echo "::endgroup::"
448451
449452
echo "::group::Install requirements"
450453
# Install requirements
451-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
454+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
452455
ls -la
453456
pwd
454457
pip install -r requirements.txt
@@ -519,7 +522,7 @@ jobs:
519522
run: |
520523
echo "Intalling pip packages"
521524
pip install gguf
522-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
525+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
523526
pip install -r requirements.txt
524527
525528
git clone https://github.com/ggerganov/llama.cpp.git
@@ -547,19 +550,22 @@ jobs:
547550
with:
548551
runner: macos-m1-stable
549552
script: |
553+
set -x
554+
# NS: Remove previous installation of torch first
555+
# as this script does not install anything into conda env but rather as system dep
556+
pip uninstall -y torch || true
557+
550558
set -eou pipefail
551559
552560
echo "::group::Print machine info"
553561
uname -a
554-
if [ $(uname -s) == Darwin ]; then
555-
sysctl machdep.cpu.brand_string
556-
sysctl machdep.cpu.core_count
557-
fi
562+
sysctl machdep.cpu.brand_string
563+
sysctl machdep.cpu.core_count
558564
echo "::endgroup::"
559565
560566
echo "::group::Install requirements"
561567
# Install requirements
562-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
568+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
563569
ls -la
564570
pwd
565571
pip install -r requirements.txt
@@ -624,7 +630,7 @@ jobs:
624630
- name: Install requirements
625631
run: |
626632
pip install gguf
627-
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
633+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
628634
pip install -r requirements.txt
629635
- name: Download GGUF
630636
run: |

.github/workflows/torch_version.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
name: torch version
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
workflow_dispatch:
9+
10+
jobs:
11+
gguf-load-test:
12+
strategy:
13+
matrix:
14+
runner: [ubuntu-latest, macos-14]  # comma-separated: one matrix job per runner label
15+
runs-on: ${{matrix.runner}}
16+
steps:
17+
- name: Checkout repo
18+
uses: actions/checkout@v2
19+
- name: Setup Python
20+
uses: actions/setup-python@v2
21+
with:
22+
python-version: '3.11'  # quoted so YAML keeps the version a string, not a float
23+
- name: Print machine info
24+
run: |
25+
uname -a
26+
if [ $(uname -s) == Darwin ]; then
27+
sysctl machdep.cpu.brand_string
28+
sysctl machdep.cpu.core_count
29+
fi
30+
- name: Install requirements
31+
run: |
32+
echo "Intalling pip packages"
33+
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
34+
pip install -r requirements.txt
35+
- name: Load files
36+
run: |
37+
touch test.py
38+
echo "import torch" >> test.py
39+
echo 'print(f"torch version gitversion is {torch.version.git_version}")' >> test.py
40+
cat test.py
41+
python test.py
42+
43+
echo "Tests complete."

quantize.py

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -755,35 +755,12 @@ def linear_forward_int4(x, weight_int4pack, scales_and_zeros, out_features, grou
755755
origin_x_size = x.size()
756756
x = x.reshape(-1, origin_x_size[-1])
757757

758-
if (
759-
(x.dtype == torch.float32)
760-
and (("cpu" in str(x.device)))
761-
or "cuda" in str(x.device)
762-
):
763-
c = torch.ops.aten._weight_int4pack_mm(
764-
x.to(
765-
torch.bfloat16
766-
), # TODO: should probably make a warning if x is not already bfloat16
767-
weight_int4pack,
768-
groupsize,
769-
scales_and_zeros.to(
770-
torch.bfloat16
771-
), # TODO: should probably make a warning if not already bfloat16
772-
).to(
773-
x.dtype
774-
) # cast back to x.dtype
775-
elif ((x.dtype == torch.float32) and (("mps" in str(x.device)))) or (
776-
"cuda" in str(x.device)
777-
):
758+
if "cuda" in str(x.device):
778759
c = torch.ops.aten._weight_int4pack_mm(
779-
x.to(
780-
torch.float16
781-
), # TODO: should probably make a warning if x is not already bfloat16
760+
x.to(torch.bfloat16),
782761
weight_int4pack,
783762
groupsize,
784-
scales_and_zeros.to(
785-
torch.float16
786-
), # TODO: should probably make a warning if not already bfloat16
763+
scales_and_zeros.to(torch.bfloat16),
787764
).to(
788765
x.dtype
789766
) # cast back to x.dtype

0 commit comments

Comments
 (0)