Skip to content

Commit cc9d76e

Browse files
committed
Fix for MGPU-AL with Nemotron 51b
1 parent a299424 commit cc9d76e

File tree

1 file changed: 1 addition, 0 deletions

koboldcpp.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,7 @@ def autoset_gpu_layers(ctxsize,sdquanted,blasbatchsize,flashattention,quantkv,mm
     print(f"Model layers: {layers} ; Size per layer: {sizeperlayer/1024/1024:.3f} MiB ; Attention heads: {headcount} ; Head size : {headkvlen}")
     print("***")
     if headcount > 0:
+        if headcount == 120: headcount = 8
         print(f"STEP_2a : PRECISE CALC of the ratio possible because detected model attention heads: {headcount} > 0")
         print(f"COEFS : BBS: {bbs}, BBS.Ratio: {bbs_ratio}, FA: {fa}, FA.Ratio: {fa_ratio}, MMQ: {mmq}, MMQ.Ratio: {mmq_ratio}, Quant KV mode: {kvq}, QKV bpw: {kvbpw} bits")
         print(f"Secondary Coefficients : Lowvram: {lv} ; LowVram-ConText.Ratio: {lvctx_ratio} ; LowVram-ComPute.Ratio: {lvcomp_ratio}")

0 commit comments

Comments (0)