Skip to content

Commit 297b770

Browse files
authored
[RISCV] Fix stack size computation when M extension disabled (#78602)
Ensure that getVLENFactoredAmount does not fail when the scale amount requires a non-trivial multiplication but neither the M nor the Zmmul extension is enabled. In that case, perform the multiplication using shifts and adds.
1 parent 150a58b commit 297b770

File tree

2 files changed

+89
-6
lines changed

2 files changed

+89
-6
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3126,18 +3126,39 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
31263126
.addReg(ScaledRegister, RegState::Kill)
31273127
.addReg(DestReg, RegState::Kill)
31283128
.setMIFlag(Flag);
3129-
} else {
3129+
} else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
31303130
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
31313131
movImm(MBB, II, DL, N, NumOfVReg, Flag);
3132-
if (!STI.hasStdExtM() && !STI.hasStdExtZmmul())
3133-
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3134-
MF.getFunction(),
3135-
"M- or Zmmul-extension must be enabled to calculate the vscaled size/"
3136-
"offset."});
31373132
BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
31383133
.addReg(DestReg, RegState::Kill)
31393134
.addReg(N, RegState::Kill)
31403135
.setMIFlag(Flag);
3136+
} else {
3137+
Register Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3138+
BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc)
3139+
.addReg(RISCV::X0)
3140+
.addImm(0)
3141+
.setMIFlag(Flag);
3142+
uint32_t PrevShiftAmount = 0;
3143+
for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) {
3144+
if (NumOfVReg & (1 << ShiftAmount)) {
3145+
if (ShiftAmount)
3146+
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3147+
.addReg(DestReg, RegState::Kill)
3148+
.addImm(ShiftAmount - PrevShiftAmount)
3149+
.setMIFlag(Flag);
3150+
if (NumOfVReg >> (ShiftAmount + 1))
3151+
BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3152+
.addReg(Acc, RegState::Kill)
3153+
.addReg(DestReg)
3154+
.setMIFlag(Flag);
3155+
PrevShiftAmount = ShiftAmount;
3156+
}
3157+
}
3158+
BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3159+
.addReg(DestReg, RegState::Kill)
3160+
.addReg(Acc)
3161+
.setMIFlag(Flag);
31413162
}
31423163
}
31433164

llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA
44
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \
55
; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA
6+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
7+
; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL
68

79
define void @lmul1() nounwind {
810
; CHECK-LABEL: lmul1:
@@ -243,6 +245,26 @@ define void @lmul4_and_2_x2_1() nounwind {
243245
; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
244246
; ZBA-NEXT: addi sp, sp, 48
245247
; ZBA-NEXT: ret
248+
;
249+
; NOMUL-LABEL: lmul4_and_2_x2_1:
250+
; NOMUL: # %bb.0:
251+
; NOMUL-NEXT: addi sp, sp, -48
252+
; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
253+
; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
254+
; NOMUL-NEXT: addi s0, sp, 48
255+
; NOMUL-NEXT: csrr a0, vlenb
256+
; NOMUL-NEXT: li a1, 0
257+
; NOMUL-NEXT: slli a0, a0, 2
258+
; NOMUL-NEXT: add a1, a1, a0
259+
; NOMUL-NEXT: slli a0, a0, 1
260+
; NOMUL-NEXT: add a0, a0, a1
261+
; NOMUL-NEXT: sub sp, sp, a0
262+
; NOMUL-NEXT: andi sp, sp, -32
263+
; NOMUL-NEXT: addi sp, s0, -48
264+
; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
265+
; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
266+
; NOMUL-NEXT: addi sp, sp, 48
267+
; NOMUL-NEXT: ret
246268
%v1 = alloca <vscale x 4 x i64>
247269
%v3 = alloca <vscale x 4 x i64>
248270
%v2 = alloca <vscale x 2 x i64>
@@ -425,6 +447,26 @@ define void @lmul_8_x5() nounwind {
425447
; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
426448
; ZBA-NEXT: addi sp, sp, 80
427449
; ZBA-NEXT: ret
450+
;
451+
; NOMUL-LABEL: lmul_8_x5:
452+
; NOMUL: # %bb.0:
453+
; NOMUL-NEXT: addi sp, sp, -80
454+
; NOMUL-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
455+
; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
456+
; NOMUL-NEXT: addi s0, sp, 80
457+
; NOMUL-NEXT: csrr a0, vlenb
458+
; NOMUL-NEXT: li a1, 0
459+
; NOMUL-NEXT: slli a0, a0, 3
460+
; NOMUL-NEXT: add a1, a1, a0
461+
; NOMUL-NEXT: slli a0, a0, 2
462+
; NOMUL-NEXT: add a0, a0, a1
463+
; NOMUL-NEXT: sub sp, sp, a0
464+
; NOMUL-NEXT: andi sp, sp, -64
465+
; NOMUL-NEXT: addi sp, s0, -80
466+
; NOMUL-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
467+
; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
468+
; NOMUL-NEXT: addi sp, sp, 80
469+
; NOMUL-NEXT: ret
428470
%v1 = alloca <vscale x 8 x i64>
429471
%v2 = alloca <vscale x 8 x i64>
430472
%v3 = alloca <vscale x 8 x i64>
@@ -467,6 +509,26 @@ define void @lmul_8_x9() nounwind {
467509
; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
468510
; ZBA-NEXT: addi sp, sp, 80
469511
; ZBA-NEXT: ret
512+
;
513+
; NOMUL-LABEL: lmul_8_x9:
514+
; NOMUL: # %bb.0:
515+
; NOMUL-NEXT: addi sp, sp, -80
516+
; NOMUL-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
517+
; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
518+
; NOMUL-NEXT: addi s0, sp, 80
519+
; NOMUL-NEXT: csrr a0, vlenb
520+
; NOMUL-NEXT: li a1, 0
521+
; NOMUL-NEXT: slli a0, a0, 3
522+
; NOMUL-NEXT: add a1, a1, a0
523+
; NOMUL-NEXT: slli a0, a0, 3
524+
; NOMUL-NEXT: add a0, a0, a1
525+
; NOMUL-NEXT: sub sp, sp, a0
526+
; NOMUL-NEXT: andi sp, sp, -64
527+
; NOMUL-NEXT: addi sp, s0, -80
528+
; NOMUL-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
529+
; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
530+
; NOMUL-NEXT: addi sp, sp, 80
531+
; NOMUL-NEXT: ret
470532
%v1 = alloca <vscale x 8 x i64>
471533
%v2 = alloca <vscale x 8 x i64>
472534
%v3 = alloca <vscale x 8 x i64>

0 commit comments

Comments
 (0)