Skip to content

[RISCV] Exploit sh3add/sh2add for stack offsets by shifted 12-bit constants #87950

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,31 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
return;
}

// Use shNadd if doing so lets us materialize a 12 bit immediate with a single
// instruction. This saves 1 instruction over the full lui/addi+add fallback
// path. We avoid anything which can be done with a single lui as it might
// be compressible. Note that the sh1add case is fully covered by the 2x addi
// case just above and is thus omitted.
if (ST.hasStdExtZba() && (Val & 0xFFF) != 0) {
unsigned Opc = 0;
if (isShiftedInt<12, 3>(Val)) {
Opc = RISCV::SH3ADD;
Val = Val >> 3;
} else if (isShiftedInt<12, 2>(Val)) {
Opc = RISCV::SH2ADD;
Val = Val >> 2;
}
if (Opc) {
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
TII->movImm(MBB, II, DL, ScratchReg, Val, Flag);
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
.addReg(ScratchReg, RegState::Kill)
.addReg(SrcReg, getKillRegState(KillSrcReg))
.setMIFlag(Flag);
return;
}
}

unsigned Opc = RISCV::ADD;
if (Val < 0) {
Val = -Val;
Expand Down
323 changes: 210 additions & 113 deletions llvm/test/CodeGen/RISCV/prolog-epilogue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -182,43 +182,77 @@ define void @frame_4kb() {
}

define void @frame_4kb_offset_128() {
; RV32-LABEL: frame_4kb_offset_128:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 1
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 6256
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 1
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
; RV32I-LABEL: frame_4kb_offset_128:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 128
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: .cfi_def_cfa_offset 6256
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: call callee
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 128
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV64-LABEL: frame_4kb_offset_128:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 1
; RV64-NEXT: addiw a0, a0, 128
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 6256
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 1
; RV64-NEXT: addiw a0, a0, 128
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
; RV32ZBA-LABEL: frame_4kb_offset_128:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32ZBA-NEXT: .cfi_offset ra, -4
; RV32ZBA-NEXT: li a0, -528
; RV32ZBA-NEXT: sh3add sp, a0, sp
; RV32ZBA-NEXT: .cfi_def_cfa_offset 6256
; RV32ZBA-NEXT: addi a0, sp, 12
; RV32ZBA-NEXT: call callee
; RV32ZBA-NEXT: li a0, 528
; RV32ZBA-NEXT: sh3add sp, a0, sp
; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: ret
;
; RV64I-LABEL: frame_4kb_offset_128:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 128
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 6256
; RV64I-NEXT: addi a0, sp, 8
; RV64I-NEXT: call callee
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 128
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: frame_4kb_offset_128:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: li a0, -528
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: .cfi_def_cfa_offset 6256
; RV64ZBA-NEXT: addi a0, sp, 8
; RV64ZBA-NEXT: call callee
; RV64ZBA-NEXT: li a0, 528
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: ret
%a = alloca [6240 x i8]
call void @callee(ptr %a)
ret void
Expand Down Expand Up @@ -266,86 +300,154 @@ define void @frame_8kb() {
}

define void @frame_8kb_offset_128() {
; RV32-LABEL: frame_8kb_offset_128:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 2
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 10352
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 2
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
; RV32I-LABEL: frame_8kb_offset_128:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: lui a0, 2
; RV32I-NEXT: addi a0, a0, 128
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: .cfi_def_cfa_offset 10352
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: call callee
; RV32I-NEXT: lui a0, 2
; RV32I-NEXT: addi a0, a0, 128
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV64-LABEL: frame_8kb_offset_128:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 2
; RV64-NEXT: addiw a0, a0, 128
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 10352
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 2
; RV64-NEXT: addiw a0, a0, 128
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
; RV32ZBA-LABEL: frame_8kb_offset_128:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32ZBA-NEXT: .cfi_offset ra, -4
; RV32ZBA-NEXT: li a0, -1040
; RV32ZBA-NEXT: sh3add sp, a0, sp
; RV32ZBA-NEXT: .cfi_def_cfa_offset 10352
; RV32ZBA-NEXT: addi a0, sp, 12
; RV32ZBA-NEXT: call callee
; RV32ZBA-NEXT: li a0, 1040
; RV32ZBA-NEXT: sh3add sp, a0, sp
; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: ret
;
; RV64I-LABEL: frame_8kb_offset_128:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: addiw a0, a0, 128
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 10352
; RV64I-NEXT: addi a0, sp, 8
; RV64I-NEXT: call callee
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: addiw a0, a0, 128
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: frame_8kb_offset_128:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: li a0, -1040
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: .cfi_def_cfa_offset 10352
; RV64ZBA-NEXT: addi a0, sp, 8
; RV64ZBA-NEXT: call callee
; RV64ZBA-NEXT: li a0, 1040
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: ret
%a = alloca [10336 x i8]
call void @callee(ptr %a)
ret void
}

define void @frame_16kb_minus_80() {
; RV32-LABEL: frame_16kb_minus_80:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 4
; RV32-NEXT: addi a0, a0, -80
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 18336
; RV32-NEXT: addi a0, sp, 12
; RV32-NEXT: call callee
; RV32-NEXT: lui a0, 4
; RV32-NEXT: addi a0, a0, -80
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: ret
; RV32I-LABEL: frame_16kb_minus_80:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: lui a0, 4
; RV32I-NEXT: addi a0, a0, -80
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: .cfi_def_cfa_offset 18336
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: call callee
; RV32I-NEXT: lui a0, 4
; RV32I-NEXT: addi a0, a0, -80
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV64-LABEL: frame_16kb_minus_80:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
; RV64-NEXT: .cfi_def_cfa_offset 2032
; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: lui a0, 4
; RV64-NEXT: addiw a0, a0, -80
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: .cfi_def_cfa_offset 18336
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: call callee
; RV64-NEXT: lui a0, 4
; RV64-NEXT: addiw a0, a0, -80
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 2032
; RV64-NEXT: ret
; RV32ZBA-LABEL: frame_16kb_minus_80:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32ZBA-NEXT: .cfi_offset ra, -4
; RV32ZBA-NEXT: li a0, -2038
; RV32ZBA-NEXT: sh3add sp, a0, sp
; RV32ZBA-NEXT: .cfi_def_cfa_offset 18336
; RV32ZBA-NEXT: addi a0, sp, 12
; RV32ZBA-NEXT: call callee
; RV32ZBA-NEXT: li a0, 2038
; RV32ZBA-NEXT: sh3add sp, a0, sp
; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32ZBA-NEXT: addi sp, sp, 2032
; RV32ZBA-NEXT: ret
;
; RV64I-LABEL: frame_16kb_minus_80:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: lui a0, 4
; RV64I-NEXT: addiw a0, a0, -80
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 18336
; RV64I-NEXT: addi a0, sp, 8
; RV64I-NEXT: call callee
; RV64I-NEXT: lui a0, 4
; RV64I-NEXT: addiw a0, a0, -80
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: frame_16kb_minus_80:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: li a0, -2038
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: .cfi_def_cfa_offset 18336
; RV64ZBA-NEXT: addi a0, sp, 8
; RV64ZBA-NEXT: call callee
; RV64ZBA-NEXT: li a0, 2038
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: ret
%a = alloca [18320 x i8]
call void @callee(ptr %a)
ret void
Expand Down Expand Up @@ -430,8 +532,3 @@ define void @frame_32kb() {
call void @callee(ptr %a)
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32I: {{.*}}
; RV32ZBA: {{.*}}
; RV64I: {{.*}}
; RV64ZBA: {{.*}}
Loading