Skip to content

[RISCV] Use QC_E_ADDI while eliminating the frameindex #139515

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,30 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
return;
}

// Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
// signed 26-bit immediate. When this path is taken the whole adjustment is
// emitted as a single instruction and we return early.
if (ST.hasVendorXqcilia() && isInt<26>(Val)) {
// The one case where using this instruction is sub-optimal is if Val can be
// materialized with a single compressible LUI and following add/sub is also
// compressible. Avoid doing this if that is the case.
//
// Hi20 holds bits [31:12] of Val as an unsigned 20-bit field (0..0xfffff).
int Hi20 = (Val & 0xFFFFF000) >> 12;
// A lone LUI can only produce Val when its low 12 bits are zero. The Hi20
// values accepted here are 1..31 and 0xfffe0..0xfffff (presumably the
// immediate range C.LUI can encode; confirm against the C extension spec).
// NOTE(review): Hi20 is derived from Val itself. If the fallback path below
// negates a negative Val before materializing it, the range check would be
// looking at the wrong constant for Val < 0 -- confirm.
bool IsCompressLUI =
((Val & 0xFFF) == 0) && (Hi20 != 0) &&
(isUInt<5>(Hi20) || (Hi20 >= 0xfffe0 && Hi20 <= 0xfffff));
// The add/sub must be in place (SrcReg == DestReg). A positive Val requires
// the register to be in GPRNoX0, a negative Val requires it to be in GPRC
// (presumably the operand constraints of C.ADD and C.SUB respectively).
bool IsCompressAddSub =
(SrcReg == DestReg) &&
((Val > 0 && RISCV::GPRNoX0RegClass.contains(SrcReg)) ||
(Val < 0 && RISCV::GPRCRegClass.contains(SrcReg)));

// Emit QC_E_ADDI unless both halves of the alternative sequence are
// compressible; in that one case fall through to the code after this block.
if (!(IsCompressLUI && IsCompressAddSub)) {
BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
.addReg(SrcReg, getKillRegState(KillSrcReg))
.addImm(Val)
.setMIFlag(Flag);
return;
}
}

// Try to split the offset across two ADDIs. We need to keep the intermediate
// result aligned after each ADDI. We need to determine the maximum value we
// can put in each ADDI. In the negative direction, we can use -2048 which is
Expand Down
196 changes: 196 additions & 0 deletions llvm/test/CodeGen/RISCV/stack-offset.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilia < %s \
; RUN: | FileCheck %s -check-prefixes=RV32XQCILIA
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
Expand Down Expand Up @@ -39,6 +41,27 @@ define void @test() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32XQCILIA-LABEL: test:
; RV32XQCILIA: # %bb.0:
; RV32XQCILIA-NEXT: addi sp, sp, -2032
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3168
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 5200
; RV32XQCILIA-NEXT: addi a0, sp, 12
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 2060
; RV32XQCILIA-NEXT: qc.e.addi a2, sp, 4108
; RV32XQCILIA-NEXT: qc.e.addi a3, sp, 5132
; RV32XQCILIA-NEXT: call inspect
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3168
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32XQCILIA-NEXT: .cfi_restore ra
; RV32XQCILIA-NEXT: addi sp, sp, 2032
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
; RV32XQCILIA-NEXT: ret
;
; RV32ZBA-LABEL: test:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
Expand Down Expand Up @@ -150,6 +173,25 @@ define void @align_8() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32XQCILIA-LABEL: align_8:
; RV32XQCILIA: # %bb.0:
; RV32XQCILIA-NEXT: addi sp, sp, -256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
; RV32XQCILIA-NEXT: addi a0, sp, 7
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
; RV32XQCILIA-NEXT: call inspect
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32XQCILIA-NEXT: .cfi_restore ra
; RV32XQCILIA-NEXT: addi sp, sp, 256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
; RV32XQCILIA-NEXT: ret
;
; RV32ZBA-LABEL: align_8:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
Expand Down Expand Up @@ -246,6 +288,25 @@ define void @align_4() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32XQCILIA-LABEL: align_4:
; RV32XQCILIA: # %bb.0:
; RV32XQCILIA-NEXT: addi sp, sp, -256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
; RV32XQCILIA-NEXT: addi a0, sp, 7
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
; RV32XQCILIA-NEXT: call inspect
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32XQCILIA-NEXT: .cfi_restore ra
; RV32XQCILIA-NEXT: addi sp, sp, 256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
; RV32XQCILIA-NEXT: ret
;
; RV32ZBA-LABEL: align_4:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
Expand Down Expand Up @@ -342,6 +403,25 @@ define void @align_2() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV32XQCILIA-LABEL: align_2:
; RV32XQCILIA: # %bb.0:
; RV32XQCILIA-NEXT: addi sp, sp, -256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
; RV32XQCILIA-NEXT: addi a0, sp, 9
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4106
; RV32XQCILIA-NEXT: call inspect
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32XQCILIA-NEXT: .cfi_restore ra
; RV32XQCILIA-NEXT: addi sp, sp, 256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
; RV32XQCILIA-NEXT: ret
;
; RV64-LABEL: align_2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
Expand Down Expand Up @@ -395,6 +475,25 @@ define void @align_1() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV32XQCILIA-LABEL: align_1:
; RV32XQCILIA: # %bb.0:
; RV32XQCILIA-NEXT: addi sp, sp, -256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
; RV32XQCILIA-NEXT: addi a0, sp, 10
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4107
; RV32XQCILIA-NEXT: call inspect
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32XQCILIA-NEXT: .cfi_restore ra
; RV32XQCILIA-NEXT: addi sp, sp, 256
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
; RV32XQCILIA-NEXT: ret
;
; RV64-LABEL: align_1:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
Expand Down Expand Up @@ -422,3 +521,100 @@ define void @align_1() {
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}

; Stack frame whose second SP adjustment on RV32 is exactly 4096 bytes, i.e. a
; value with zero low 12 bits. With Xqcilia the checks below expect the
; prologue to use `qc.e.addi sp, sp, -4096` while the epilogue keeps the
; `lui a0, 1` + `add sp, sp, a0` pair.
define void @align_1_lui() {
; RV32-LABEL: align_1_lui:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -2032
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: lui a0, 1
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 6128
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: lui a1, 1
; RV32-NEXT: addi a1, a1, 2027
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: call inspect
; RV32-NEXT: lui a0, 1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa_offset 2032
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 2032
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV32XQCILIA-LABEL: align_1_lui:
; RV32XQCILIA: # %bb.0:
; RV32XQCILIA-NEXT: addi sp, sp, -2032
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -4096
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 6128
; RV32XQCILIA-NEXT: addi a0, sp, 8
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 6123
; RV32XQCILIA-NEXT: call inspect
; RV32XQCILIA-NEXT: lui a0, 1
; RV32XQCILIA-NEXT: add sp, sp, a0
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32XQCILIA-NEXT: .cfi_restore ra
; RV32XQCILIA-NEXT: addi sp, sp, 2032
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
; RV32XQCILIA-NEXT: ret
;
; RV64I-LABEL: align_1_lui:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 6144
; RV64I-NEXT: addi a0, sp, 20
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: addiw a1, a1, 2039
; RV64I-NEXT: add a1, sp, a1
; RV64I-NEXT: call inspect
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: align_1_lui:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: addi sp, sp, -2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64ZBA-NEXT: .cfi_offset ra, -8
; RV64ZBA-NEXT: li a0, -514
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: .cfi_def_cfa_offset 6144
; RV64ZBA-NEXT: addi a0, sp, 20
; RV64ZBA-NEXT: lui a1, 1
; RV64ZBA-NEXT: addiw a1, a1, 2039
; RV64ZBA-NEXT: add a1, sp, a1
; RV64ZBA-NEXT: call inspect
; RV64ZBA-NEXT: li a0, 514
; RV64ZBA-NEXT: sh3add sp, a0, sp
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64ZBA-NEXT: .cfi_restore ra
; RV64ZBA-NEXT: addi sp, sp, 2032
; RV64ZBA-NEXT: .cfi_def_cfa_offset 0
; RV64ZBA-NEXT: ret
; One single byte plus a 6115-byte array, both with align 1; both addresses are
; passed to the variadic @inspect call.
%p2 = alloca i8, align 1
%p1 = alloca [6115 x i8], align 1
call void (...) @inspect(ptr %p1, ptr %p2)
ret void
}