-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Use QC_E_ADDI while eliminating the frameindex #139515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The QC_E_ADDI instruction from the Xqcilia extension takes a signed 26-bit immediate and can be used instead of splitting the offset across two ADDIs while eliminating the frame index.
@llvm/pr-subscribers-backend-risc-v. Author: Sudharsan Veeravalli (svs-quic). Changes: The QC_E_ADDI instruction from the Xqcilia extension takes a signed 26-bit immediate and can be used instead of splitting the offset across two ADDIs while eliminating the frame index. Full diff: https://github.com/llvm/llvm-project/pull/139515.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index c6f6c9007b2b1..8c754222a3b0c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -288,6 +288,17 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
return;
}
+ // Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
+ // signed 26-bit immediate. Avoid anything which can be done with a single lui
+ // as it might be compressible.
+ if (ST.hasVendorXqcilia() && isInt<26>(Val) && (Val & 0xFFF) != 0) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrcReg))
+ .addImm(Val)
+ .setMIFlag(Flag);
+ return;
+ }
+
// Try to split the offset across two ADDIs. We need to keep the intermediate
// result aligned after each ADDI. We need to determine the maximum value we
// can put in each ADDI. In the negative direction, we can use -2048 which is
diff --git a/llvm/test/CodeGen/RISCV/stack-offset.ll b/llvm/test/CodeGen/RISCV/stack-offset.ll
index 402d3546eae29..cdc117675af97 100644
--- a/llvm/test/CodeGen/RISCV/stack-offset.ll
+++ b/llvm/test/CodeGen/RISCV/stack-offset.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilia < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32XQCILIA
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
@@ -39,6 +41,27 @@ define void @test() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: test:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3168
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 5200
+; RV32XQCILIA-NEXT: addi a0, sp, 12
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 2060
+; RV32XQCILIA-NEXT: qc.e.addi a2, sp, 4108
+; RV32XQCILIA-NEXT: qc.e.addi a3, sp, 5132
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3168
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
+; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 2032
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: test:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -150,6 +173,25 @@ define void @align_8() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_8:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 7
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: align_8:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -246,6 +288,25 @@ define void @align_4() {
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_4:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 7
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV32ZBA-LABEL: align_4:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -342,6 +403,25 @@ define void @align_2() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_2:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 9
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4106
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV64-LABEL: align_2:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
@@ -395,6 +475,25 @@ define void @align_1() {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
+; RV32XQCILIA-LABEL: align_1:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: addi sp, sp, -256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
+; RV32XQCILIA-NEXT: .cfi_offset ra, -4
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
+; RV32XQCILIA-NEXT: addi a0, sp, 10
+; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4107
+; RV32XQCILIA-NEXT: call inspect
+; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
+; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
+; RV32XQCILIA-NEXT: .cfi_restore ra
+; RV32XQCILIA-NEXT: addi sp, sp, 256
+; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
+; RV32XQCILIA-NEXT: ret
+;
; RV64-LABEL: align_1:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -2032
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
If SrcReg and DestReg are the same, you could also use qc.e.addai,
but I think stack frames larger than 2^25 bytes are unlikely, so we can probably skip that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/5459 Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/16073 Here is the relevant piece of the build log for reference:
|
The QC_E_ADDI instruction from the Xqcilia extension takes a signed 26-bit immediate and can be used instead of splitting the offset across two ADDIs while eliminating the frame index.