Skip to content

Commit de0bcd0

Browse files
authored
[RISCV] Use QC_E_ADDI while eliminating the frameindex (#139515)
The QC_E_ADDI instruction from the Xqcilia extension takes a signed 26-bit immediate, so it can be used instead of splitting the offset across two ADDIs while eliminating the frameindex.
1 parent 5c25061 commit de0bcd0

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed

llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,30 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
291291
return;
292292
}
293293

294+
// Use the QC_E_ADDI instruction from the Xqcilia extension that can take a
295+
// signed 26-bit immediate.
296+
if (ST.hasVendorXqcilia() && isInt<26>(Val)) {
297+
// The one case where using this instruction is sub-optimal is if Val can be
298+
// materialized with a single compressible LUI and following add/sub is also
299+
// compressible. Avoid doing this if that is the case.
300+
int Hi20 = (Val & 0xFFFFF000) >> 12;
301+
bool IsCompressLUI =
302+
((Val & 0xFFF) == 0) && (Hi20 != 0) &&
303+
(isUInt<5>(Hi20) || (Hi20 >= 0xfffe0 && Hi20 <= 0xfffff));
304+
bool IsCompressAddSub =
305+
(SrcReg == DestReg) &&
306+
((Val > 0 && RISCV::GPRNoX0RegClass.contains(SrcReg)) ||
307+
(Val < 0 && RISCV::GPRCRegClass.contains(SrcReg)));
308+
309+
if (!(IsCompressLUI && IsCompressAddSub)) {
310+
BuildMI(MBB, II, DL, TII->get(RISCV::QC_E_ADDI), DestReg)
311+
.addReg(SrcReg, getKillRegState(KillSrcReg))
312+
.addImm(Val)
313+
.setMIFlag(Flag);
314+
return;
315+
}
316+
}
317+
294318
// Try to split the offset across two ADDIs. We need to keep the intermediate
295319
// result aligned after each ADDI. We need to determine the maximum value we
296320
// can put in each ADDI. In the negative direction, we can use -2048 which is

llvm/test/CodeGen/RISCV/stack-offset.ll

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
33
; RUN: | FileCheck %s -check-prefixes=RV32,RV32I
4+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilia < %s \
5+
; RUN: | FileCheck %s -check-prefixes=RV32XQCILIA
46
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \
57
; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA
68
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
@@ -39,6 +41,27 @@ define void @test() {
3941
; RV32I-NEXT: .cfi_def_cfa_offset 0
4042
; RV32I-NEXT: ret
4143
;
44+
; RV32XQCILIA-LABEL: test:
45+
; RV32XQCILIA: # %bb.0:
46+
; RV32XQCILIA-NEXT: addi sp, sp, -2032
47+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
48+
; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
49+
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
50+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3168
51+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 5200
52+
; RV32XQCILIA-NEXT: addi a0, sp, 12
53+
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 2060
54+
; RV32XQCILIA-NEXT: qc.e.addi a2, sp, 4108
55+
; RV32XQCILIA-NEXT: qc.e.addi a3, sp, 5132
56+
; RV32XQCILIA-NEXT: call inspect
57+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3168
58+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
59+
; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
60+
; RV32XQCILIA-NEXT: .cfi_restore ra
61+
; RV32XQCILIA-NEXT: addi sp, sp, 2032
62+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
63+
; RV32XQCILIA-NEXT: ret
64+
;
4265
; RV32ZBA-LABEL: test:
4366
; RV32ZBA: # %bb.0:
4467
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -150,6 +173,25 @@ define void @align_8() {
150173
; RV32I-NEXT: .cfi_def_cfa_offset 0
151174
; RV32I-NEXT: ret
152175
;
176+
; RV32XQCILIA-LABEL: align_8:
177+
; RV32XQCILIA: # %bb.0:
178+
; RV32XQCILIA-NEXT: addi sp, sp, -256
179+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
180+
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
181+
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
182+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
183+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
184+
; RV32XQCILIA-NEXT: addi a0, sp, 7
185+
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
186+
; RV32XQCILIA-NEXT: call inspect
187+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
188+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
189+
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
190+
; RV32XQCILIA-NEXT: .cfi_restore ra
191+
; RV32XQCILIA-NEXT: addi sp, sp, 256
192+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
193+
; RV32XQCILIA-NEXT: ret
194+
;
153195
; RV32ZBA-LABEL: align_8:
154196
; RV32ZBA: # %bb.0:
155197
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -246,6 +288,25 @@ define void @align_4() {
246288
; RV32I-NEXT: .cfi_def_cfa_offset 0
247289
; RV32I-NEXT: ret
248290
;
291+
; RV32XQCILIA-LABEL: align_4:
292+
; RV32XQCILIA: # %bb.0:
293+
; RV32XQCILIA-NEXT: addi sp, sp, -256
294+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
295+
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
296+
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
297+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
298+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
299+
; RV32XQCILIA-NEXT: addi a0, sp, 7
300+
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4104
301+
; RV32XQCILIA-NEXT: call inspect
302+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
303+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
304+
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
305+
; RV32XQCILIA-NEXT: .cfi_restore ra
306+
; RV32XQCILIA-NEXT: addi sp, sp, 256
307+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
308+
; RV32XQCILIA-NEXT: ret
309+
;
249310
; RV32ZBA-LABEL: align_4:
250311
; RV32ZBA: # %bb.0:
251312
; RV32ZBA-NEXT: addi sp, sp, -2032
@@ -342,6 +403,25 @@ define void @align_2() {
342403
; RV32-NEXT: .cfi_def_cfa_offset 0
343404
; RV32-NEXT: ret
344405
;
406+
; RV32XQCILIA-LABEL: align_2:
407+
; RV32XQCILIA: # %bb.0:
408+
; RV32XQCILIA-NEXT: addi sp, sp, -256
409+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
410+
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
411+
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
412+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
413+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
414+
; RV32XQCILIA-NEXT: addi a0, sp, 9
415+
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4106
416+
; RV32XQCILIA-NEXT: call inspect
417+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
418+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
419+
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
420+
; RV32XQCILIA-NEXT: .cfi_restore ra
421+
; RV32XQCILIA-NEXT: addi sp, sp, 256
422+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
423+
; RV32XQCILIA-NEXT: ret
424+
;
345425
; RV64-LABEL: align_2:
346426
; RV64: # %bb.0:
347427
; RV64-NEXT: addi sp, sp, -2032
@@ -395,6 +475,25 @@ define void @align_1() {
395475
; RV32-NEXT: .cfi_def_cfa_offset 0
396476
; RV32-NEXT: ret
397477
;
478+
; RV32XQCILIA-LABEL: align_1:
479+
; RV32XQCILIA: # %bb.0:
480+
; RV32XQCILIA-NEXT: addi sp, sp, -256
481+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
482+
; RV32XQCILIA-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
483+
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
484+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -3856
485+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 4112
486+
; RV32XQCILIA-NEXT: addi a0, sp, 10
487+
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 4107
488+
; RV32XQCILIA-NEXT: call inspect
489+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, 3856
490+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 256
491+
; RV32XQCILIA-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
492+
; RV32XQCILIA-NEXT: .cfi_restore ra
493+
; RV32XQCILIA-NEXT: addi sp, sp, 256
494+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
495+
; RV32XQCILIA-NEXT: ret
496+
;
398497
; RV64-LABEL: align_1:
399498
; RV64: # %bb.0:
400499
; RV64-NEXT: addi sp, sp, -2032
@@ -422,3 +521,100 @@ define void @align_1() {
422521
call void (...) @inspect(ptr %p1, ptr %p2)
423522
ret void
424523
}
524+
525+
define void @align_1_lui() {
526+
; RV32-LABEL: align_1_lui:
527+
; RV32: # %bb.0:
528+
; RV32-NEXT: addi sp, sp, -2032
529+
; RV32-NEXT: .cfi_def_cfa_offset 2032
530+
; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
531+
; RV32-NEXT: .cfi_offset ra, -4
532+
; RV32-NEXT: lui a0, 1
533+
; RV32-NEXT: sub sp, sp, a0
534+
; RV32-NEXT: .cfi_def_cfa_offset 6128
535+
; RV32-NEXT: addi a0, sp, 8
536+
; RV32-NEXT: lui a1, 1
537+
; RV32-NEXT: addi a1, a1, 2027
538+
; RV32-NEXT: add a1, sp, a1
539+
; RV32-NEXT: call inspect
540+
; RV32-NEXT: lui a0, 1
541+
; RV32-NEXT: add sp, sp, a0
542+
; RV32-NEXT: .cfi_def_cfa_offset 2032
543+
; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
544+
; RV32-NEXT: .cfi_restore ra
545+
; RV32-NEXT: addi sp, sp, 2032
546+
; RV32-NEXT: .cfi_def_cfa_offset 0
547+
; RV32-NEXT: ret
548+
;
549+
; RV32XQCILIA-LABEL: align_1_lui:
550+
; RV32XQCILIA: # %bb.0:
551+
; RV32XQCILIA-NEXT: addi sp, sp, -2032
552+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
553+
; RV32XQCILIA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
554+
; RV32XQCILIA-NEXT: .cfi_offset ra, -4
555+
; RV32XQCILIA-NEXT: qc.e.addi sp, sp, -4096
556+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 6128
557+
; RV32XQCILIA-NEXT: addi a0, sp, 8
558+
; RV32XQCILIA-NEXT: qc.e.addi a1, sp, 6123
559+
; RV32XQCILIA-NEXT: call inspect
560+
; RV32XQCILIA-NEXT: lui a0, 1
561+
; RV32XQCILIA-NEXT: add sp, sp, a0
562+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 2032
563+
; RV32XQCILIA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
564+
; RV32XQCILIA-NEXT: .cfi_restore ra
565+
; RV32XQCILIA-NEXT: addi sp, sp, 2032
566+
; RV32XQCILIA-NEXT: .cfi_def_cfa_offset 0
567+
; RV32XQCILIA-NEXT: ret
568+
;
569+
; RV64I-LABEL: align_1_lui:
570+
; RV64I: # %bb.0:
571+
; RV64I-NEXT: addi sp, sp, -2032
572+
; RV64I-NEXT: .cfi_def_cfa_offset 2032
573+
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
574+
; RV64I-NEXT: .cfi_offset ra, -8
575+
; RV64I-NEXT: lui a0, 1
576+
; RV64I-NEXT: addiw a0, a0, 16
577+
; RV64I-NEXT: sub sp, sp, a0
578+
; RV64I-NEXT: .cfi_def_cfa_offset 6144
579+
; RV64I-NEXT: addi a0, sp, 20
580+
; RV64I-NEXT: lui a1, 1
581+
; RV64I-NEXT: addiw a1, a1, 2039
582+
; RV64I-NEXT: add a1, sp, a1
583+
; RV64I-NEXT: call inspect
584+
; RV64I-NEXT: lui a0, 1
585+
; RV64I-NEXT: addiw a0, a0, 16
586+
; RV64I-NEXT: add sp, sp, a0
587+
; RV64I-NEXT: .cfi_def_cfa_offset 2032
588+
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
589+
; RV64I-NEXT: .cfi_restore ra
590+
; RV64I-NEXT: addi sp, sp, 2032
591+
; RV64I-NEXT: .cfi_def_cfa_offset 0
592+
; RV64I-NEXT: ret
593+
;
594+
; RV64ZBA-LABEL: align_1_lui:
595+
; RV64ZBA: # %bb.0:
596+
; RV64ZBA-NEXT: addi sp, sp, -2032
597+
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
598+
; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
599+
; RV64ZBA-NEXT: .cfi_offset ra, -8
600+
; RV64ZBA-NEXT: li a0, -514
601+
; RV64ZBA-NEXT: sh3add sp, a0, sp
602+
; RV64ZBA-NEXT: .cfi_def_cfa_offset 6144
603+
; RV64ZBA-NEXT: addi a0, sp, 20
604+
; RV64ZBA-NEXT: lui a1, 1
605+
; RV64ZBA-NEXT: addiw a1, a1, 2039
606+
; RV64ZBA-NEXT: add a1, sp, a1
607+
; RV64ZBA-NEXT: call inspect
608+
; RV64ZBA-NEXT: li a0, 514
609+
; RV64ZBA-NEXT: sh3add sp, a0, sp
610+
; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032
611+
; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
612+
; RV64ZBA-NEXT: .cfi_restore ra
613+
; RV64ZBA-NEXT: addi sp, sp, 2032
614+
; RV64ZBA-NEXT: .cfi_def_cfa_offset 0
615+
; RV64ZBA-NEXT: ret
616+
%p2 = alloca i8, align 1
617+
%p1 = alloca [6115 x i8], align 1
618+
call void (...) @inspect(ptr %p1, ptr %p2)
619+
ret void
620+
}

0 commit comments

Comments
 (0)