Skip to content

Commit 5bd01ac

Browse files
[AArch64] Re-enable rematerialization for streaming-mode-changing functions. (llvm#83235)
We can add implicit defs/uses of the 'VG' register to instructions to prevent the register allocator from rematerializing values in between streaming-mode changes, as the def/use of VG further pins down the ordering that comes out of ISel. This avoids the heavy-handed approach of preventing any kind of rematerialization. While we could add 'VG' as a Use to all SVE instructions, we only really need to do this for instructions that are rematerializable, because the smstart/smstop instructions and pseudos act as scheduling barriers, which is sufficient to prevent other instructions from being scheduled into the middle of the streaming-mode-changing call sequence. However, we may revisit this in the future.
1 parent 80a328b commit 5bd01ac

26 files changed

+167
-187
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7518,6 +7518,22 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
75187518
(AArch64::GPR32RegClass.contains(MO.getReg()) ||
75197519
AArch64::GPR64RegClass.contains(MO.getReg())))
75207520
MI.removeOperand(I);
7521+
7522+
// Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that
7523+
// have nothing to do with VG, were it not that they are used to materialise a
7524+
// frame-address. If they contain a frame-index to a scalable vector, this
7525+
// will likely require an ADDVL instruction to materialise the address, thus
7526+
// reading VG.
7527+
const MachineFunction &MF = *MI.getMF();
7528+
if (MF.getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
7529+
(MI.getOpcode() == AArch64::ADDXri ||
7530+
MI.getOpcode() == AArch64::SUBXri)) {
7531+
const MachineOperand &MO = MI.getOperand(1);
7532+
if (MO.isFI() && MF.getFrameInfo().getStackID(MO.getIndex()) ==
7533+
TargetStackID::ScalableVector)
7534+
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
7535+
/*IsImplicit=*/true));
7536+
}
75217537
}
75227538

75237539
SDValue AArch64TargetLowering::changeStreamingMode(

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2809,6 +2809,7 @@ class AddSubImmShift<bit isSub, bit setFlags, RegisterClass dstRegtype,
28092809
let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
28102810
let Inst{21-10} = imm{11-0};
28112811
let DecoderMethod = "DecodeAddSubImmShift";
2812+
let hasPostISelHook = 1;
28122813
}
28132814

28142815
class BaseAddSubRegPseudo<RegisterClass regtype,

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -9481,58 +9481,6 @@ unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
94819481
return AArch64::BLR;
94829482
}
94839483

9484-
bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
9485-
const MachineInstr &MI) const {
9486-
const MachineFunction &MF = *MI.getMF();
9487-
const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
9488-
9489-
// If the function contains changes to streaming mode, then there
9490-
// is a danger that rematerialised instructions end up between
9491-
// instruction sequences (e.g. call sequences, or prolog/epilogue)
9492-
// where the streaming-SVE mode is temporarily changed.
9493-
if (AFI.hasStreamingModeChanges()) {
9494-
// Avoid rematerializing rematerializable instructions that use/define
9495-
// scalable values, such as 'pfalse' or 'ptrue', which result in different
9496-
// results when the runtime vector length is different.
9497-
const MachineRegisterInfo &MRI = MF.getRegInfo();
9498-
const MachineFrameInfo &MFI = MF.getFrameInfo();
9499-
if (any_of(MI.operands(), [&MRI, &MFI](const MachineOperand &MO) {
9500-
if (MO.isFI() &&
9501-
MFI.getStackID(MO.getIndex()) == TargetStackID::ScalableVector)
9502-
return true;
9503-
if (!MO.isReg())
9504-
return false;
9505-
9506-
if (MO.getReg().isVirtual()) {
9507-
const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg());
9508-
return AArch64::ZPRRegClass.hasSubClassEq(RC) ||
9509-
AArch64::PPRRegClass.hasSubClassEq(RC);
9510-
}
9511-
return AArch64::ZPRRegClass.contains(MO.getReg()) ||
9512-
AArch64::PPRRegClass.contains(MO.getReg());
9513-
}))
9514-
return false;
9515-
9516-
// Avoid rematerializing instructions that return a value that is
9517-
// different depending on vector length, even when it is not returned
9518-
// in a scalable vector/predicate register.
9519-
switch (MI.getOpcode()) {
9520-
default:
9521-
break;
9522-
case AArch64::RDVLI_XI:
9523-
case AArch64::ADDVL_XXI:
9524-
case AArch64::ADDPL_XXI:
9525-
case AArch64::CNTB_XPiI:
9526-
case AArch64::CNTH_XPiI:
9527-
case AArch64::CNTW_XPiI:
9528-
case AArch64::CNTD_XPiI:
9529-
return false;
9530-
}
9531-
}
9532-
9533-
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
9534-
}
9535-
95369484
MachineBasicBlock::iterator
95379485
AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
95389486
Register TargetReg, bool FrameSetup) const {

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,6 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
381381
int64_t &ByteSized,
382382
int64_t &VGSized);
383383

384-
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
385-
386384
// Return true if address of the form BaseReg + Scale * ScaledReg + Offset can
387385
// be used for a load/store of NumBytes. BaseReg is always present and
388386
// implicit.

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,9 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
443443
Reserved.set(SubReg);
444444
}
445445

446+
// VG cannot be allocated
447+
Reserved.set(AArch64::VG);
448+
446449
if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
447450
for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
448451
SubReg.isValid(); ++SubReg)

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ def MSRpstatePseudo :
233233
(ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>,
234234
Sched<[WriteSys]> {
235235
let hasPostISelHook = 1;
236+
let Uses = [VG];
237+
let Defs = [VG];
236238
}
237239

238240
def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)),

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@ def MSRpstatesvcrImm1
223223
let Inst{8} = imm;
224224
let Inst{7-5} = 0b011; // op2
225225
let hasPostISelHook = 1;
226+
let Uses = [VG];
227+
let Defs = [VG];
226228
}
227229

228230
def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
365365
let ElementSize = pprty.ElementSize;
366366
let hasSideEffects = 0;
367367
let isReMaterializable = 1;
368+
let Uses = [VG];
368369
}
369370

370371
multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
@@ -755,6 +756,7 @@ class sve_int_pfalse<bits<6> opc, string asm>
755756

756757
let hasSideEffects = 0;
757758
let isReMaterializable = 1;
759+
let Uses = [VG];
758760
}
759761

760762
multiclass sve_int_pfalse<bits<6> opc, string asm> {
@@ -1090,6 +1092,7 @@ class sve_int_count<bits<3> opc, string asm>
10901092

10911093
let hasSideEffects = 0;
10921094
let isReMaterializable = 1;
1095+
let Uses = [VG];
10931096
}
10941097

10951098
multiclass sve_int_count<bits<3> opc, string asm, SDPatternOperator op> {
@@ -1982,6 +1985,7 @@ class sve_int_dup_mask_imm<string asm>
19821985
let DecoderMethod = "DecodeSVELogicalImmInstruction";
19831986
let hasSideEffects = 0;
19841987
let isReMaterializable = 1;
1988+
let Uses = [VG];
19851989
}
19861990

19871991
multiclass sve_int_dup_mask_imm<string asm> {
@@ -2862,6 +2866,7 @@ class sve_int_arith_vl<bit opc, string asm, bit streaming_sve = 0b0>
28622866
let Inst{4-0} = Rd;
28632867

28642868
let hasSideEffects = 0;
2869+
let Uses = [VG];
28652870
}
28662871

28672872
class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b0>
@@ -2882,6 +2887,7 @@ class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b
28822887

28832888
let hasSideEffects = 0;
28842889
let isReMaterializable = 1;
2890+
let Uses = [VG];
28852891
}
28862892

28872893
//===----------------------------------------------------------------------===//
@@ -4699,6 +4705,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
46994705

47004706
let hasSideEffects = 0;
47014707
let isReMaterializable = 1;
4708+
let Uses = [VG];
47024709
}
47034710

47044711
multiclass sve_int_dup_imm<string asm> {
@@ -4741,6 +4748,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
47414748

47424749
let hasSideEffects = 0;
47434750
let isReMaterializable = 1;
4751+
let Uses = [VG];
47444752
}
47454753

47464754
multiclass sve_int_dup_fpimm<string asm> {
@@ -5657,6 +5665,7 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
56575665

56585666
let hasSideEffects = 0;
56595667
let isReMaterializable = 1;
5668+
let Uses = [VG];
56605669
}
56615670

56625671
multiclass sve_int_index_ii<string asm> {
@@ -9308,6 +9317,7 @@ class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatte
93089317

93099318
let hasSideEffects = 0;
93109319
let isReMaterializable = 1;
9320+
let Uses = [VG];
93119321
}
93129322

93139323

llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ body: |
193193
liveins: $z0, $z1, $p0, $p1, $w0
194194
195195
renamable $p2 = COPY killed $p0
196-
renamable $p0 = PTRUE_S 31
196+
renamable $p0 = PTRUE_S 31, implicit $vg
197197
ST1W_IMM killed renamable $z0, renamable $p0, %stack.0.z0.addr, 0 :: (store unknown-size into %ir.z0.addr, align 16)
198198
ST1W_IMM killed renamable $z1, renamable $p0, %stack.1.z1.addr, 0 :: (store unknown-size into %ir.z1.addr, align 16)
199199
STR_PXI killed renamable $p2, %stack.2.p0.addr, 0 :: (store unknown-size into %ir.p0.addr, align 2)

llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ body: |
111111
STRXui killed renamable $x1, %stack.1, 0, debug-location !8
112112
DBG_VALUE %stack.1, $noreg, !11, !DIExpression(DW_OP_constu, 16, DW_OP_plus, DW_OP_deref), debug-location !8
113113
114-
renamable $p2 = PTRUE_S 31, debug-location !DILocation(line: 4, column: 1, scope: !5)
114+
renamable $p2 = PTRUE_S 31, implicit $vg, debug-location !DILocation(line: 4, column: 1, scope: !5)
115115
ST1W_IMM renamable $z0, renamable $p2, %stack.2, 0, debug-location !DILocation(line: 5, column: 1, scope: !5)
116116
DBG_VALUE %stack.2, $noreg, !12, !DIExpression(DW_OP_deref), debug-location !DILocation(line: 5, column: 1, scope: !5)
117117
ST1W_IMM renamable $z1, killed renamable $p2, %stack.3, 0, debug-location !DILocation(line: 6, column: 1, scope: !5)

llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ body: |
145145
liveins: $z1
146146
147147
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp, debug-location !34
148-
renamable $p0 = PTRUE_S 31, debug-location !34
148+
renamable $p0 = PTRUE_S 31, implicit $vg, debug-location !34
149149
$x0 = ADDXri %stack.0, 0, 0, debug-location !34
150150
ST1W_IMM renamable $z1, killed renamable $p0, %stack.0, 0, debug-location !34 :: (store unknown-size into %stack.0, align 16)
151151
$z0 = COPY renamable $z1, debug-location !34
@@ -157,7 +157,7 @@ body: |
157157
$z7 = COPY renamable $z1, debug-location !34
158158
BL @bar, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7, implicit $x0, implicit-def $sp, implicit-def $z0, implicit-def $z1, debug-location !34
159159
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp, debug-location !34
160-
renamable $p0 = PTRUE_S 31, debug-location !34
160+
renamable $p0 = PTRUE_S 31, implicit $vg, debug-location !34
161161
$z3 = IMPLICIT_DEF
162162
renamable $z1 = LD1W_IMM renamable $p0, %stack.0, 0, debug-location !34 :: (load unknown-size from %stack.0, align 16)
163163
ST1W_IMM renamable $z3, killed renamable $p0, %stack.0, 0 :: (store unknown-size into %stack.0, align 16)

llvm/test/CodeGen/AArch64/sve-localstackalloc.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ body: |
4848
%2:gpr32 = COPY $w0
4949
%1:zpr = COPY $z1
5050
%0:zpr = COPY $z0
51-
%5:ppr_3b = PTRUE_B 31
51+
%5:ppr_3b = PTRUE_B 31, implicit $vg
5252
%6:gpr64sp = ADDXri %stack.0, 0, 0
5353
ST1B_IMM %1, %5, %6, 1 :: (store unknown-size, align 16)
5454
ST1B_IMM %0, %5, %stack.0, 0 :: (store unknown-size into %stack.0, align 16)

llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ body: |
1111
; CHECK: liveins: $p0
1212
; CHECK-NEXT: {{ $}}
1313
; CHECK-NEXT: [[COPY:%[0-9]+]]:ppr = COPY $p0
14-
; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE
14+
; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE implicit $vg
1515
; CHECK-NEXT: [[UZP1_PPP_B:%[0-9]+]]:ppr = UZP1_PPP_B [[COPY]], [[PFALSE]]
1616
; CHECK-NEXT: [[UZP1_PPP_B1:%[0-9]+]]:ppr = UZP1_PPP_B killed [[UZP1_PPP_B]], [[PFALSE]]
1717
; CHECK-NEXT: $p0 = COPY [[UZP1_PPP_B1]]
1818
; CHECK-NEXT: RET_ReallyLR implicit $p0
1919
%0:ppr = COPY $p0
20-
%2:ppr = PFALSE
20+
%2:ppr = PFALSE implicit $vg
2121
%3:ppr = UZP1_PPP_B %0, %2
22-
%4:ppr = PFALSE
22+
%4:ppr = PFALSE implicit $vg
2323
%5:ppr = UZP1_PPP_B killed %3, %4
2424
$p0 = COPY %5
2525
RET_ReallyLR implicit $p0

llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ body: |
2626
name: expand_mls_to_msb
2727
body: |
2828
bb.0:
29-
renamable $p0 = PTRUE_B 31
29+
renamable $p0 = PTRUE_B 31, implicit $vg
3030
renamable $z0 = MLS_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1
3131
RET_ReallyLR implicit $z0
3232
...
@@ -36,7 +36,7 @@ body: |
3636
name: expand_mla_to_mad
3737
body: |
3838
bb.0:
39-
renamable $p0 = PTRUE_B 31
39+
renamable $p0 = PTRUE_B 31, implicit $vg
4040
renamable $z0 = MLA_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1
4141
RET_ReallyLR implicit $z0
4242
...

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ body: |
174174
%1:zpr = COPY $z0
175175
%0:ppr_3b = COPY $p0
176176
%2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
177-
%3:ppr = PTRUE_B 31
177+
%3:ppr = PTRUE_B 31, implicit $vg
178178
PTEST_PP killed %3, killed %2, implicit-def $nzcv
179179
%4:gpr32 = COPY $wzr
180180
%5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
@@ -409,14 +409,14 @@ body: |
409409
410410
; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_not_all_active
411411
; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
412-
; CHECK-NEXT: %3:ppr = PTRUE_B 0
412+
; CHECK-NEXT: %3:ppr = PTRUE_B 0, implicit $vg
413413
; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv
414414
; CHECK-NEXT: %4:gpr32 = COPY $wzr
415415
; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
416416
%1:zpr = COPY $z0
417417
%0:ppr_3b = COPY $p0
418418
%2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
419-
%3:ppr = PTRUE_B 0
419+
%3:ppr = PTRUE_B 0, implicit $vg
420420
PTEST_PP killed %3, killed %2, implicit-def $nzcv
421421
%4:gpr32 = COPY $wzr
422422
%5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
@@ -446,14 +446,14 @@ body: |
446446
447447
; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_of_halfs
448448
; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
449-
; CHECK-NEXT: %3:ppr = PTRUE_H 31
449+
; CHECK-NEXT: %3:ppr = PTRUE_H 31, implicit $vg
450450
; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv
451451
; CHECK-NEXT: %4:gpr32 = COPY $wzr
452452
; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv
453453
%1:zpr = COPY $z0
454454
%0:ppr_3b = COPY $p0
455455
%2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv
456-
%3:ppr = PTRUE_H 31
456+
%3:ppr = PTRUE_H 31, implicit $vg
457457
PTEST_PP killed %3, killed %2, implicit-def $nzcv
458458
%4:gpr32 = COPY $wzr
459459
%5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv

0 commit comments

Comments
 (0)