Skip to content

Commit 0debf87

Browse files
committed
[RISCV] Clear vill for whole vector register moves in vsetvli insertion
This is an alternative to llvm#117866 that works by demanding a valid vtype instead of using a separate pass. The main advantage of this is that it allows coalesceVSETVLIs to just reuse an existing vsetvli later in the block. To do this we need to first transfer the vsetvli info to some arbitrary valid state in transferBefore when we encounter a vector copy. Then we add a new vill demanded field that will happily accept any other known vtype, which allows us to coalesce these where possible. Note we also need to check for vector copies in computeVLVTYPEChanges, otherwise the pass will completely skip over functions that only have vector copies and nothing else. This is one part of a fix for llvm#114518. We still need to check if there are other cases where vector copies/whole register moves are inserted after vsetvli insertion.
1 parent c9fa319 commit 0debf87

File tree

175 files changed

+3412
-1543
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

175 files changed

+3412
-1543
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,27 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
195195
return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
196196
}
197197

198+
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
199+
static bool isVecCopy(const MachineInstr &MI) {
200+
static const TargetRegisterClass *RVVRegClasses[] = {
201+
&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
202+
&RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
203+
&RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
204+
&RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
205+
&RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
206+
if (!MI.isCopy())
207+
return false;
208+
209+
Register DstReg = MI.getOperand(0).getReg();
210+
Register SrcReg = MI.getOperand(1).getReg();
211+
for (const auto &RegClass : RVVRegClasses) {
212+
if (RegClass->contains(DstReg, SrcReg)) {
213+
return true;
214+
}
215+
}
216+
return false;
217+
}
218+
198219
/// Which subfields of VL or VTYPE have values we need to preserve?
199220
struct DemandedFields {
200221
// Some unknown property of VL is used. If demanded, must preserve entire
@@ -221,10 +242,13 @@ struct DemandedFields {
221242
bool SEWLMULRatio = false;
222243
bool TailPolicy = false;
223244
bool MaskPolicy = false;
245+
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
246+
// vill is unset.
247+
bool VILL = false;
224248

225249
// Return true if any part of VTYPE was used
226250
bool usedVTYPE() const {
227-
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
251+
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
228252
}
229253

230254
// Return true if any property of VL was used
@@ -239,6 +263,7 @@ struct DemandedFields {
239263
SEWLMULRatio = true;
240264
TailPolicy = true;
241265
MaskPolicy = true;
266+
VILL = true;
242267
}
243268

244269
// Mark all VL properties as demanded
@@ -263,6 +288,7 @@ struct DemandedFields {
263288
SEWLMULRatio |= B.SEWLMULRatio;
264289
TailPolicy |= B.TailPolicy;
265290
MaskPolicy |= B.MaskPolicy;
291+
VILL |= B.VILL;
266292
}
267293

268294
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -308,7 +334,8 @@ struct DemandedFields {
308334
OS << ", ";
309335
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
310336
OS << "TailPolicy=" << TailPolicy << ", ";
311-
OS << "MaskPolicy=" << MaskPolicy;
337+
OS << "MaskPolicy=" << MaskPolicy << ", ";
338+
OS << "VILL=" << VILL;
312339
OS << "}";
313340
}
314341
#endif
@@ -503,6 +530,16 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
503530
}
504531
}
505532

533+
// In §32.16.6, whole vector register moves have a dependency on SEW. At the
534+
// MIR level though we don't encode the element type, and it gives the same
535+
// result whatever the SEW may be.
536+
//
537+
// However it does need valid SEW, i.e. vill must be cleared. The entry to a
538+
// function, calls and inline assembly may all set it, so make sure we clear
539+
// it for whole register copies.
540+
if (isVecCopy(MI))
541+
Res.VILL = true;
542+
506543
return Res;
507544
}
508545

@@ -1208,6 +1245,17 @@ static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
12081245
// legal for MI, but may not be the state requested by MI.
12091246
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
12101247
const MachineInstr &MI) const {
1248+
if (isVecCopy(MI) &&
1249+
(Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
1250+
// Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
1251+
// be coalesced into another vsetvli since we won't demand any fields.
1252+
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
1253+
NewInfo.setAVLImm(0);
1254+
NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, 8, true, true);
1255+
Info = NewInfo;
1256+
return;
1257+
}
1258+
12111259
if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
12121260
return;
12131261

@@ -1296,7 +1344,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
12961344
for (const MachineInstr &MI : MBB) {
12971345
transferBefore(Info, MI);
12981346

1299-
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1347+
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
1348+
isVecCopy(MI))
13001349
HadVectorOp = true;
13011350

13021351
transferAfter(Info, MI);
@@ -1426,6 +1475,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
14261475
PrefixTransparent = false;
14271476
}
14281477

1478+
if (isVecCopy(MI) &&
1479+
!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
1480+
insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
1481+
PrefixTransparent = false;
1482+
}
1483+
14291484
uint64_t TSFlags = MI.getDesc().TSFlags;
14301485
if (RISCVII::hasSEWOp(TSFlags)) {
14311486
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {

llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ define <vscale x 1 x i8> @constraint_vd(<vscale x 1 x i8> %0, <vscale x 1 x i8>
4545
define <vscale x 1 x i1> @constraint_vm(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1) nounwind {
4646
; RV32I-LABEL: constraint_vm:
4747
; RV32I: # %bb.0:
48+
; RV32I-NEXT: vsetivli zero, 0, e8, m1, ta, ma
4849
; RV32I-NEXT: vmv1r.v v9, v0
4950
; RV32I-NEXT: vmv1r.v v0, v8
5051
; RV32I-NEXT: #APP
@@ -54,6 +55,7 @@ define <vscale x 1 x i1> @constraint_vm(<vscale x 1 x i1> %0, <vscale x 1 x i1>
5455
;
5556
; RV64I-LABEL: constraint_vm:
5657
; RV64I: # %bb.0:
58+
; RV64I-NEXT: vsetivli zero, 0, e8, m1, ta, ma
5759
; RV64I-NEXT: vmv1r.v v9, v0
5860
; RV64I-NEXT: vmv1r.v v0, v8
5961
; RV64I-NEXT: #APP

llvm/test/CodeGen/RISCV/rvv/abs-vp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
567567
; CHECK-NEXT: slli a1, a1, 4
568568
; CHECK-NEXT: sub sp, sp, a1
569569
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
570+
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
570571
; CHECK-NEXT: vmv1r.v v24, v0
571572
; CHECK-NEXT: csrr a1, vlenb
572573
; CHECK-NEXT: slli a1, a1, 3
@@ -576,7 +577,6 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
576577
; CHECK-NEXT: csrr a1, vlenb
577578
; CHECK-NEXT: srli a2, a1, 3
578579
; CHECK-NEXT: sub a3, a0, a1
579-
; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
580580
; CHECK-NEXT: vslidedown.vx v0, v0, a2
581581
; CHECK-NEXT: sltu a2, a0, a3
582582
; CHECK-NEXT: addi a2, a2, -1

llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,6 +3075,7 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
30753075
; CHECK-NEXT: slli a1, a1, 4
30763076
; CHECK-NEXT: sub sp, sp, a1
30773077
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
3078+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
30783079
; CHECK-NEXT: vmv1r.v v24, v0
30793080
; CHECK-NEXT: csrr a1, vlenb
30803081
; CHECK-NEXT: slli a1, a1, 3
@@ -3086,7 +3087,6 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
30863087
; CHECK-NEXT: lui a2, 3
30873088
; CHECK-NEXT: srli a4, a3, 1
30883089
; CHECK-NEXT: slli a3, a3, 2
3089-
; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
30903090
; CHECK-NEXT: vslidedown.vx v0, v0, a4
30913091
; CHECK-NEXT: sub a4, a0, a3
30923092
; CHECK-NEXT: sltu a5, a0, a4
@@ -3158,11 +3158,11 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
31583158
;
31593159
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
31603160
; CHECK-ZVBB: # %bb.0:
3161+
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
31613162
; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
31623163
; CHECK-ZVBB-NEXT: csrr a1, vlenb
31633164
; CHECK-ZVBB-NEXT: srli a2, a1, 1
31643165
; CHECK-ZVBB-NEXT: slli a1, a1, 2
3165-
; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
31663166
; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
31673167
; CHECK-ZVBB-NEXT: sub a2, a0, a1
31683168
; CHECK-ZVBB-NEXT: sltu a3, a0, a2

llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,7 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
15841584
; CHECK-NEXT: slli a1, a1, 4
15851585
; CHECK-NEXT: sub sp, sp, a1
15861586
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1587+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
15871588
; CHECK-NEXT: vmv1r.v v24, v0
15881589
; CHECK-NEXT: csrr a1, vlenb
15891590
; CHECK-NEXT: slli a1, a1, 3
@@ -1593,7 +1594,6 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
15931594
; CHECK-NEXT: csrr a1, vlenb
15941595
; CHECK-NEXT: srli a2, a1, 1
15951596
; CHECK-NEXT: slli a1, a1, 2
1596-
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
15971597
; CHECK-NEXT: vslidedown.vx v0, v0, a2
15981598
; CHECK-NEXT: sub a2, a0, a1
15991599
; CHECK-NEXT: sltu a3, a0, a2
@@ -1631,11 +1631,11 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
16311631
;
16321632
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16:
16331633
; CHECK-ZVKB: # %bb.0:
1634+
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
16341635
; CHECK-ZVKB-NEXT: vmv1r.v v24, v0
16351636
; CHECK-ZVKB-NEXT: csrr a1, vlenb
16361637
; CHECK-ZVKB-NEXT: srli a2, a1, 1
16371638
; CHECK-ZVKB-NEXT: slli a1, a1, 2
1638-
; CHECK-ZVKB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
16391639
; CHECK-ZVKB-NEXT: vslidedown.vx v0, v0, a2
16401640
; CHECK-ZVKB-NEXT: sub a2, a0, a1
16411641
; CHECK-ZVKB-NEXT: sltu a3, a0, a2

llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
336336
; RV32-NEXT: add a1, a3, a1
337337
; RV32-NEXT: li a3, 2
338338
; RV32-NEXT: vs8r.v v16, (a1)
339+
; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
339340
; RV32-NEXT: vmv8r.v v8, v0
340341
; RV32-NEXT: vmv8r.v v16, v24
341342
; RV32-NEXT: call ext2
@@ -374,6 +375,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
374375
; RV64-NEXT: add a1, a3, a1
375376
; RV64-NEXT: li a3, 2
376377
; RV64-NEXT: vs8r.v v16, (a1)
378+
; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
377379
; RV64-NEXT: vmv8r.v v8, v0
378380
; RV64-NEXT: vmv8r.v v16, v24
379381
; RV64-NEXT: call ext2
@@ -451,6 +453,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
451453
; RV32-NEXT: add a1, sp, a1
452454
; RV32-NEXT: addi a1, a1, 128
453455
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
456+
; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
454457
; RV32-NEXT: vmv8r.v v16, v0
455458
; RV32-NEXT: call ext3
456459
; RV32-NEXT: addi sp, s0, -144
@@ -523,6 +526,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
523526
; RV64-NEXT: add a1, sp, a1
524527
; RV64-NEXT: addi a1, a1, 128
525528
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
529+
; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
526530
; RV64-NEXT: vmv8r.v v16, v0
527531
; RV64-NEXT: call ext3
528532
; RV64-NEXT: addi sp, s0, -144

llvm/test/CodeGen/RISCV/rvv/calling-conv.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return(
103103
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
104104
; RV32-NEXT: .cfi_offset ra, -4
105105
; RV32-NEXT: call callee_tuple_return
106+
; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
106107
; RV32-NEXT: vmv2r.v v6, v8
107108
; RV32-NEXT: vmv2r.v v8, v10
108109
; RV32-NEXT: vmv2r.v v10, v6
@@ -119,6 +120,7 @@ define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return(
119120
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
120121
; RV64-NEXT: .cfi_offset ra, -8
121122
; RV64-NEXT: call callee_tuple_return
123+
; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
122124
; RV64-NEXT: vmv2r.v v6, v8
123125
; RV64-NEXT: vmv2r.v v8, v10
124126
; RV64-NEXT: vmv2r.v v10, v6
@@ -144,6 +146,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i
144146
; RV32-NEXT: .cfi_def_cfa_offset 16
145147
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
146148
; RV32-NEXT: .cfi_offset ra, -4
149+
; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
147150
; RV32-NEXT: vmv2r.v v6, v8
148151
; RV32-NEXT: vmv2r.v v8, v10
149152
; RV32-NEXT: vmv2r.v v10, v6
@@ -160,6 +163,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i
160163
; RV64-NEXT: .cfi_def_cfa_offset 16
161164
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
162165
; RV64-NEXT: .cfi_offset ra, -8
166+
; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
163167
; RV64-NEXT: vmv2r.v v6, v8
164168
; RV64-NEXT: vmv2r.v v8, v10
165169
; RV64-NEXT: vmv2r.v v10, v6

0 commit comments

Comments
 (0)