Skip to content

Commit b6c0f1b

Browse files
authored
[RISCV] Clear vill for whole vector register moves in vsetvli insertion (llvm#118283)
This is an alternative to llvm#117866 that works by demanding a valid vtype instead of using a separate pass. The main advantage of this is that it allows coalesceVSETVLIs to just reuse an existing vsetvli later in the block. To do this we need to first transfer the vsetvli info to some arbitrary valid state in transferBefore when we encounter a vector copy. Then we add a new vill demanded field that will happily accept any other known vtype, which allows us to coalesce these where possible. Note we also need to check for vector copies in computeVLVTYPEChanges, otherwise the pass will completely skip over functions that only have vector copies and nothing else. This is one part of a fix for llvm#114518. We still need to check whether there are other cases where vector copies/whole register moves are inserted after vsetvli insertion.
1 parent bda0209 commit b6c0f1b

File tree

176 files changed

+3602
-3927
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

176 files changed

+3602
-3927
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ using namespace llvm;
4040
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
4141
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
4242

43+
static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
44+
DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
45+
cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
46+
"vill is cleared"),
47+
cl::init(true));
48+
4349
namespace {
4450

4551
/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
@@ -195,6 +201,14 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
195201
return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
196202
}
197203

204+
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
205+
static bool isVectorCopy(const TargetRegisterInfo *TRI,
206+
const MachineInstr &MI) {
207+
return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
208+
RISCVRegisterInfo::isRVVRegClass(
209+
TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
210+
}
211+
198212
/// Which subfields of VL or VTYPE have values we need to preserve?
199213
struct DemandedFields {
200214
// Some unknown property of VL is used. If demanded, must preserve entire
@@ -221,10 +235,13 @@ struct DemandedFields {
221235
bool SEWLMULRatio = false;
222236
bool TailPolicy = false;
223237
bool MaskPolicy = false;
238+
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
239+
// vill is unset.
240+
bool VILL = false;
224241

225242
// Return true if any part of VTYPE was used
226243
bool usedVTYPE() const {
227-
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
244+
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
228245
}
229246

230247
// Return true if any property of VL was used
@@ -239,6 +256,7 @@ struct DemandedFields {
239256
SEWLMULRatio = true;
240257
TailPolicy = true;
241258
MaskPolicy = true;
259+
VILL = true;
242260
}
243261

244262
// Mark all VL properties as demanded
@@ -263,6 +281,7 @@ struct DemandedFields {
263281
SEWLMULRatio |= B.SEWLMULRatio;
264282
TailPolicy |= B.TailPolicy;
265283
MaskPolicy |= B.MaskPolicy;
284+
VILL |= B.VILL;
266285
}
267286

268287
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -308,7 +327,8 @@ struct DemandedFields {
308327
OS << ", ";
309328
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
310329
OS << "TailPolicy=" << TailPolicy << ", ";
311-
OS << "MaskPolicy=" << MaskPolicy;
330+
OS << "MaskPolicy=" << MaskPolicy << ", ";
331+
OS << "VILL=" << VILL;
312332
OS << "}";
313333
}
314334
#endif
@@ -503,6 +523,21 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
503523
}
504524
}
505525

526+
// In §32.16.6, whole vector register moves have a dependency on SEW. At the
527+
// MIR level though we don't encode the element type, and it gives the same
528+
// result whatever the SEW may be.
529+
//
530+
// However it does need valid SEW, i.e. vill must be cleared. The entry to a
531+
// function, calls and inline assembly may all set it, so make sure we clear
532+
// it for whole register copies. Do this by leaving VILL demanded.
533+
if (isVectorCopy(ST->getRegisterInfo(), MI)) {
534+
Res.LMUL = DemandedFields::LMULNone;
535+
Res.SEW = DemandedFields::SEWNone;
536+
Res.SEWLMULRatio = false;
537+
Res.TailPolicy = false;
538+
Res.MaskPolicy = false;
539+
}
540+
506541
return Res;
507542
}
508543

@@ -1208,6 +1243,18 @@ static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
12081243
// legal for MI, but may not be the state requested by MI.
12091244
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
12101245
const MachineInstr &MI) const {
1246+
if (isVectorCopy(ST->getRegisterInfo(), MI) &&
1247+
(Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
1248+
// Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
1249+
// be coalesced into another vsetvli since we won't demand any fields.
1250+
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
1251+
NewInfo.setAVLImm(1);
1252+
NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, /*sew*/ 8, /*ta*/ true,
1253+
/*ma*/ true);
1254+
Info = NewInfo;
1255+
return;
1256+
}
1257+
12111258
if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
12121259
return;
12131260

@@ -1296,7 +1343,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
12961343
for (const MachineInstr &MI : MBB) {
12971344
transferBefore(Info, MI);
12981345

1299-
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1346+
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
1347+
isVectorCopy(ST->getRegisterInfo(), MI))
13001348
HadVectorOp = true;
13011349

13021350
transferAfter(Info, MI);
@@ -1426,6 +1474,16 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
14261474
PrefixTransparent = false;
14271475
}
14281476

1477+
if (EnsureWholeVectorRegisterMoveValidVTYPE &&
1478+
isVectorCopy(ST->getRegisterInfo(), MI)) {
1479+
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
1480+
insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
1481+
PrefixTransparent = false;
1482+
}
1483+
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1484+
/*isImp*/ true));
1485+
}
1486+
14291487
uint64_t TSFlags = MI.getDesc().TSFlags;
14301488
if (RISCVII::hasSEWOp(TSFlags)) {
14311489
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {

llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ define <vscale x 1 x i8> @constraint_vd(<vscale x 1 x i8> %0, <vscale x 1 x i8>
4545
define <vscale x 1 x i1> @constraint_vm(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1) nounwind {
4646
; RV32I-LABEL: constraint_vm:
4747
; RV32I: # %bb.0:
48+
; RV32I-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4849
; RV32I-NEXT: vmv1r.v v9, v0
4950
; RV32I-NEXT: vmv1r.v v0, v8
5051
; RV32I-NEXT: #APP
@@ -54,6 +55,7 @@ define <vscale x 1 x i1> @constraint_vm(<vscale x 1 x i1> %0, <vscale x 1 x i1>
5455
;
5556
; RV64I-LABEL: constraint_vm:
5657
; RV64I: # %bb.0:
58+
; RV64I-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5759
; RV64I-NEXT: vmv1r.v v9, v0
5860
; RV64I-NEXT: vmv1r.v v0, v8
5961
; RV64I-NEXT: #APP

llvm/test/CodeGen/RISCV/rvv/abs-vp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
567567
; CHECK-NEXT: slli a1, a1, 4
568568
; CHECK-NEXT: sub sp, sp, a1
569569
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
570+
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
570571
; CHECK-NEXT: vmv1r.v v24, v0
571572
; CHECK-NEXT: csrr a1, vlenb
572573
; CHECK-NEXT: slli a1, a1, 3
@@ -576,7 +577,6 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
576577
; CHECK-NEXT: csrr a1, vlenb
577578
; CHECK-NEXT: srli a2, a1, 3
578579
; CHECK-NEXT: sub a3, a0, a1
579-
; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
580580
; CHECK-NEXT: vslidedown.vx v0, v0, a2
581581
; CHECK-NEXT: sltu a2, a0, a3
582582
; CHECK-NEXT: addi a2, a2, -1

llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,6 +3075,7 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
30753075
; CHECK-NEXT: slli a1, a1, 4
30763076
; CHECK-NEXT: sub sp, sp, a1
30773077
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
3078+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
30783079
; CHECK-NEXT: vmv1r.v v24, v0
30793080
; CHECK-NEXT: csrr a1, vlenb
30803081
; CHECK-NEXT: slli a1, a1, 3
@@ -3086,7 +3087,6 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
30863087
; CHECK-NEXT: lui a2, 3
30873088
; CHECK-NEXT: srli a4, a3, 1
30883089
; CHECK-NEXT: slli a3, a3, 2
3089-
; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
30903090
; CHECK-NEXT: vslidedown.vx v0, v0, a4
30913091
; CHECK-NEXT: sub a4, a0, a3
30923092
; CHECK-NEXT: sltu a5, a0, a4
@@ -3158,11 +3158,11 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
31583158
;
31593159
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
31603160
; CHECK-ZVBB: # %bb.0:
3161+
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
31613162
; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
31623163
; CHECK-ZVBB-NEXT: csrr a1, vlenb
31633164
; CHECK-ZVBB-NEXT: srli a2, a1, 1
31643165
; CHECK-ZVBB-NEXT: slli a1, a1, 2
3165-
; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
31663166
; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
31673167
; CHECK-ZVBB-NEXT: sub a2, a0, a1
31683168
; CHECK-ZVBB-NEXT: sltu a3, a0, a2

llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,7 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
15841584
; CHECK-NEXT: slli a1, a1, 4
15851585
; CHECK-NEXT: sub sp, sp, a1
15861586
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1587+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
15871588
; CHECK-NEXT: vmv1r.v v24, v0
15881589
; CHECK-NEXT: csrr a1, vlenb
15891590
; CHECK-NEXT: slli a1, a1, 3
@@ -1593,7 +1594,6 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
15931594
; CHECK-NEXT: csrr a1, vlenb
15941595
; CHECK-NEXT: srli a2, a1, 1
15951596
; CHECK-NEXT: slli a1, a1, 2
1596-
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
15971597
; CHECK-NEXT: vslidedown.vx v0, v0, a2
15981598
; CHECK-NEXT: sub a2, a0, a1
15991599
; CHECK-NEXT: sltu a3, a0, a2
@@ -1631,11 +1631,11 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
16311631
;
16321632
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16:
16331633
; CHECK-ZVKB: # %bb.0:
1634+
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
16341635
; CHECK-ZVKB-NEXT: vmv1r.v v24, v0
16351636
; CHECK-ZVKB-NEXT: csrr a1, vlenb
16361637
; CHECK-ZVKB-NEXT: srli a2, a1, 1
16371638
; CHECK-ZVKB-NEXT: slli a1, a1, 2
1638-
; CHECK-ZVKB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
16391639
; CHECK-ZVKB-NEXT: vslidedown.vx v0, v0, a2
16401640
; CHECK-ZVKB-NEXT: sub a2, a0, a1
16411641
; CHECK-ZVKB-NEXT: sltu a3, a0, a2

llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
336336
; RV32-NEXT: add a1, a3, a1
337337
; RV32-NEXT: li a3, 2
338338
; RV32-NEXT: vs8r.v v16, (a1)
339+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
339340
; RV32-NEXT: vmv8r.v v8, v0
340341
; RV32-NEXT: vmv8r.v v16, v24
341342
; RV32-NEXT: call ext2
@@ -374,6 +375,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
374375
; RV64-NEXT: add a1, a3, a1
375376
; RV64-NEXT: li a3, 2
376377
; RV64-NEXT: vs8r.v v16, (a1)
378+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
377379
; RV64-NEXT: vmv8r.v v8, v0
378380
; RV64-NEXT: vmv8r.v v16, v24
379381
; RV64-NEXT: call ext2
@@ -451,6 +453,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
451453
; RV32-NEXT: add a1, sp, a1
452454
; RV32-NEXT: addi a1, a1, 128
453455
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
456+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
454457
; RV32-NEXT: vmv8r.v v16, v0
455458
; RV32-NEXT: call ext3
456459
; RV32-NEXT: addi sp, s0, -144
@@ -523,6 +526,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
523526
; RV64-NEXT: add a1, sp, a1
524527
; RV64-NEXT: addi a1, a1, 128
525528
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
529+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
526530
; RV64-NEXT: vmv8r.v v16, v0
527531
; RV64-NEXT: call ext3
528532
; RV64-NEXT: addi sp, s0, -144

llvm/test/CodeGen/RISCV/rvv/calling-conv.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return(
103103
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
104104
; RV32-NEXT: .cfi_offset ra, -4
105105
; RV32-NEXT: call callee_tuple_return
106+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
106107
; RV32-NEXT: vmv2r.v v6, v8
107108
; RV32-NEXT: vmv2r.v v8, v10
108109
; RV32-NEXT: vmv2r.v v10, v6
@@ -119,6 +120,7 @@ define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return(
119120
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
120121
; RV64-NEXT: .cfi_offset ra, -8
121122
; RV64-NEXT: call callee_tuple_return
123+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
122124
; RV64-NEXT: vmv2r.v v6, v8
123125
; RV64-NEXT: vmv2r.v v8, v10
124126
; RV64-NEXT: vmv2r.v v10, v6
@@ -144,6 +146,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i
144146
; RV32-NEXT: .cfi_def_cfa_offset 16
145147
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
146148
; RV32-NEXT: .cfi_offset ra, -4
149+
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
147150
; RV32-NEXT: vmv2r.v v6, v8
148151
; RV32-NEXT: vmv2r.v v8, v10
149152
; RV32-NEXT: vmv2r.v v10, v6
@@ -160,6 +163,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i
160163
; RV64-NEXT: .cfi_def_cfa_offset 16
161164
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
162165
; RV64-NEXT: .cfi_offset ra, -8
166+
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
163167
; RV64-NEXT: vmv2r.v v6, v8
164168
; RV64-NEXT: vmv2r.v v8, v10
165169
; RV64-NEXT: vmv2r.v v10, v6

0 commit comments

Comments
 (0)