Skip to content

[RISCV] Convert implicit_def tuples to noreg in post-isel peephole #91173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3478,8 +3478,15 @@ static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
}

// Return true if the value V is entirely undefined: either a bare
// IMPLICIT_DEF node, or a REG_SEQUENCE all of whose source operands are
// (recursively) implicit defs. The latter form arises for register tuples
// (e.g. segment-load passthrough operands) built purely from IMPLICIT_DEFs.
static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    // REG_SEQUENCE operands come in (value, subreg-index) pairs starting at
    // operand 1; check every value operand.
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
Expand Down
45 changes: 10 additions & 35 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,44 +173,18 @@ static bool isMaskRegOp(const MachineInstr &MI) {
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
static bool hasUndefinedMergeOp(const MachineInstr &MI) {

unsigned UseOpIdx;
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
// If there is no passthrough operand, then the pass through
// lanes are undefined.
return true;

// If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose
// operands are solely IMPLICIT_DEFS, then the pass through lanes are
// undefined.
// All undefined passthrus should be $noreg: see
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
if (UseMO.getReg() == RISCV::NoRegister)
return true;

if (UseMO.isUndef())
return true;
if (UseMO.getReg().isPhysical())
return false;

MachineInstr *UseMI = MRI.getUniqueVRegDef(UseMO.getReg());
assert(UseMI);
if (UseMI->isImplicitDef())
return true;

if (UseMI->isRegSequence()) {
for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
MachineInstr *SourceMI =
MRI.getUniqueVRegDef(UseMI->getOperand(i).getReg());
assert(SourceMI);
if (!SourceMI->isImplicitDef())
return false;
}
return true;
}

return false;
return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
Expand Down Expand Up @@ -429,7 +403,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
// this for any tail agnostic operation, but we can't as TA requires
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
if (hasUndefinedMergeOp(MI, *MRI)) {
if (hasUndefinedMergeOp(MI)) {
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
else
Expand Down Expand Up @@ -913,7 +887,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,

bool TailAgnostic = true;
bool MaskAgnostic = true;
if (!hasUndefinedMergeOp(MI, *MRI)) {
if (!hasUndefinedMergeOp(MI)) {
// Start with undisturbed.
TailAgnostic = false;
MaskAgnostic = false;
Expand Down Expand Up @@ -1109,7 +1083,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// * The LMUL1 restriction is for machines whose latency may depend on VL.
// * As above, this is only legal for tail "undefined" not "agnostic".
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
Used.VLAny = false;
Used.VLZeroness = true;
Used.LMUL = false;
Expand All @@ -1121,8 +1095,9 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
// Since a splat is non-constant time in LMUL, we do need to be careful to not
// increase the number of active vector registers (unlike for vmv.s.x.)
if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
hasUndefinedMergeOp(MI)) {
Used.LMUL = false;
Used.SEWLMULRatio = false;
Used.VLAny = false;
Expand Down
38 changes: 5 additions & 33 deletions llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # implicit-def: $v10
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -95,13 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # implicit-def: $v10
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -177,13 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # implicit-def: $v12m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2
; SPILL-O0-NEXT: vmv2r.v v10, v12
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -262,13 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 2
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8m4
; SPILL-O0-NEXT: # implicit-def: $v12m4
; SPILL-O0-NEXT: # implicit-def: $v16m4
; SPILL-O0-NEXT: # implicit-def: $v12m4
; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; SPILL-O0-NEXT: vmv4r.v v12, v16
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -347,16 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # implicit-def: $v16m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # implicit-def: $v14m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vmv2r.v v10, v16
; SPILL-O0-NEXT: vmv2r.v v12, v14
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down
38 changes: 5 additions & 33 deletions llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,8 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # implicit-def: $v10
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -95,13 +90,8 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # implicit-def: $v10
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9
; SPILL-O0-NEXT: vmv1r.v v9, v10
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -177,13 +167,8 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # implicit-def: $v12m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2
; SPILL-O0-NEXT: vmv2r.v v10, v12
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -262,13 +247,8 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 2
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8m4
; SPILL-O0-NEXT: # implicit-def: $v12m4
; SPILL-O0-NEXT: # implicit-def: $v16m4
; SPILL-O0-NEXT: # implicit-def: $v12m4
; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4
; SPILL-O0-NEXT: vmv4r.v v12, v16
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down Expand Up @@ -347,16 +327,8 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: csrr a2, vlenb
; SPILL-O0-NEXT: slli a2, a2, 1
; SPILL-O0-NEXT: sub sp, sp, a2
; SPILL-O0-NEXT: # implicit-def: $v8m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # implicit-def: $v16m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # implicit-def: $v14m2
; SPILL-O0-NEXT: # implicit-def: $v10m2
; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vmv2r.v v10, v16
; SPILL-O0-NEXT: vmv2r.v v12, v14
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
Expand Down
7 changes: 1 addition & 6 deletions llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,7 @@ define i64 @test_vlseg2ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vrn2m1 = REG_SEQUENCE [[DEF]], %subreg.sub_vrm1_0, [[DEF2]], %subreg.sub_vrm1_1
; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[REG_SEQUENCE]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1)
; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]]
; CHECK-NEXT: PseudoRET implicit $x10
entry:
Expand Down