Skip to content

Commit a63bd7e

Browse files
committed
[RISCV] Use NoReg in place of IMPLICIT_DEF for undefined passthru operands
In a recent series of refactorings (described here: https://discourse.llvm.org/t/riscv-transition-in-vector-pseudo-structure-policy-variants/71295), I greatly increased the number of IMPLICIT_DEF operands to our vector instructions. This has turned out to have an unexpected negative impact because MachineCSE does not CSE IMPLICIT_DEFs, and thus does not CSE any instruction with an IMPLICIT_DEF operand. SelectionDAG *does* CSE the same case, but that only covers the same block case, not the cross block case. This lead to the performance regression reported in llvm#64282. This change is a slightly ugly hack to side step the issue. Instead of fixing the root cause (lack of CSE for IMPLICIT_DEF) or undoing the operand changes, we leave the extra operand in place, and use NoReg in place of IMPLICIT_DEF. I then convert back to IMPLICIT_DEF just before register allocation so that ProcessImplicitDefs and TwoAddressInstructions can do the normal transforms to Undef tied registers. We may end up backporting this into the 17.x release branch. Given how late in the release cycle this is landing, that's much less likely now, but still a possibility. Differential Revision: https://reviews.llvm.org/D156909
1 parent d9c2c6f commit a63bd7e

File tree

123 files changed

+5790
-5573
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+5790
-5573
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,14 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
149149

150150
MadeChange |= doPeepholeMergeVVMFold();
151151

152+
// After we're done with everything else, convert IMPLICIT_DEF
153+
// passthru operands to NoRegister. This is required to workaround
154+
// an optimization deficiency in MachineCSE. This really should
155+
// be merged back into each of the patterns (i.e. there's no good
156+
// reason not to go directly to NoReg), but is being done this way
157+
// to allow easy backporting.
158+
MadeChange |= doPeepholeNoRegPassThru();
159+
152160
if (MadeChange)
153161
CurDAG->RemoveDeadNodes();
154162
}
@@ -3593,6 +3601,44 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
35933601
return MadeChange;
35943602
}
35953603

3604+
/// If our passthru is an implicit_def, use noreg instead. This side
3605+
/// steps issues with MachineCSE not being able to CSE expressions with
3606+
/// IMPLICIT_DEF operands while preserving the semantic intent. See
3607+
/// pr64282 for context. Note that this transform is the last one
3608+
/// performed at ISEL DAG to DAG.
3609+
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3610+
bool MadeChange = false;
3611+
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3612+
3613+
while (Position != CurDAG->allnodes_begin()) {
3614+
SDNode *N = &*--Position;
3615+
if (N->use_empty() || !N->isMachineOpcode())
3616+
continue;
3617+
3618+
const unsigned Opc = N->getMachineOpcode();
3619+
if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3620+
!RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3621+
!isImplicitDef(N->getOperand(0)))
3622+
continue;
3623+
3624+
SmallVector<SDValue> Ops;
3625+
Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3626+
for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3627+
SDValue Op = N->getOperand(I);
3628+
Ops.push_back(Op);
3629+
}
3630+
3631+
MachineSDNode *Result =
3632+
CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3633+
Result->setFlags(N->getFlags());
3634+
CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3635+
ReplaceUses(N, Result);
3636+
MadeChange = true;
3637+
}
3638+
return MadeChange;
3639+
}
3640+
3641+
35963642
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
35973643
// for instruction scheduling.
35983644
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
187187
bool doPeepholeSExtW(SDNode *Node);
188188
bool doPeepholeMaskedRVV(MachineSDNode *Node);
189189
bool doPeepholeMergeVVMFold();
190+
bool doPeepholeNoRegPassThru();
190191
bool performVMergeToVMv(SDNode *N);
191192
bool performCombineVMergeAndVOps(SDNode *N);
192193
};

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,13 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
160160
// lanes are undefined.
161161
return true;
162162

163-
// If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands
164-
// are solely IMPLICIT_DEFS), the pass through lanes are undefined.
163+
// If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQEUENCE whose
164+
// operands are solely IMPLICIT_DEFS, then the pass through lanes are
165+
// undefined.
165166
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
167+
if (UseMO.getReg() == RISCV::NoRegister)
168+
return true;
169+
166170
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
167171
if (UseMI->isImplicitDef())
168172
return true;

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,16 @@
5252
///
5353
/// Currently, the policy is represented via the following instrinsic families:
5454
/// * _MASK - Can represent all three policy states for both tail and mask. If
55-
/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
56-
/// policy operand and tablegen flags drive the interpretation. (If policy
57-
/// operand is not present - there are a couple, thought we're rapidly
58-
/// removing them - a non-undefined policy defaults to "tail agnostic", and
59-
/// "mask undisturbed". Since this is the only variant with a mask, all
60-
/// other variants are "mask undefined".
55+
/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined".
56+
/// Otherwise, policy operand and tablegen flags drive the interpretation.
57+
/// (If policy operand is not present - there are a couple, though we're
58+
/// rapidly removing them - a non-undefined policy defaults to "tail
59+
/// agnostic", and "mask undisturbed". Since this is the only variant with
60+
/// a mask, all other variants are "mask undefined".
6161
/// * Unsuffixed w/ both passthrough and policy operand. Can represent all
62-
/// three policy states. If passthrough is IMPLICIT_DEF, then represents
63-
/// "undefined". Otherwise, policy operand and tablegen flags drive the
64-
/// interpretation.
62+
/// three policy states. If passthrough is IMPLICIT_DEF (or NoReg), then
63+
/// represents "undefined". Otherwise, policy operand and tablegen flags
64+
/// drive the interpretation.
6565
/// * Unsuffixed w/o passthrough or policy operand -- Does not have a
6666
/// passthrough operand, and thus represents the "undefined" state. Note
6767
/// that terminology in code frequently refers to these as "TA" which is
@@ -70,8 +70,8 @@
7070
/// * _TU w/o policy operand -- Has a passthrough operand, and always
7171
/// represents the tail undisturbed state.
7272
/// * _TU w/policy operand - Can represent all three policy states. If
73-
/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
74-
/// policy operand and tablegen flags drive the interpretation.
73+
/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined".
74+
/// Otherwise, policy operand and tablegen flags drive the interpretation.
7575
///
7676
//===----------------------------------------------------------------------===//
7777

llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
// This file implements a function pass that initializes undef vector value to
1010
// temporary pseudo instruction and remove it in expandpseudo pass to prevent
1111
// register allocation resulting in a constraint violated result for vector
12-
// instruction.
12+
// instruction. It also rewrites the NoReg tied operand back to an
13+
// IMPLICIT_DEF.
1314
//
1415
// RISC-V vector instruction has register overlapping constraint for certain
1516
// instructions, and will cause illegal instruction trap if violated, we use
@@ -30,6 +31,12 @@
3031
//
3132
// See also: https://github.com/llvm/llvm-project/issues/50157
3233
//
34+
// Additionally, this pass rewrites tied operands of vector instructions
35+
// from NoReg to IMPLICIT_DEF. (Not that this is a non-overlapping set of
36+
// operands to the above.) We use NoReg to side step a MachineCSE
37+
// optimization quality problem but need to convert back before
38+
// TwoAddressInstruction. See pr64282 for context.
39+
//
3340
//===----------------------------------------------------------------------===//
3441

3542
#include "RISCV.h"
@@ -244,6 +251,26 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
244251
bool Changed = false;
245252
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
246253
MachineInstr &MI = *I;
254+
255+
// If we used NoReg to represent the passthru, switch this back to being
256+
// an IMPLICIT_DEF before TwoAddressInstructions.
257+
unsigned UseOpIdx;
258+
if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
259+
MachineOperand &UseMO = MI.getOperand(UseOpIdx);
260+
if (UseMO.getReg() == RISCV::NoRegister) {
261+
const TargetRegisterClass *RC =
262+
TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF);
263+
Register NewDest = MRI->createVirtualRegister(RC);
264+
// We don't have a way to update dead lanes, so keep track of the
265+
// new register so that we avoid querying it later.
266+
NewRegs.insert(NewDest);
267+
BuildMI(MBB, I, I->getDebugLoc(),
268+
TII->get(TargetOpcode::IMPLICIT_DEF), NewDest);
269+
UseMO.setReg(NewDest);
270+
Changed = true;
271+
}
272+
}
273+
247274
if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI))
248275
Changed |= handleSubReg(MF, MI, DLD);
249276
if (MI.isImplicitDef()) {

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ class RISCVPassConfig : public TargetPassConfig {
273273
void addPreRegAlloc() override;
274274
void addPostRegAlloc() override;
275275
void addOptimizedRegAlloc() override;
276+
void addFastRegAlloc() override;
276277
};
277278
} // namespace
278279

@@ -392,12 +393,17 @@ void RISCVPassConfig::addPreRegAlloc() {
392393
}
393394

394395
void RISCVPassConfig::addOptimizedRegAlloc() {
395-
if (getOptimizeRegAlloc())
396-
insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
396+
insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
397397

398398
TargetPassConfig::addOptimizedRegAlloc();
399399
}
400400

401+
void RISCVPassConfig::addFastRegAlloc() {
402+
addPass(createRISCVInitUndefPass());
403+
TargetPassConfig::addFastRegAlloc();
404+
}
405+
406+
401407
void RISCVPassConfig::addPostRegAlloc() {
402408
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
403409
addPass(createRISCVRedundantCopyEliminationPass());

llvm/test/CodeGen/RISCV/O0-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
4343
; CHECK-NEXT: RISC-V Insert VSETVLI pass
4444
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
45+
; CHECK-NEXT: RISC-V init undef pass
4546
; CHECK-NEXT: Eliminate PHI nodes for register allocation
4647
; CHECK-NEXT: Two-Address instruction pass
4748
; CHECK-NEXT: Fast Register Allocator

llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ define void @bar() nounwind {
1717
; CHECK-NEXT: andi sp, sp, -64
1818
; CHECK-NEXT: mv s1, sp
1919
; CHECK-NEXT: addi sp, sp, -16
20-
; CHECK-NEXT: addi a0, s1, 64
21-
; CHECK-NEXT: sd a0, 0(sp)
22-
; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
20+
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
2321
; CHECK-NEXT: vmv.v.i v8, 0
22+
; CHECK-NEXT: addi a0, s1, 64
2423
; CHECK-NEXT: vs8r.v v8, (a0)
24+
; CHECK-NEXT: sd a0, 0(sp)
2525
; CHECK-NEXT: li a0, 0
2626
; CHECK-NEXT: li a1, 0
2727
; CHECK-NEXT: li a2, 0

llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,15 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
103103
define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
104104
; CHECK-LABEL: fv32:
105105
; CHECK: # %bb.0:
106-
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
107106
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
108107
; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
108+
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
109109
; CHECK-NEXT: vle64.v v8, (a0)
110-
; CHECK-NEXT: vid.v v16
111-
; CHECK-NEXT: vsaddu.vx v16, v16, a1
112-
; CHECK-NEXT: vmsltu.vx v0, v16, a2
113110
; CHECK-NEXT: vsaddu.vx v8, v8, a1
114111
; CHECK-NEXT: vmsltu.vx v16, v8, a2
112+
; CHECK-NEXT: vid.v v8
113+
; CHECK-NEXT: vsaddu.vx v8, v8, a1
114+
; CHECK-NEXT: vmsltu.vx v0, v8, a2
115115
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
116116
; CHECK-NEXT: vslideup.vi v0, v16, 2
117117
; CHECK-NEXT: ret
@@ -122,15 +122,15 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
122122
define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
123123
; CHECK-LABEL: fv64:
124124
; CHECK: # %bb.0:
125-
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
126125
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
127126
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
127+
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
128128
; CHECK-NEXT: vle64.v v8, (a0)
129-
; CHECK-NEXT: vid.v v16
130-
; CHECK-NEXT: vsaddu.vx v16, v16, a1
131-
; CHECK-NEXT: vmsltu.vx v0, v16, a2
132129
; CHECK-NEXT: vsaddu.vx v8, v8, a1
133130
; CHECK-NEXT: vmsltu.vx v16, v8, a2
131+
; CHECK-NEXT: vid.v v8
132+
; CHECK-NEXT: vsaddu.vx v8, v8, a1
133+
; CHECK-NEXT: vmsltu.vx v0, v8, a2
134134
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
135135
; CHECK-NEXT: vslideup.vi v0, v16, 2
136136
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
@@ -157,15 +157,15 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
157157
define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
158158
; CHECK-LABEL: fv128:
159159
; CHECK: # %bb.0:
160-
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
161160
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
162161
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0)
162+
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
163163
; CHECK-NEXT: vle64.v v8, (a0)
164-
; CHECK-NEXT: vid.v v16
165-
; CHECK-NEXT: vsaddu.vx v16, v16, a1
166-
; CHECK-NEXT: vmsltu.vx v0, v16, a2
167164
; CHECK-NEXT: vsaddu.vx v8, v8, a1
168165
; CHECK-NEXT: vmsltu.vx v16, v8, a2
166+
; CHECK-NEXT: vid.v v8
167+
; CHECK-NEXT: vsaddu.vx v8, v8, a1
168+
; CHECK-NEXT: vmsltu.vx v0, v8, a2
169169
; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
170170
; CHECK-NEXT: vslideup.vi v0, v16, 2
171171
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)

0 commit comments

Comments
 (0)