Commit fde2f52

[RISCV] Minimally modify incoming state in transferBefore
transferBefore currently takes an incoming state and an instruction, computes the new state needed for the instruction, and then modifies that new state to be more similar to the incoming state. This patch reverses the approach: it instead takes the incoming state and modifies only the bits that are demanded by the instruction. I find this makes things slightly easier to reason about. It also allows us to use the x0, x0 form in more places, in particular with vmv.x.s, since we're no longer relying on isScalarInsertInstr but instead reusing the logic in getDemanded. I haven't had a chance to check, but hopefully this also addresses some of the regressions seen in #71501.
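For illustration only, here is a minimal C++ sketch of the demanded-fields idea described above. The names (State, Demands, mergeDemanded) are hypothetical and simplified; this is not the actual VSETVLIInfo/DemandedFields code shown in the diff below.

// Hypothetical, simplified sketch (not the code in this commit): start from
// the incoming state and overwrite only the fields the instruction demands,
// rather than starting from the requested state and relaxing it back toward
// the incoming one.
struct State {
  unsigned SEW = 8;
  unsigned LMUL = 1;
  bool TailAgnostic = true;
  bool MaskAgnostic = true;
};

struct Demands {
  bool SEW = false;
  bool LMUL = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
};

State mergeDemanded(State Incoming, const State &Requested, const Demands &D) {
  // Only demanded fields change; undemanded fields keep their incoming values,
  // which makes it more likely that the existing vsetvli (or the cheaper
  // "vsetvli x0, x0, vtype" form) can be reused.
  if (D.SEW)
    Incoming.SEW = Requested.SEW;
  if (D.LMUL)
    Incoming.LMUL = Requested.LMUL;
  if (D.TailPolicy)
    Incoming.TailAgnostic = Requested.TailAgnostic;
  if (D.MaskPolicy)
    Incoming.MaskAgnostic = Requested.MaskAgnostic;
  return Incoming;
}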
1 parent 933dd03 commit fde2f52

13 files changed: +374 -510 lines

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 54 additions & 48 deletions
@@ -479,6 +479,8 @@ class VSETVLIInfo {
 
   unsigned getSEW() const { return SEW; }
   RISCVII::VLMUL getVLMUL() const { return VLMul; }
+  bool getTailAgnostic() const { return TailAgnostic; }
+  bool getMaskAgnostic() const { return MaskAgnostic; }
 
   bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
     if (hasAVLImm())
@@ -1013,73 +1015,77 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
   return true;
 }
 
-// Given an incoming state reaching MI, modifies that state so that it is minimally
-// compatible with MI. The resulting state is guaranteed to be semantically legal
-// for MI, but may not be the state requested by MI.
+// Given an incoming state reaching MI, minimally modifies that state so that it
+// is compatible with MI. The resulting state is guaranteed to be semantically
+// legal for MI, but may not be the state requested by MI.
 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                         const MachineInstr &MI) const {
   uint64_t TSFlags = MI.getDesc().TSFlags;
   if (!RISCVII::hasSEWOp(TSFlags))
     return;
 
-  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+  VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
   assert(NewInfo.isValid() && !NewInfo.isUnknown());
   if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
     return;
 
-  const VSETVLIInfo PrevInfo = Info;
-  Info = NewInfo;
+  if (Info.hasSEWLMULRatioOnly() || !Info.isValid() || Info.isUnknown())
+    Info = NewInfo;
 
-  if (!RISCVII::hasVLOp(TSFlags))
-    return;
+  DemandedFields Demanded = getDemanded(MI, MRI, ST);
 
   // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
   // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
   // places.
-  DemandedFields Demanded = getDemanded(MI, MRI, ST);
-  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
-      !PrevInfo.isUnknown()) {
-    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
-            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
-      Info.setVLMul(*NewVLMul);
-  }
-
-  // If we only demand VL zeroness (i.e. vmv.s.x and vmv.x.s), then there are
-  // only two behaviors, VL = 0 and VL > 0. We can discard the user requested
-  // AVL and just use the last one if we can prove it equally zero. This
-  // removes a vsetvli entirely if the types match or allows use of cheaper avl
-  // preserving variant if VLMAX doesn't change. If VLMAX might change, we
-  // couldn't use the 'vsetvli x0, x0, vtype" variant, so we avoid the transform
-  // to prevent extending live range of an avl register operand.
-  // TODO: We can probably relax this for immediates.
-  if (Demanded.VLZeroness && !Demanded.VLAny && PrevInfo.isValid() &&
-      PrevInfo.hasEquallyZeroAVL(Info, *MRI) && Info.hasSameVLMAX(PrevInfo)) {
-    if (PrevInfo.hasAVLImm())
-      Info.setAVLImm(PrevInfo.getAVLImm());
-    else
-      Info.setAVLReg(PrevInfo.getAVLReg());
-    return;
+  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
+      !Info.isUnknown()) {
+    if (auto SameRatioLMUL = RISCVVType::getSameRatioLMUL(
+            Info.getSEW(), Info.getVLMUL(), NewInfo.getSEW())) {
+      NewInfo.setVLMul(*SameRatioLMUL);
+      Demanded.LMUL = true;
+    }
   }
 
-  // If AVL is defined by a vsetvli with the same VLMAX, we can
-  // replace the AVL operand with the AVL of the defining vsetvli.
-  // We avoid general register AVLs to avoid extending live ranges
-  // without being sure we can kill the original source reg entirely.
-  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
-    return;
-  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
-  if (!DefMI || !isVectorConfigInstr(*DefMI))
-    return;
+  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the AVL
+  // operand with the AVL of the defining vsetvli. We avoid general register
+  // AVLs to avoid extending live ranges without being sure we can kill the
+  // original source reg entirely.
+  if (RISCVII::hasVLOp(TSFlags) && NewInfo.hasAVLReg() &&
+      NewInfo.getAVLReg().isVirtual()) {
+    MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg());
+    if (DefMI && isVectorConfigInstr(*DefMI)) {
+      VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+      if (DefInfo.hasSameVLMAX(NewInfo) &&
+          (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
+        if (DefInfo.hasAVLImm())
+          NewInfo.setAVLImm(DefInfo.getAVLImm());
+        else
+          NewInfo.setAVLReg(DefInfo.getAVLReg());
+      }
+    }
+  }
 
-  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-  if (DefInfo.hasSameVLMAX(Info) &&
-      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
-    if (DefInfo.hasAVLImm())
-      Info.setAVLImm(DefInfo.getAVLImm());
+  // If MI only demands that VL has the same zeroness, we only need to set the
+  // AVL if the zeroness differs, or if VLMAX changes (since that prevents us
+  // from using vsetvli x0, x0).
+  bool CanUseX0X0Form =
+      Info.hasEquallyZeroAVL(NewInfo, *MRI) && Info.hasSameVLMAX(NewInfo);
+  if (Demanded.VLAny || (Demanded.VLZeroness && !CanUseX0X0Form)) {
+    if (NewInfo.hasAVLImm())
+      Info.setAVLImm(NewInfo.getAVLImm());
     else
-      Info.setAVLReg(DefInfo.getAVLReg());
-    return;
-  }
+      Info.setAVLReg(NewInfo.getAVLReg());
+  }
+
+  Info.setVTYPE(
+      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? NewInfo : Info).getVLMUL(),
+      ((Demanded.SEW || Demanded.SEWLMULRatio) ? NewInfo : Info).getSEW(),
+      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
+      // if needed.
+      (Demanded.TailPolicy ? NewInfo : Info).getTailAgnostic() ||
+          NewInfo.getTailAgnostic(),
+      (Demanded.MaskPolicy ? NewInfo : Info).getMaskAgnostic() ||
+          NewInfo.getMaskAgnostic());
 }
 
 // Given a state with which we evaluated MI (see transferBefore above for why

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll

Lines changed: 10 additions & 10 deletions
@@ -106,7 +106,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT: vle8.v v8, (a0)
 ; RV32-NEXT: vmseq.vi v8, v8, 0
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: srl a0, a0, a1
 ; RV32-NEXT: andi a0, a0, 1
@@ -117,7 +117,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: vmseq.vi v8, v8, 0
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: srl a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 1
@@ -128,7 +128,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV32ZBS-NEXT: vle8.v v8, (a0)
 ; RV32ZBS-NEXT: vmseq.vi v8, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32ZBS-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV32ZBS-NEXT: vmv.x.s a0, v8
 ; RV32ZBS-NEXT: bext a0, a0, a1
 ; RV32ZBS-NEXT: ret
@@ -138,7 +138,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: vmseq.vi v8, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64ZBS-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV64ZBS-NEXT: vmv.x.s a0, v8
 ; RV64ZBS-NEXT: bext a0, a0, a1
 ; RV64ZBS-NEXT: ret
@@ -155,7 +155,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV32-NEXT: vle8.v v8, (a0)
 ; RV32-NEXT: vmseq.vi v10, v8, 0
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v10
 ; RV32-NEXT: srl a0, a0, a1
 ; RV32-NEXT: andi a0, a0, 1
@@ -167,7 +167,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: vmseq.vi v10, v8, 0
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v10
 ; RV64-NEXT: srl a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 1
@@ -179,7 +179,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV32ZBS-NEXT: vle8.v v8, (a0)
 ; RV32ZBS-NEXT: vmseq.vi v10, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV32ZBS-NEXT: vmv.x.s a0, v10
 ; RV32ZBS-NEXT: bext a0, a0, a1
 ; RV32ZBS-NEXT: ret
@@ -190,7 +190,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: vmseq.vi v10, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV64ZBS-NEXT: vmv.x.s a0, v10
 ; RV64ZBS-NEXT: bext a0, a0, a1
 ; RV64ZBS-NEXT: ret
@@ -221,7 +221,7 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: vmseq.vi v12, v8, 0
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a2, e64, m4, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v12
 ; RV64-NEXT: srl a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 1
@@ -246,7 +246,7 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: vmseq.vi v12, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64ZBS-NEXT: vsetvli zero, a2, e64, m4, ta, ma
 ; RV64ZBS-NEXT: vmv.x.s a0, v12
 ; RV64ZBS-NEXT: bext a0, a0, a1
 ; RV64ZBS-NEXT: ret
