Skip to content

Commit 365ebae

Browse files
committed
Rework to keep old logic
1 parent 98e51c8 commit 365ebae

14 files changed

+518
-346
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
10421042
return true;
10431043
}
10441044

1045+
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
1046+
DemandedFields &Demanded,
1047+
const MachineRegisterInfo *MRI);
1048+
10451049
// Given an incoming state reaching MI, minimally modifies that state so that it
10461050
// is compatible with MI. The resulting state is guaranteed to be semantically
10471051
// legal for MI, but may not be the state requested by MI.
@@ -1051,44 +1055,70 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
10511055
if (!RISCVII::hasSEWOp(TSFlags))
10521056
return;
10531057

1054-
VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
1058+
const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
10551059
assert(NewInfo.isValid() && !NewInfo.isUnknown());
10561060
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
10571061
return;
10581062

1063+
const VSETVLIInfo PrevInfo = Info;
10591064
if (Info.hasSEWLMULRatioOnly() || !Info.isValid() || Info.isUnknown())
10601065
Info = NewInfo;
10611066

1062-
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1063-
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1064-
// places.
1065-
DemandedFields Demanded = getDemanded(MI, MRI, ST);
1066-
if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
1067-
!Info.isUnknown()) {
1068-
if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1069-
Info.getSEW(), Info.getVLMUL(), NewInfo.getSEW())) {
1070-
NewInfo.setVLMul(*NewVLMul);
1071-
Demanded.LMUL = true;
1072-
}
1067+
if (!RISCVII::hasVLOp(TSFlags)) {
1068+
Info = NewInfo;
1069+
return;
10731070
}
10741071

1075-
// If MI only demands that VL has the same zeroness, we only need to set the
1076-
// AVL if the zeroness differs, or if VLMAX changes (since that prevents us
1077-
// from using vsetvli x0, x0).
1078-
bool CanUseX0X0Form =
1079-
Info.hasEquallyZeroAVL(NewInfo, *MRI) && Info.hasSameVLMAX(NewInfo);
1080-
if (Demanded.VLAny || (Demanded.VLZeroness && !CanUseX0X0Form))
1081-
Info.setAVL(NewInfo);
1072+
DemandedFields Demanded = getDemanded(MI, MRI, ST);
1073+
const VSETVLIInfo IncomingInfo =
1074+
adjustIncoming(PrevInfo, NewInfo, Demanded, MRI);
1075+
1076+
if (Demanded.usedVL())
1077+
Info.setAVL(IncomingInfo);
10821078

10831079
Info.setVTYPE(
1084-
((Demanded.LMUL || Demanded.SEWLMULRatio) ? NewInfo : Info).getVLMUL(),
1085-
((Demanded.SEW || Demanded.SEWLMULRatio) ? NewInfo : Info).getSEW(),
1080+
((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
1081+
.getVLMUL(),
1082+
((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
10861083
// Prefer tail/mask agnostic since it can be relaxed to undisturbed later
10871084
// if needed.
1088-
(Demanded.TailPolicy ? NewInfo : Info).getTailAgnostic() ||
1089-
NewInfo.getTailAgnostic(),
1090-
(Demanded.MaskPolicy ? NewInfo : Info).getMaskAgnostic() ||
1091-
NewInfo.getMaskAgnostic());
1085+
(Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
1086+
IncomingInfo.getTailAgnostic(),
1087+
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
1088+
IncomingInfo.getMaskAgnostic());
1089+
}
1090+
1091+
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
1092+
DemandedFields &Demanded,
1093+
const MachineRegisterInfo *MRI) {
1094+
VSETVLIInfo Info = NewInfo;
1095+
1096+
// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1097+
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1098+
// places.
1099+
if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
1100+
!PrevInfo.isUnknown()) {
1101+
if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1102+
PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
1103+
Info.setVLMul(*NewVLMul);
1104+
Demanded.LMUL = true;
1105+
}
1106+
1107+
// If we only demand VL zeroness (i.e. vmv.s.x and vmv.x.s), then there are
1108+
// only two behaviors, VL = 0 and VL > 0. We can discard the user requested
1109+
// AVL and just use the last one if we can prove it equally zero. This
1110+
// removes a vsetvli entirely if the types match or allows use of cheaper avl
1111+
// preserving variant if VLMAX doesn't change. If VLMAX might change, we
1112+
// couldn't use the "vsetvli x0, x0, vtype" variant, so we avoid the transform
1113+
// to prevent extending live range of an avl register operand.
1114+
// TODO: We can probably relax this for immediates.
1115+
if (Demanded.VLZeroness && !Demanded.VLAny && PrevInfo.isValid() &&
1116+
PrevInfo.hasEquallyZeroAVL(Info, *MRI) && Info.hasSameVLMAX(PrevInfo)) {
1117+
Info.setAVL(PrevInfo);
1118+
Demanded.demandVL();
1119+
}
1120+
1121+
return Info;
10921122
}
10931123

10941124
// Given a state with which we evaluated MI (see transferBefore above for why

llvm/test/CodeGen/RISCV/double_reduct.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ define i16 @add_ext_v32i16(<32 x i8> %a, <16 x i8> %b) {
113113
; CHECK-NEXT: li a0, 32
114114
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
115115
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
116-
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
116+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
117117
; CHECK-NEXT: vmv.x.s a0, v8
118118
; CHECK-NEXT: ret
119119
%ae = zext <32 x i8> %a to <32 x i16>

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
106106
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
107107
; RV32-NEXT: vle8.v v8, (a0)
108108
; RV32-NEXT: vmseq.vi v8, v8, 0
109-
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
109+
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
110110
; RV32-NEXT: vmv.x.s a0, v8
111111
; RV32-NEXT: srl a0, a0, a1
112112
; RV32-NEXT: andi a0, a0, 1
@@ -117,7 +117,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
117117
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
118118
; RV64-NEXT: vle8.v v8, (a0)
119119
; RV64-NEXT: vmseq.vi v8, v8, 0
120-
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
120+
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
121121
; RV64-NEXT: vmv.x.s a0, v8
122122
; RV64-NEXT: srl a0, a0, a1
123123
; RV64-NEXT: andi a0, a0, 1
@@ -128,7 +128,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
128128
; RV32ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, ma
129129
; RV32ZBS-NEXT: vle8.v v8, (a0)
130130
; RV32ZBS-NEXT: vmseq.vi v8, v8, 0
131-
; RV32ZBS-NEXT: vsetvli zero, zero, e16, m2, ta, ma
131+
; RV32ZBS-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
132132
; RV32ZBS-NEXT: vmv.x.s a0, v8
133133
; RV32ZBS-NEXT: bext a0, a0, a1
134134
; RV32ZBS-NEXT: ret
@@ -138,7 +138,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
138138
; RV64ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, ma
139139
; RV64ZBS-NEXT: vle8.v v8, (a0)
140140
; RV64ZBS-NEXT: vmseq.vi v8, v8, 0
141-
; RV64ZBS-NEXT: vsetvli zero, zero, e16, m2, ta, ma
141+
; RV64ZBS-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
142142
; RV64ZBS-NEXT: vmv.x.s a0, v8
143143
; RV64ZBS-NEXT: bext a0, a0, a1
144144
; RV64ZBS-NEXT: ret
@@ -155,7 +155,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
155155
; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
156156
; RV32-NEXT: vle8.v v8, (a0)
157157
; RV32-NEXT: vmseq.vi v10, v8, 0
158-
; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma
158+
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
159159
; RV32-NEXT: vmv.x.s a0, v10
160160
; RV32-NEXT: srl a0, a0, a1
161161
; RV32-NEXT: andi a0, a0, 1
@@ -167,7 +167,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
167167
; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
168168
; RV64-NEXT: vle8.v v8, (a0)
169169
; RV64-NEXT: vmseq.vi v10, v8, 0
170-
; RV64-NEXT: vsetvli zero, zero, e32, m8, ta, ma
170+
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
171171
; RV64-NEXT: vmv.x.s a0, v10
172172
; RV64-NEXT: srl a0, a0, a1
173173
; RV64-NEXT: andi a0, a0, 1
@@ -179,7 +179,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
179179
; RV32ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma
180180
; RV32ZBS-NEXT: vle8.v v8, (a0)
181181
; RV32ZBS-NEXT: vmseq.vi v10, v8, 0
182-
; RV32ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma
182+
; RV32ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
183183
; RV32ZBS-NEXT: vmv.x.s a0, v10
184184
; RV32ZBS-NEXT: bext a0, a0, a1
185185
; RV32ZBS-NEXT: ret
@@ -190,7 +190,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
190190
; RV64ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma
191191
; RV64ZBS-NEXT: vle8.v v8, (a0)
192192
; RV64ZBS-NEXT: vmseq.vi v10, v8, 0
193-
; RV64ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma
193+
; RV64ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
194194
; RV64ZBS-NEXT: vmv.x.s a0, v10
195195
; RV64ZBS-NEXT: bext a0, a0, a1
196196
; RV64ZBS-NEXT: ret
@@ -221,7 +221,7 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
221221
; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
222222
; RV64-NEXT: vle8.v v8, (a0)
223223
; RV64-NEXT: vmseq.vi v12, v8, 0
224-
; RV64-NEXT: vsetvli zero, a2, e64, m4, ta, ma
224+
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
225225
; RV64-NEXT: vmv.x.s a0, v12
226226
; RV64-NEXT: srl a0, a0, a1
227227
; RV64-NEXT: andi a0, a0, 1
@@ -246,7 +246,7 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
246246
; RV64ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma
247247
; RV64ZBS-NEXT: vle8.v v8, (a0)
248248
; RV64ZBS-NEXT: vmseq.vi v12, v8, 0
249-
; RV64ZBS-NEXT: vsetvli zero, a2, e64, m4, ta, ma
249+
; RV64ZBS-NEXT: vsetivli zero, 1, e64, m1, ta, ma
250250
; RV64ZBS-NEXT: vmv.x.s a0, v12
251251
; RV64ZBS-NEXT: bext a0, a0, a1
252252
; RV64ZBS-NEXT: ret

0 commit comments

Comments
 (0)