@@ -136,6 +136,26 @@ static bool isMaskRegOp(const MachineInstr &MI) {
136
136
return Log2SEW == 0 ;
137
137
}
138
138
139
+ // / Return true if the inactive elements in the result are entirely undefined.
140
+ // / Note that this is different from "agnostic" as defined by the vector
141
+ // / specification. Agnostic requires each lane to either be undisturbed, or
142
+ // / take the value -1; no other value is allowed.
143
+ static bool hasUndefinedMergeOp (const MachineInstr &MI,
144
+ const MachineRegisterInfo &MRI) {
145
+
146
+ unsigned UseOpIdx;
147
+ if (!MI.isRegTiedToUseOperand (0 , &UseOpIdx))
148
+ // If there is no passthrough operand, then the pass through
149
+ // lanes are undefined.
150
+ return true ;
151
+
152
+ // If the tied operand is an IMPLICIT_DEF, the pass through lanes
153
+ // are undefined.
154
+ const MachineOperand &UseMO = MI.getOperand (UseOpIdx);
155
+ MachineInstr *UseMI = MRI.getVRegDef (UseMO.getReg ());
156
+ return UseMI && UseMI->isImplicitDef ();
157
+ }
158
+
139
159
// / Which subfields of VL or VTYPE have values we need to preserve?
140
160
struct DemandedFields {
141
161
// Some unknown property of VL is used. If demanded, must preserve entire
@@ -315,14 +335,13 @@ DemandedFields getDemanded(const MachineInstr &MI,
315
335
Res.LMUL = false ;
316
336
Res.SEWLMULRatio = false ;
317
337
Res.VLAny = false ;
318
- // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand , we don't
338
+ // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined* , we don't
319
339
// need to preserve any other bits and are thus compatible with any larger
320
340
// etype and can disregard policy bits. Warning: It's tempting to try doing
321
341
// this for any tail agnostic operation, but we can't as TA requires
322
342
// tail lanes to either be the original value or -1. We are writing
323
343
// unknown bits to the lanes here.
324
- auto *VRegDef = MRI->getVRegDef (MI.getOperand (1 ).getReg ());
325
- if (VRegDef && VRegDef->isImplicitDef ()) {
344
+ if (hasUndefinedMergeOp (MI, *MRI)) {
326
345
Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
327
346
Res.TailPolicy = false ;
328
347
}
@@ -691,9 +710,9 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
691
710
const MachineRegisterInfo *MRI) {
692
711
VSETVLIInfo InstrInfo;
693
712
694
- bool TailAgnostic, MaskAgnostic ;
695
- unsigned UseOpIdx ;
696
- if (MI. isRegTiedToUseOperand ( 0 , &UseOpIdx )) {
713
+ bool TailAgnostic = true ;
714
+ bool MaskAgnostic = true ;
715
+ if (! hasUndefinedMergeOp (MI, *MRI )) {
697
716
// Start with undisturbed.
698
717
TailAgnostic = false ;
699
718
MaskAgnostic = false ;
@@ -708,27 +727,13 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
708
727
MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
709
728
}
710
729
711
- // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
712
- // MaskAgnostic.
713
- const MachineOperand &UseMO = MI.getOperand (UseOpIdx);
714
- MachineInstr *UseMI = MRI->getVRegDef (UseMO.getReg ());
715
- if (UseMI && UseMI->isImplicitDef ()) {
716
- TailAgnostic = true ;
717
- MaskAgnostic = true ;
718
- }
719
730
// Some pseudo instructions force a tail agnostic policy despite having a
720
731
// tied def.
721
732
if (RISCVII::doesForceTailAgnostic (TSFlags))
722
733
TailAgnostic = true ;
723
734
724
735
if (!RISCVII::usesMaskPolicy (TSFlags))
725
736
MaskAgnostic = true ;
726
- } else {
727
- // If there is no tied operand,, there shouldn't be a policy operand.
728
- assert (!RISCVII::hasVecPolicyOp (TSFlags) && " Unexpected policy operand" );
729
- // No tied operand use agnostic policies.
730
- TailAgnostic = true ;
731
- MaskAgnostic = true ;
732
737
}
733
738
734
739
RISCVII::VLMUL VLMul = RISCVII::getLMul (TSFlags);
@@ -890,23 +895,20 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
890
895
891
896
DemandedFields Used = getDemanded (MI, MRI);
892
897
893
- // A slidedown/slideup with an IMPLICIT_DEF merge op can freely clobber
898
+ // A slidedown/slideup with an *undefined* merge op can freely clobber
894
899
// elements not copied from the source vector (e.g. masked off, tail, or
895
900
// slideup's prefix). Notes:
896
901
// * We can't modify SEW here since the slide amount is in units of SEW.
897
902
// * VL=1 is special only because we have existing support for zero vs
898
903
// non-zero VL. We could generalize this if we had a VL > C predicate.
899
904
// * The LMUL1 restriction is for machines whose latency may depend on VL.
900
- // * As above, this is only legal for IMPLICIT_DEF, not TA .
905
+ // * As above, this is only legal for tail "undefined", not "agnostic".
901
906
if (isVSlideInstr (MI) && Require.hasAVLImm () && Require.getAVLImm () == 1 &&
902
- isLMUL1OrSmaller (CurInfo.getVLMUL ())) {
903
- auto *VRegDef = MRI->getVRegDef (MI.getOperand (1 ).getReg ());
904
- if (VRegDef && VRegDef->isImplicitDef ()) {
905
- Used.VLAny = false ;
906
- Used.VLZeroness = true ;
907
- Used.LMUL = false ;
908
- Used.TailPolicy = false ;
909
- }
907
+ isLMUL1OrSmaller (CurInfo.getVLMUL ()) && hasUndefinedMergeOp (MI, *MRI)) {
908
+ Used.VLAny = false ;
909
+ Used.VLZeroness = true ;
910
+ Used.LMUL = false ;
911
+ Used.TailPolicy = false ;
910
912
}
911
913
912
914
if (CurInfo.isCompatible (Used, Require, *MRI))
0 commit comments