Skip to content

Commit 9e9924c

Browse files
authored
[RISCV] Don't fold vmerge.vvm or vmv.v.v into vredsum.vs if AVL changed (#99006)
When folding, we currently check whether the pseudo's result is not lanewise (e.g. vredsum.vs or viota.m) and bail if we're changing the mask. However, we also need to check the AVL. This patch bails if the AVL changes for these pseudos, and also renames the pseudo table property to be more explicit.
1 parent 3850912 commit 9e9924c

File tree

5 files changed

+87
-14
lines changed

5 files changed

+87
-14
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3753,11 +3753,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
37533753
if (!Info)
37543754
return false;
37553755

3756-
// When Mask is not a true mask, this transformation is illegal for some
3757-
// operations whose results are affected by mask, like viota.m.
3758-
if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3759-
return false;
3760-
37613756
// If True has a merge operand then it needs to be the same as vmerge's False,
37623757
// since False will be used for the result's merge operand.
37633758
if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
@@ -3835,6 +3830,16 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
38353830
if (!VL)
38363831
return false;
38373832

3833+
// Some operations produce different elementwise results depending on the
3834+
// active elements, like viota.m or vredsum. This transformation is illegal
3835+
// for these if we change the active elements (i.e. mask or VL).
3836+
if (Info->ActiveElementsAffectResult) {
3837+
if (Mask && !usesAllOnesMask(Mask, Glue))
3838+
return false;
3839+
if (TrueVL != VL)
3840+
return false;
3841+
}
3842+
38383843
// If we end up changing the VL or mask of True, then we need to make sure it
38393844
// doesn't raise any observable fp exceptions, since changing the active
38403845
// elements will affect how fflags is set.

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ struct RISCVMaskedPseudoInfo {
381381
uint16_t MaskedPseudo;
382382
uint16_t UnmaskedPseudo;
383383
uint8_t MaskOpIdx;
384-
uint8_t MaskAffectsResult : 1;
384+
uint8_t ActiveElementsAffectResult : 1;
385385
};
386386
#define GET_RISCVMaskedPseudosTable_DECL
387387
#include "RISCVGenSearchableTables.inc"

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -561,17 +561,17 @@ def RISCVVIntrinsicsTable : GenericTable {
561561
// unmasked variant. For all but compares, both the masked and
562562
// unmasked variant have a passthru and policy operand. For compares,
563563
// neither has a policy op, and only the masked version has a passthru.
564-
class RISCVMaskedPseudo<bits<4> MaskIdx, bit MaskAffectsRes=false> {
564+
class RISCVMaskedPseudo<bits<4> MaskIdx, bit ActiveAffectsRes=false> {
565565
Pseudo MaskedPseudo = !cast<Pseudo>(NAME);
566566
Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME));
567567
bits<4> MaskOpIdx = MaskIdx;
568-
bit MaskAffectsResult = MaskAffectsRes;
568+
bit ActiveElementsAffectResult = ActiveAffectsRes;
569569
}
570570

571571
def RISCVMaskedPseudosTable : GenericTable {
572572
let FilterClass = "RISCVMaskedPseudo";
573573
let CppTypeName = "RISCVMaskedPseudoInfo";
574-
let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "MaskAffectsResult"];
574+
let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx", "ActiveElementsAffectResult"];
575575
let PrimaryKey = ["MaskedPseudo"];
576576
let PrimaryKeyName = "getMaskedPseudoInfo";
577577
}
@@ -2065,7 +2065,7 @@ multiclass VPseudoVIOTA_M {
20652065
SchedUnary<"WriteVIotaV", "ReadVIotaV", mx,
20662066
forceMergeOpRead=true>;
20672067
def "_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
2068-
RISCVMaskedPseudo<MaskIdx=2, MaskAffectsRes=true>,
2068+
RISCVMaskedPseudo<MaskIdx=2, ActiveAffectsRes=true>,
20692069
SchedUnary<"WriteVIotaV", "ReadVIotaV", mx,
20702070
forceMergeOpRead=true>;
20712071
}
@@ -3162,7 +3162,7 @@ multiclass VPseudoTernaryWithTailPolicy<VReg RetClass,
31623162
defvar mx = MInfo.MX;
31633163
def "_" # mx # "_E" # sew : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class>;
31643164
def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class>,
3165-
RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>;
3165+
RISCVMaskedPseudo<MaskIdx=3, ActiveAffectsRes=true>;
31663166
}
31673167
}
31683168

@@ -3179,7 +3179,7 @@ multiclass VPseudoTernaryWithTailPolicyRoundingMode<VReg RetClass,
31793179
def "_" # mx # "_E" # sew # "_MASK"
31803180
: VPseudoTernaryMaskPolicyRoundingMode<RetClass, Op1Class,
31813181
Op2Class>,
3182-
RISCVMaskedPseudo<MaskIdx=3, MaskAffectsRes=true>;
3182+
RISCVMaskedPseudo<MaskIdx=3, ActiveAffectsRes=true>;
31833183
}
31843184
}
31853185

llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@ define <vscale x 4 x i32> @vadd(<vscale x 4 x i32> %passthru, <vscale x 4 x i32>
1919
ret <vscale x 4 x i32> %w
2020
}
2121

22+
define <vscale x 4 x i32> @vadd_mask(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, iXLen %vl) {
23+
; CHECK-LABEL: vadd_mask:
24+
; CHECK: # %bb.0:
25+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
26+
; CHECK-NEXT: vadd.vv v8, v10, v12, v0.t
27+
; CHECK-NEXT: ret
28+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, iXLen %vl, iXLen 3)
29+
%w = call <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, iXLen %vl)
30+
ret <vscale x 4 x i32> %w
31+
}
32+
2233
define <vscale x 4 x i32> @vadd_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
2334
; CHECK-LABEL: vadd_undef:
2435
; CHECK: # %bb.0:
@@ -106,8 +117,8 @@ declare <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float>, <
106117

107118
declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, iXLen, iXLen)
108119

109-
define <vscale x 4 x float> @vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl1, iXLen %vl2) {
110-
; CHECK-LABEL: vfadd:
120+
define <vscale x 4 x float> @unfoldable_vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl1, iXLen %vl2) {
121+
; CHECK-LABEL: unfoldable_vfadd:
111122
; CHECK: # %bb.0:
112123
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
113124
; CHECK-NEXT: vfadd.vv v10, v10, v12
@@ -118,3 +129,42 @@ define <vscale x 4 x float> @vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x
118129
%w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl2)
119130
ret <vscale x 4 x float> %w
120131
}
132+
133+
define <vscale x 4 x float> @foldable_vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
134+
; CHECK-LABEL: foldable_vfadd:
135+
; CHECK: # %bb.0:
136+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
137+
; CHECK-NEXT: vfadd.vv v8, v10, v12
138+
; CHECK-NEXT: ret
139+
%v = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
140+
%w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
141+
ret <vscale x 4 x float> %w
142+
}
143+
144+
; This shouldn't be folded because we need to preserve exceptions with
145+
; "fpexcept.strict" exception behaviour, and changing the VL may hide them.
146+
define <vscale x 4 x float> @unfoldable_constrained_fadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %x, <vscale x 4 x float> %y, iXLen %vl) strictfp {
147+
; CHECK-LABEL: unfoldable_constrained_fadd:
148+
; CHECK: # %bb.0:
149+
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
150+
; CHECK-NEXT: vfadd.vv v10, v10, v12
151+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
152+
; CHECK-NEXT: vmv.v.v v8, v10
153+
; CHECK-NEXT: ret
154+
%a = call <vscale x 4 x float> @llvm.experimental.constrained.fadd(<vscale x 4 x float> %x, <vscale x 4 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
155+
%b = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, iXLen %vl) strictfp
156+
ret <vscale x 4 x float> %b
157+
}
158+
159+
define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vscale x 4 x i32> %x, <vscale x 2 x i32> %y) {
160+
; CHECK-LABEL: unfoldable_vredsum:
161+
; CHECK: # %bb.0:
162+
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
163+
; CHECK-NEXT: vredsum.vs v9, v10, v9
164+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, tu, ma
165+
; CHECK-NEXT: vmv.v.v v8, v9
166+
; CHECK-NEXT: ret
167+
%a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv4i32(<vscale x 2 x i32> poison, <vscale x 4 x i32> %x, <vscale x 2 x i32> %y, iXLen -1)
168+
%b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
169+
ret <vscale x 2 x i32> %b
170+
}

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,24 @@ define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passth
10141014
ret <vscale x 2 x float> %b
10151015
}
10161016

1017+
define <vscale x 2 x i32> @unfoldable_vredsum_allones_mask_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
1018+
; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl:
1019+
; CHECK: # %bb.0:
1020+
; CHECK-NEXT: vmv1r.v v11, v8
1021+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma
1022+
; CHECK-NEXT: vredsum.vs v11, v9, v10
1023+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, tu, ma
1024+
; CHECK-NEXT: vmv.v.v v8, v11
1025+
; CHECK-NEXT: ret
1026+
%a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
1027+
<vscale x 2 x i32> %passthru,
1028+
<vscale x 2 x i32> %x,
1029+
<vscale x 2 x i32> %y,
1030+
i64 -1)
1031+
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1)
1032+
ret <vscale x 2 x i32> %b
1033+
}
1034+
10171035
declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
10181036
declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
10191037
declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)

0 commit comments

Comments
 (0)