Skip to content

Commit 3362e2d

Browse files
committed
[VP] Add IR expansion for vp.icmp and vp.fcmp
These intrinsics are simply expanded to regular icmp/fcmp instructions. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D121594
1 parent 2d149d1 commit 3362e2d

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

llvm/lib/CodeGen/ExpandVectorPredication.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,10 @@ struct CachingVPExpander {
179179
Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
180180
VPIntrinsic &VPI);
181181

182+
/// \brief Lower this VP comparison to a call to an unpredicated comparison.
183+
Value *expandPredicationInComparison(IRBuilder<> &Builder,
184+
VPCmpIntrinsic &PI);
185+
182186
/// \brief Query TTI and expand the vector predication in \p P accordingly.
183187
Value *expandPredication(VPIntrinsic &PI);
184188

@@ -462,6 +466,24 @@ CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
462466
return NewMemoryInst;
463467
}
464468

469+
Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
470+
VPCmpIntrinsic &VPI) {
471+
assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
472+
"Implicitly dropping %evl in non-speculatable operator!");
473+
474+
auto OC = *VPI.getFunctionalOpcode();
475+
assert(OC == Instruction::ICmp || OC == Instruction::FCmp);
476+
477+
Value *Op0 = VPI.getOperand(0);
478+
Value *Op1 = VPI.getOperand(1);
479+
auto Pred = VPI.getPredicate();
480+
481+
auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);
482+
483+
replaceOperation(*NewCmp, VPI);
484+
return NewCmp;
485+
}
486+
465487
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
466488
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
467489

@@ -538,6 +560,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
538560
if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
539561
return expandPredicationInReduction(Builder, *VPRI);
540562

563+
if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
564+
return expandPredicationInComparison(Builder, *VPCmp);
565+
541566
switch (VPI.getIntrinsicID()) {
542567
default:
543568
break;

llvm/test/CodeGen/Generic/expand-vp.ll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ declare float @llvm.vp.reduce.fmin.v4f32(float, <4 x float>, <4 x i1>, i32)
3939
declare float @llvm.vp.reduce.fmax.v4f32(float, <4 x float>, <4 x i1>, i32)
4040
declare float @llvm.vp.reduce.fadd.v4f32(float, <4 x float>, <4 x i1>, i32)
4141
declare float @llvm.vp.reduce.fmul.v4f32(float, <4 x float>, <4 x i1>, i32)
42+
; Comparisons
43+
declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
44+
declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
4245

4346
; Fixed vector test function.
4447
define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
@@ -121,6 +124,14 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
121124
ret void
122125
}
123126

127+
define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
128+
%r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
129+
%r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n)
130+
%r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
131+
%r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
132+
ret void
133+
}
134+
124135
; All VP intrinsics have to be lowered into non-VP ops
125136
; Convert %evl into %mask for non-speculatable VP intrinsics and emit the
126137
; instruction+select idiom with a non-VP SIMD instruction.
@@ -233,6 +244,15 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
233244
; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]])
234245
; ALL-CONVERT-NEXT: ret void
235246

247+
; Check that comparisons use the correct condition codes
248+
; ALL-CONVERT: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
249+
; ALL-CONVERT-NEXT: %{{.+}} = icmp eq <8 x i32> %i0, %i1
250+
; ALL-CONVERT-NEXT: %{{.+}} = icmp slt <8 x i32> %i0, %i1
251+
; ALL-CONVERT-NEXT: %{{.+}} = fcmp oeq <8 x float> %f0, %f1
252+
; ALL-CONVERT-NEXT: %{{.+}} = fcmp ult <8 x float> %f0, %f1
253+
; ALL-CONVERT-NEXT: ret void
254+
255+
236256
; All legal - don't transform anything.
237257

238258
; LEGAL_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
@@ -292,6 +312,13 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
292312
; LEGAL_LEGAL-NEXT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
293313
; LEGAL_LEGAL-NEXT: ret void
294314

315+
; LEGAL_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
316+
; LEGAL_LEGAL-NEXT: %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
317+
; LEGAL_LEGAL-NEXT: %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n)
318+
; LEGAL_LEGAL-NEXT: %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
319+
; LEGAL_LEGAL-NEXT: %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
320+
; LEGAL_LEGAL-NEXT: ret void
321+
295322
; Drop %evl where possible else fold %evl into %mask (%evl Discard, %mask Legal)
296323
;
297324
; There is no caching yet in the ExpandVectorPredication pass and the %evl
@@ -372,6 +399,12 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
372399
; DISCARD_LEGAL-NOT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
373400
; DISCARD_LEGAL: ret void
374401

402+
; DISCARD_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
403+
; DISCARD_LEGAL-NEXT: %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 8)
404+
; DISCARD_LEGAL-NEXT: %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 8)
405+
; DISCARD_LEGAL-NEXT: %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 8)
406+
; DISCARD_LEGAL-NEXT: %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 8)
407+
375408
; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
376409
;
377410
; For the same reasons as in the (%evl Discard, %mask Legal) case only check that..
@@ -441,3 +474,15 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
441474
; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
442475
; CONVERT_LEGAL-NOT: %{{.+}} = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
443476
; CONVERT_LEGAL: ret void
477+
478+
; CONVERT_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
479+
; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
480+
; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
481+
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
482+
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
483+
; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> [[NEWM]], i32 8)
484+
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
485+
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n
486+
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
487+
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
488+
; CONVERT_LEGAL: ret void

0 commit comments

Comments
 (0)