Skip to content

Commit ede5709

Browse files
authored
[RISCV][TTI] Add llvm.vp.select into canSplatOperand. (#117982)
The second operand of llvm.vp.select (operand index 1) can be a splat operand; sinking the splat lets llvm.vp.select fold from the vv form of the instruction to the vx form.
1 parent 5248e1d commit ede5709

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2375,6 +2375,7 @@ bool RISCVTTIImpl::canSplatOperand(Instruction *I, int Operand) const {
23752375
case Intrinsic::vp_ssub_sat:
23762376
case Intrinsic::usub_sat:
23772377
case Intrinsic::vp_usub_sat:
2378+
case Intrinsic::vp_select:
23782379
return Operand == 1;
23792380
// These intrinsics are commutative.
23802381
case Intrinsic::vp_add:

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5498,3 +5498,86 @@ vector.body: ; preds = %vector.body, %entry
54985498
for.cond.cleanup: ; preds = %vector.body
54995499
ret void
55005500
}
5501+
5502+
define void @sink_splat_vp_select_op1(ptr %a, i32 %x, i32 %vl) {
5503+
; CHECK-LABEL: sink_splat_vp_select_op1:
5504+
; CHECK: # %bb.0: # %entry
5505+
; CHECK-NEXT: lui a4, 1
5506+
; CHECK-NEXT: li a3, 42
5507+
; CHECK-NEXT: slli a5, a2, 32
5508+
; CHECK-NEXT: add a2, a0, a4
5509+
; CHECK-NEXT: srli a4, a5, 32
5510+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5511+
; CHECK-NEXT: .LBB119_1: # %vector.body
5512+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5513+
; CHECK-NEXT: vle32.v v8, (a0)
5514+
; CHECK-NEXT: vmseq.vx v0, v8, a3
5515+
; CHECK-NEXT: vsetvli zero, a4, e32, m1, ta, ma
5516+
; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0
5517+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5518+
; CHECK-NEXT: vse32.v v8, (a0)
5519+
; CHECK-NEXT: addi a0, a0, 16
5520+
; CHECK-NEXT: bne a0, a2, .LBB119_1
5521+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5522+
; CHECK-NEXT: ret
5523+
entry:
5524+
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5525+
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5526+
br label %vector.body
5527+
5528+
vector.body:
5529+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5530+
%0 = getelementptr inbounds i32, ptr %a, i64 %index
5531+
%load = load <4 x i32>, ptr %0, align 4
5532+
%cond = icmp eq <4 x i32> %load, splat (i32 42)
5533+
%1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load, i32 %vl)
5534+
store <4 x i32> %1, ptr %0, align 4
5535+
%index.next = add nuw i64 %index, 4
5536+
%2 = icmp eq i64 %index.next, 1024
5537+
br i1 %2, label %for.cond.cleanup, label %vector.body
5538+
5539+
for.cond.cleanup:
5540+
ret void
5541+
}
5542+
5543+
define void @sink_splat_vp_select_op2(ptr %a, i32 %x, i32 %vl) {
5544+
; CHECK-LABEL: sink_splat_vp_select_op2:
5545+
; CHECK: # %bb.0: # %entry
5546+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5547+
; CHECK-NEXT: vmv.v.x v8, a1
5548+
; CHECK-NEXT: lui a3, 1
5549+
; CHECK-NEXT: li a1, 42
5550+
; CHECK-NEXT: slli a4, a2, 32
5551+
; CHECK-NEXT: add a2, a0, a3
5552+
; CHECK-NEXT: srli a3, a4, 32
5553+
; CHECK-NEXT: .LBB120_1: # %vector.body
5554+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
5555+
; CHECK-NEXT: vle32.v v9, (a0)
5556+
; CHECK-NEXT: vmseq.vx v0, v9, a1
5557+
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
5558+
; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0
5559+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5560+
; CHECK-NEXT: vse32.v v9, (a0)
5561+
; CHECK-NEXT: addi a0, a0, 16
5562+
; CHECK-NEXT: bne a0, a2, .LBB120_1
5563+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
5564+
; CHECK-NEXT: ret
5565+
entry:
5566+
%broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5567+
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5568+
br label %vector.body
5569+
5570+
vector.body:
5571+
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5572+
%0 = getelementptr inbounds i32, ptr %a, i64 %index
5573+
%load = load <4 x i32>, ptr %0, align 4
5574+
%cond = icmp eq <4 x i32> %load, splat (i32 42)
5575+
%1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat, i32 %vl)
5576+
store <4 x i32> %1, ptr %0, align 4
5577+
%index.next = add nuw i64 %index, 4
5578+
%2 = icmp eq i64 %index.next, 1024
5579+
br i1 %2, label %for.cond.cleanup, label %vector.body
5580+
5581+
for.cond.cleanup:
5582+
ret void
5583+
}

0 commit comments

Comments
 (0)