Skip to content

Commit 46541a3

Browse files
committed
[ARM] Add an extra MVE low-trip-count loop. NFC
This makes use of half floats, which makes the masked stores expensive.
1 parent c9f94ba commit 46541a3

File tree

1 file changed

+35
-1
lines changed

1 file changed

+35
-1
lines changed

llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,41 @@ define dso_local i32 @predicated_test(i32 noundef %0, ptr %glob) #0 {
592592
ret i32 0
593593
}
594594

595+
; This has a maximum trip count of 3 (the count is %blockSize & 3). The codegen is currently much better with <8 x half> vectorization.
596+
; CHECK-LABEL: arm_q15_to_f16_remainder
597+
; CHECK: LV: Selecting VF: 8
598+
define void @arm_q15_to_f16_remainder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly noalias %pDst, i32 noundef %blockSize) #0 {
599+
entry: ; converts the last (%blockSize & 3) i16 values at %pSrc to scaled half values at %pDst
600+
%rem = and i32 %blockSize, 3 ; keep only the low two bits, so %rem is in [0, 3]
601+
%cmp.not5 = icmp eq i32 %rem, 0 ; nothing to do when there is no remainder
602+
br i1 %cmp.not5, label %while.end, label %while.body.preheader
603+
604+
while.body.preheader: ; preds = %entry
605+
br label %while.body
606+
607+
while.body: ; preds = %while.body.preheader, %while.body
608+
%blkCnt.08 = phi i32 [ %dec, %while.body ], [ %rem, %while.body.preheader ] ; remaining iterations, starts at %rem
609+
%pIn.07 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] ; current source pointer
610+
%pDst.addr.06 = phi ptr [ %incdec.ptr2, %while.body ], [ %pDst, %while.body.preheader ] ; current destination pointer
611+
%incdec.ptr = getelementptr inbounds i8, ptr %pIn.07, i32 2 ; advance source by 2 bytes (one i16)
612+
%0 = load i16, ptr %pIn.07, align 2 ; read one 16-bit input element
613+
%conv1 = sitofp i16 %0 to half ; signed integer -> half conversion
614+
%1 = fmul fast half %conv1, 0xH0200 ; 0xH0200 is the half bit pattern for 2^-15 (subnormal), i.e. scale by 1/32768
615+
%incdec.ptr2 = getelementptr inbounds i8, ptr %pDst.addr.06, i32 2 ; advance destination by 2 bytes (one half)
616+
store half %1, ptr %pDst.addr.06, align 2 ; write the scaled half result
617+
%dec = add nsw i32 %blkCnt.08, -1 ; one fewer iteration remaining
618+
%cmp.not = icmp eq i32 %dec, 0 ; leave the loop once the counter reaches zero
619+
br i1 %cmp.not, label %while.end.loopexit, label %while.body
620+
621+
while.end.loopexit: ; preds = %while.body
622+
br label %while.end
623+
624+
while.end: ; preds = %while.end.loopexit, %entry
625+
ret void
626+
}
627+
628+
595629
declare void @llvm.lifetime.start.p0(i64, ptr)
596630
declare void @llvm.lifetime.end.p0(i64, ptr)
597631

598-
attributes #0 = { "target-features"="+mve" }
632+
attributes #0 = { "target-features"="+mve.fp" }

0 commit comments

Comments
 (0)